1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 156 } 157 if (!a->sor_workt) { 158 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 159 } 160 if (!a->sor_work) { 161 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 162 } 163 work = a->mult_work; 164 t = a->sor_workt; 165 w = a->sor_work; 166 167 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 168 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 169 170 if (flag & SOR_ZERO_INITIAL_GUESS) { 171 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 172 switch (bs) { 173 case 1: 174 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 175 t[0] = b[0]; 176 i2 = 1; 177 idiag += 1; 178 for (i=1; i<m; i++) { 179 v = aa + ai[i]; 180 vi = aj + ai[i]; 181 nz = diag[i] - ai[i]; 182 s[0] = b[i2]; 183 for (j=0; j<nz; j++) { 184 xw[0] = x[vi[j]]; 185 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 186 } 187 t[i2] = s[0]; 188 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 189 x[i2] = xw[0]; 190 idiag += 1; 191 i2 += 1; 192 } 193 break; 194 case 2: 195 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 196 t[0] = b[0]; t[1] = b[1]; 197 i2 = 2; 198 idiag += 4; 199 for (i=1; i<m; i++) { 200 v = aa + 4*ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; s[1] = b[i2+1]; 204 for (j=0; j<nz; j++) { 205 idx = 2*vi[j]; 206 it = 4*j; 207 xw[0] = x[idx]; xw[1] = x[1+idx]; 208 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 209 } 210 t[i2] = s[0]; t[i2+1] = s[1]; 211 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 212 x[i2] = xw[0]; x[i2+1] = xw[1]; 213 idiag += 4; 214 i2 += 2; 215 } 216 break; 217 case 3: 218 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 219 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 220 i2 = 3; 221 idiag += 9; 222 for (i=1; i<m; i++) { 223 v = aa + 9*ai[i]; 224 vi = aj + ai[i]; 225 nz = diag[i] - ai[i]; 226 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 227 while (nz--) { 228 idx = 3*(*vi++); 229 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 230 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 231 v += 9; 232 } 233 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 234 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 235 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 236 idiag += 9; 237 i2 += 3; 238 } 239 break; 240 case 4: 241 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 242 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 243 i2 = 4; 244 idiag += 16; 245 for (i=1; i<m; i++) { 246 v = aa + 16*ai[i]; 247 vi = aj + ai[i]; 248 nz = diag[i] - ai[i]; 249 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 250 while (nz--) { 251 idx = 4*(*vi++); 252 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 253 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 254 v += 16; 255 } 256 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 257 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 258 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 259 idiag += 16; 260 i2 += 4; 261 } 262 break; 263 case 5: 264 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 265 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 266 i2 = 5; 267 idiag += 25; 268 for (i=1; i<m; i++) { 269 v = aa + 25*ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 273 while (nz--) { 274 idx = 5*(*vi++); 275 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 276 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 277 v += 25; 278 } 279 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 280 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 281 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 282 idiag += 25; 283 i2 += 5; 284 } 285 break; 286 case 6: 287 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 288 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 289 i2 = 6; 290 idiag += 36; 291 for (i=1; i<m; i++) { 292 v = aa + 36*ai[i]; 293 vi = aj + ai[i]; 294 nz = diag[i] - ai[i]; 295 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 296 while (nz--) { 297 idx = 6*(*vi++); 298 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 299 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 300 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 301 v += 36; 302 } 303 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 304 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 305 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 306 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 307 idiag += 36; 308 i2 += 6; 309 } 310 break; 311 case 7: 312 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 313 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 314 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 315 i2 = 7; 316 idiag += 49; 317 for (i=1; i<m; i++) { 318 v = aa + 49*ai[i]; 319 vi = aj + ai[i]; 320 nz = diag[i] - ai[i]; 321 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 322 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 323 while (nz--) { 324 idx = 7*(*vi++); 325 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 326 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 327 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 328 v += 49; 329 } 330 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 331 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 332 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 333 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 334 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 335 idiag += 49; 336 i2 += 7; 337 } 338 break; 339 default: 340 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 341 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 342 i2 = bs; 343 idiag += bs2; 344 for (i=1; i<m; i++) { 345 v = aa + bs2*ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 349 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 350 /* copy all rows of x that are needed into contiguous space */ 351 workt = work; 352 for (j=0; j<nz; j++) { 353 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 354 workt += bs; 355 } 356 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 357 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 358 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 359 360 idiag += bs2; 361 i2 += bs; 362 } 363 break; 364 } 365 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 366 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 367 xb = t; 368 } 369 else xb = b; 370 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 371 idiag = a->idiag+bs2*(a->mbs-1); 372 i2 = bs * (m-1); 373 switch (bs) { 374 case 1: 375 s[0] = xb[i2]; 376 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 377 x[i2] = xw[0]; 378 i2 -= 1; 379 for (i=m-2; i>=0; i--) { 380 v = aa + (diag[i]+1); 381 vi = aj + diag[i] + 1; 382 nz = ai[i+1] - diag[i] - 1; 383 s[0] = xb[i2]; 384 for (j=0; j<nz; j++) { 385 xw[0] = x[vi[j]]; 386 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 387 } 388 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 389 x[i2] = xw[0]; 390 idiag -= 1; 391 i2 -= 1; 392 } 393 break; 394 case 2: 395 s[0] = xb[i2]; s[1] = xb[i2+1]; 396 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 397 x[i2] = xw[0]; x[i2+1] = xw[1]; 398 i2 -= 2; 399 idiag -= 4; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 4*(diag[i] + 1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; s[1] = xb[i2+1]; 405 for (j=0; j<nz; j++) { 406 idx = 2*vi[j]; 407 it = 4*j; 408 xw[0] = x[idx]; xw[1] = x[1+idx]; 409 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 410 } 411 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 412 x[i2] = xw[0]; x[i2+1] = xw[1]; 413 idiag -= 4; 414 i2 -= 2; 415 } 416 break; 417 case 3: 418 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 419 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 420 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 421 i2 -= 3; 422 idiag -= 9; 423 for (i=m-2; i>=0; i--) { 424 v = aa + 9*(diag[i]+1); 425 vi = aj + diag[i] + 1; 426 nz = ai[i+1] - diag[i] - 1; 427 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 428 while (nz--) { 429 idx = 3*(*vi++); 430 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 431 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 432 v += 9; 433 } 434 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 435 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 436 idiag -= 9; 437 i2 -= 3; 438 } 439 break; 440 case 4: 441 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 442 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 443 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 444 i2 -= 4; 445 idiag -= 16; 446 for (i=m-2; i>=0; i--) { 447 v = aa + 16*(diag[i]+1); 448 vi = aj + diag[i] + 1; 449 nz = ai[i+1] - diag[i] - 1; 450 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 451 while (nz--) { 452 idx = 4*(*vi++); 453 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 454 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 455 v += 16; 456 } 457 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 458 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 459 idiag -= 16; 460 i2 -= 4; 461 } 462 break; 463 case 5: 464 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 465 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 466 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 467 i2 -= 5; 468 idiag -= 25; 469 for (i=m-2; i>=0; i--) { 470 v = aa + 25*(diag[i]+1); 471 vi = aj + diag[i] + 1; 472 nz = ai[i+1] - diag[i] - 1; 473 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 474 while (nz--) { 475 idx = 5*(*vi++); 476 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 477 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 478 v += 25; 479 } 480 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 481 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 482 idiag -= 25; 483 i2 -= 5; 484 } 485 break; 486 case 6: 487 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 488 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 489 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 490 i2 -= 6; 491 idiag -= 36; 492 for (i=m-2; i>=0; i--) { 493 v = aa + 36*(diag[i]+1); 494 vi = aj + diag[i] + 1; 495 nz = ai[i+1] - diag[i] - 1; 496 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 497 while (nz--) { 498 idx = 6*(*vi++); 499 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 500 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 501 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 502 v += 36; 503 } 504 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 505 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 506 idiag -= 36; 507 i2 -= 6; 508 } 509 break; 510 case 7: 511 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 512 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 513 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 514 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 515 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 516 i2 -= 7; 517 idiag -= 49; 518 for (i=m-2; i>=0; i--) { 519 v = aa + 49*(diag[i]+1); 520 vi = aj + diag[i] + 1; 521 nz = ai[i+1] - diag[i] - 1; 522 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 523 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 524 while (nz--) { 525 idx = 7*(*vi++); 526 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 527 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 528 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 529 v += 49; 530 } 531 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 532 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 533 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 534 idiag -= 49; 535 i2 -= 7; 536 } 537 break; 538 default: 539 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 540 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 541 i2 -= bs; 542 idiag -= bs2; 543 for (i=m-2; i>=0; i--) { 544 v = aa + bs2*(diag[i]+1); 545 vi = aj + diag[i] + 1; 546 nz = ai[i+1] - diag[i] - 1; 547 548 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 549 /* copy all rows of x that are needed into contiguous space */ 550 workt = work; 551 for (j=0; j<nz; j++) { 552 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 553 workt += bs; 554 } 555 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 556 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 557 558 idiag -= bs2; 559 i2 -= bs; 560 } 561 break; 562 } 563 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 564 } 565 its--; 566 } 567 while (its--) { 568 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 569 idiag = a->idiag; 570 i2 = 0; 571 switch (bs) { 572 case 1: 573 for (i=0; i<m; i++) { 574 v = aa + ai[i]; 575 vi = aj + ai[i]; 576 nz = ai[i+1] - ai[i]; 577 s[0] = b[i2]; 578 for (j=0; j<nz; j++) { 579 xw[0] = x[vi[j]]; 580 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 581 } 582 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 583 x[i2] += xw[0]; 584 idiag += 1; 585 i2 += 1; 586 } 587 break; 588 case 2: 589 for (i=0; i<m; i++) { 590 v = aa + 4*ai[i]; 591 vi = aj + ai[i]; 592 nz = ai[i+1] - ai[i]; 593 s[0] = b[i2]; s[1] = b[i2+1]; 594 for (j=0; j<nz; j++) { 595 idx = 2*vi[j]; 596 it = 4*j; 597 xw[0] = x[idx]; xw[1] = x[1+idx]; 598 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 599 } 600 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 601 x[i2] += xw[0]; x[i2+1] += xw[1]; 602 idiag += 4; 603 i2 += 2; 604 } 605 break; 606 case 3: 607 for (i=0; i<m; i++) { 608 v = aa + 9*ai[i]; 609 vi = aj + ai[i]; 610 nz = ai[i+1] - ai[i]; 611 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 612 while (nz--) { 613 idx = 3*(*vi++); 614 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 615 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 616 v += 9; 617 } 618 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 619 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 620 idiag += 9; 621 i2 += 3; 622 } 623 break; 624 case 4: 625 for (i=0; i<m; i++) { 626 v = aa + 16*ai[i]; 627 vi = aj + ai[i]; 628 nz = ai[i+1] - ai[i]; 629 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 630 while (nz--) { 631 idx = 4*(*vi++); 632 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 633 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 634 v += 16; 635 } 636 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 637 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 638 idiag += 16; 639 i2 += 4; 640 } 641 break; 642 case 5: 643 for (i=0; i<m; i++) { 644 v = aa + 25*ai[i]; 645 vi = aj + ai[i]; 646 nz = ai[i+1] - ai[i]; 647 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 651 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 652 v += 25; 653 } 654 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 655 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 656 idiag += 25; 657 i2 += 5; 658 } 659 break; 660 case 6: 661 for (i=0; i<m; i++) { 662 v = aa + 36*ai[i]; 663 vi = aj + ai[i]; 664 nz = ai[i+1] - ai[i]; 665 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 666 while (nz--) { 667 idx = 6*(*vi++); 668 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 669 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 670 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 671 v += 36; 672 } 673 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 674 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 675 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 676 idiag += 36; 677 i2 += 6; 678 } 679 break; 680 case 7: 681 for (i=0; i<m; i++) { 682 v = aa + 49*ai[i]; 683 vi = aj + ai[i]; 684 nz = ai[i+1] - ai[i]; 685 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 686 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 687 while (nz--) { 688 idx = 7*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 692 v += 49; 693 } 694 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 697 idiag += 49; 698 i2 += 7; 699 } 700 break; 701 default: 702 for (i=0; i<m; i++) { 703 v = aa + bs2*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 707 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 708 /* copy all rows of x that are needed into contiguous space */ 709 workt = work; 710 for (j=0; j<nz; j++) { 711 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 712 workt += bs; 713 } 714 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 715 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 716 717 idiag += bs2; 718 i2 += bs; 719 } 720 break; 721 } 722 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 723 } 724 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 725 idiag = a->idiag+bs2*(a->mbs-1); 726 i2 = bs * (m-1); 727 switch (bs) { 728 case 1: 729 for (i=m-1; i>=0; i--) { 730 v = aa + ai[i]; 731 vi = aj + ai[i]; 732 nz = ai[i+1] - ai[i]; 733 s[0] = b[i2]; 734 for (j=0; j<nz; j++) { 735 xw[0] = x[vi[j]]; 736 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 737 } 738 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 739 x[i2] += xw[0]; 740 idiag -= 1; 741 i2 -= 1; 742 } 743 break; 744 case 2: 745 for (i=m-1; i>=0; i--) { 746 v = aa + 4*ai[i]; 747 vi = aj + ai[i]; 748 nz = ai[i+1] - ai[i]; 749 s[0] = b[i2]; s[1] = b[i2+1]; 750 for (j=0; j<nz; j++) { 751 idx = 2*vi[j]; 752 it = 4*j; 753 xw[0] = x[idx]; xw[1] = x[1+idx]; 754 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 755 } 756 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 757 x[i2] += xw[0]; x[i2+1] += xw[1]; 758 idiag -= 4; 759 i2 -= 2; 760 } 761 break; 762 case 3: 763 for (i=m-1; i>=0; i--) { 764 v = aa + 9*ai[i]; 765 vi = aj + ai[i]; 766 nz = ai[i+1] - ai[i]; 767 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 768 while (nz--) { 769 idx = 3*(*vi++); 770 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 771 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 772 v += 9; 773 } 774 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 775 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 776 idiag -= 9; 777 i2 -= 3; 778 } 779 break; 780 case 4: 781 for (i=m-1; i>=0; i--) { 782 v = aa + 16*ai[i]; 783 vi = aj + ai[i]; 784 nz = ai[i+1] - ai[i]; 785 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 786 while (nz--) { 787 idx = 4*(*vi++); 788 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 789 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 790 v += 16; 791 } 792 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 793 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 794 idiag -= 16; 795 i2 -= 4; 796 } 797 break; 798 case 5: 799 for (i=m-1; i>=0; i--) { 800 v = aa + 25*ai[i]; 801 vi = aj + ai[i]; 802 nz = ai[i+1] - ai[i]; 803 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 804 while (nz--) { 805 idx = 5*(*vi++); 806 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 807 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 808 v += 25; 809 } 810 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 811 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 812 idiag -= 25; 813 i2 -= 5; 814 } 815 break; 816 case 6: 817 for (i=m-1; i>=0; i--) { 818 v = aa + 36*ai[i]; 819 vi = aj + ai[i]; 820 nz = ai[i+1] - ai[i]; 821 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 822 while (nz--) { 823 idx = 6*(*vi++); 824 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 825 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 826 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 827 v += 36; 828 } 829 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 830 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 831 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 832 idiag -= 36; 833 i2 -= 6; 834 } 835 break; 836 case 7: 837 for (i=m-1; i>=0; i--) { 838 v = aa + 49*ai[i]; 839 vi = aj + ai[i]; 840 nz = ai[i+1] - ai[i]; 841 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 842 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 843 while (nz--) { 844 idx = 7*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 848 v += 49; 849 } 850 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 853 idiag -= 49; 854 i2 -= 7; 855 } 856 break; 857 default: 858 for (i=m-1; i>=0; i--) { 859 v = aa + bs2*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 863 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 864 /* copy all rows of x that are needed into contiguous space */ 865 workt = work; 866 for (j=0; j<nz; j++) { 867 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 868 workt += bs; 869 } 870 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 871 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 872 873 idiag -= bs2; 874 i2 -= bs; 875 } 876 break; 877 } 878 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 879 } 880 } 881 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 882 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 887 /* 888 Special version for direct calls from Fortran (Used in PETSc-fun3d) 889 */ 890 #if defined(PETSC_HAVE_FORTRAN_CAPS) 891 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 892 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 893 #define matsetvaluesblocked4_ matsetvaluesblocked4 894 #endif 895 896 #undef __FUNCT__ 897 #define __FUNCT__ "matsetvaluesblocked4_" 898 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 899 { 900 Mat A = *AA; 901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 902 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 903 PetscInt *ai =a->i,*ailen=a->ilen; 904 PetscInt *aj =a->j,stepval,lastcol = -1; 905 const PetscScalar *value = v; 906 MatScalar *ap,*aa = a->a,*bap; 907 908 PetscFunctionBegin; 909 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 910 stepval = (n-1)*4; 911 for (k=0; k<m; k++) { /* loop over added rows */ 912 row = im[k]; 913 rp = aj + ai[row]; 914 ap = aa + 16*ai[row]; 915 nrow = ailen[row]; 916 low = 0; 917 high = nrow; 918 for (l=0; l<n; l++) { /* loop over added columns */ 919 col = in[l]; 920 if (col <= lastcol) low = 0; 921 else high = nrow; 922 lastcol = col; 923 value = v + k*(stepval+4 + l)*4; 924 while (high-low > 7) { 925 t = (low+high)/2; 926 if (rp[t] > col) high = t; 927 else low = t; 928 } 929 for (i=low; i<high; i++) { 930 if (rp[i] > col) break; 931 if (rp[i] == col) { 932 bap = ap + 16*i; 933 for (ii=0; ii<4; ii++,value+=stepval) { 934 for (jj=ii; jj<16; jj+=4) { 935 bap[jj] += *value++; 936 } 937 } 938 goto noinsert2; 939 } 940 } 941 N = nrow++ - 1; 942 high++; /* added new column index thus must search to one higher than before */ 943 /* shift up all the later entries in this row */ 944 for (ii=N; ii>=i; ii--) { 945 rp[ii+1] = rp[ii]; 946 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 947 } 948 if (N >= i) { 949 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 950 } 951 rp[i] = col; 952 bap = ap + 16*i; 953 for (ii=0; ii<4; ii++,value+=stepval) { 954 for (jj=ii; jj<16; jj+=4) { 955 bap[jj] = *value++; 956 } 957 } 958 noinsert2:; 959 low = i; 960 } 961 ailen[row] = nrow; 962 } 963 PetscFunctionReturnVoid(); 964 } 965 966 #if defined(PETSC_HAVE_FORTRAN_CAPS) 967 #define matsetvalues4_ MATSETVALUES4 968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 969 #define matsetvalues4_ matsetvalues4 970 #endif 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatSetValues4_" 974 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 975 { 976 Mat A = *AA; 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 979 PetscInt *ai=a->i,*ailen=a->ilen; 980 PetscInt *aj=a->j,brow,bcol; 981 PetscInt ridx,cidx,lastcol = -1; 982 MatScalar *ap,value,*aa=a->a,*bap; 983 984 PetscFunctionBegin; 985 for (k=0; k<m; k++) { /* loop over added rows */ 986 row = im[k]; brow = row/4; 987 rp = aj + ai[brow]; 988 ap = aa + 16*ai[brow]; 989 nrow = ailen[brow]; 990 low = 0; 991 high = nrow; 992 for (l=0; l<n; l++) { /* loop over added columns */ 993 col = in[l]; bcol = col/4; 994 ridx = row % 4; cidx = col % 4; 995 value = v[l + k*n]; 996 if (col <= lastcol) low = 0; 997 else high = nrow; 998 lastcol = col; 999 while (high-low > 7) { 1000 t = (low+high)/2; 1001 if (rp[t] > bcol) high = t; 1002 else low = t; 1003 } 1004 for (i=low; i<high; i++) { 1005 if (rp[i] > bcol) break; 1006 if (rp[i] == bcol) { 1007 bap = ap + 16*i + 4*cidx + ridx; 1008 *bap += value; 1009 goto noinsert1; 1010 } 1011 } 1012 N = nrow++ - 1; 1013 high++; /* added new column thus must search to one higher than before */ 1014 /* shift up all the later entries in this row */ 1015 for (ii=N; ii>=i; ii--) { 1016 rp[ii+1] = rp[ii]; 1017 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1018 } 1019 if (N>=i) { 1020 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1021 } 1022 rp[i] = bcol; 1023 ap[16*i + 4*cidx + ridx] = value; 1024 noinsert1:; 1025 low = i; 1026 } 1027 ailen[brow] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 /* 1033 Checks for missing diagonals 1034 */ 1035 #undef __FUNCT__ 1036 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1040 PetscErrorCode ierr; 1041 PetscInt *diag,*ii = a->i,i; 1042 1043 PetscFunctionBegin; 1044 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1045 *missing = PETSC_FALSE; 1046 if (A->rmap->n > 0 && !ii) { 1047 *missing = PETSC_TRUE; 1048 if (d) *d = 0; 1049 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1050 } else { 1051 diag = a->diag; 1052 for (i=0; i<a->mbs; i++) { 1053 if (diag[i] >= ii[i+1]) { 1054 *missing = PETSC_TRUE; 1055 if (d) *d = i; 1056 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1057 break; 1058 } 1059 } 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1067 { 1068 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 PetscInt i,j,m = a->mbs; 1071 1072 PetscFunctionBegin; 1073 if (!a->diag) { 1074 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1075 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1076 a->free_diag = PETSC_TRUE; 1077 } 1078 for (i=0; i<m; i++) { 1079 a->diag[i] = a->i[i+1]; 1080 for (j=a->i[i]; j<a->i[i+1]; j++) { 1081 if (a->j[j] == i) { 1082 a->diag[i] = j; 1083 break; 1084 } 1085 } 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1093 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1094 { 1095 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1098 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1099 1100 PetscFunctionBegin; 1101 *nn = n; 1102 if (!ia) PetscFunctionReturn(0); 1103 if (symmetric) { 1104 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1105 nz = tia[n]; 1106 } else { 1107 tia = a->i; tja = a->j; 1108 } 1109 1110 if (!blockcompressed && bs > 1) { 1111 (*nn) *= bs; 1112 /* malloc & create the natural set of indices */ 1113 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1114 if (n) { 1115 (*ia)[0] = 0; 1116 for (j=1; j<bs; j++) { 1117 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1118 } 1119 } 1120 1121 for (i=1; i<n; i++) { 1122 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1123 for (j=1; j<bs; j++) { 1124 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1125 } 1126 } 1127 if (n) { 1128 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1129 } 1130 1131 if (inja) { 1132 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1133 cnt = 0; 1134 for (i=0; i<n; i++) { 1135 for (j=0; j<bs; j++) { 1136 for (k=tia[i]; k<tia[i+1]; k++) { 1137 for (l=0; l<bs; l++) { 1138 (*ja)[cnt++] = bs*tja[k] + l; 1139 } 1140 } 1141 } 1142 } 1143 } 1144 1145 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1146 ierr = PetscFree(tia);CHKERRQ(ierr); 1147 ierr = PetscFree(tja);CHKERRQ(ierr); 1148 } 1149 } else if (oshift == 1) { 1150 if (symmetric) { 1151 nz = tia[A->rmap->n/bs]; 1152 /* add 1 to i and j indices */ 1153 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1154 *ia = tia; 1155 if (ja) { 1156 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1157 *ja = tja; 1158 } 1159 } else { 1160 nz = a->i[A->rmap->n/bs]; 1161 /* malloc space and add 1 to i and j indices */ 1162 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1163 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1164 if (ja) { 1165 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1166 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1167 } 1168 } 1169 } else { 1170 *ia = tia; 1171 if (ja) *ja = tja; 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1178 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1179 { 1180 PetscErrorCode ierr; 1181 1182 PetscFunctionBegin; 1183 if (!ia) PetscFunctionReturn(0); 1184 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1185 ierr = PetscFree(*ia);CHKERRQ(ierr); 1186 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1193 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1194 { 1195 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1196 PetscErrorCode ierr; 1197 1198 PetscFunctionBegin; 1199 #if defined(PETSC_USE_LOG) 1200 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1201 #endif 1202 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1203 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1205 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1206 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1207 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1208 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1209 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1210 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1211 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1212 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1213 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1214 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1215 1216 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1217 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1218 ierr = PetscFree(A->data);CHKERRQ(ierr); 1219 1220 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1236 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1237 { 1238 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 switch (op) { 1243 case MAT_ROW_ORIENTED: 1244 a->roworiented = flg; 1245 break; 1246 case MAT_KEEP_NONZERO_PATTERN: 1247 a->keepnonzeropattern = flg; 1248 break; 1249 case MAT_NEW_NONZERO_LOCATIONS: 1250 a->nonew = (flg ? 0 : 1); 1251 break; 1252 case MAT_NEW_NONZERO_LOCATION_ERR: 1253 a->nonew = (flg ? -1 : 0); 1254 break; 1255 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1256 a->nonew = (flg ? -2 : 0); 1257 break; 1258 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1259 a->nounused = (flg ? -1 : 0); 1260 break; 1261 case MAT_NEW_DIAGONALS: 1262 case MAT_IGNORE_OFF_PROC_ENTRIES: 1263 case MAT_USE_HASH_TABLE: 1264 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1265 break; 1266 case MAT_SPD: 1267 case MAT_SYMMETRIC: 1268 case MAT_STRUCTURALLY_SYMMETRIC: 1269 case MAT_HERMITIAN: 1270 case MAT_SYMMETRY_ETERNAL: 1271 /* These options are handled directly by MatSetOption() */ 1272 break; 1273 default: 1274 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1282 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1283 { 1284 PetscErrorCode ierr; 1285 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1286 MatScalar *aa_i; 1287 PetscScalar *v_i; 1288 1289 PetscFunctionBegin; 1290 bs = A->rmap->bs; 1291 bs2 = bs*bs; 1292 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1293 1294 bn = row/bs; /* Block number */ 1295 bp = row % bs; /* Block Position */ 1296 M = ai[bn+1] - ai[bn]; 1297 *nz = bs*M; 1298 1299 if (v) { 1300 *v = 0; 1301 if (*nz) { 1302 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1303 for (i=0; i<M; i++) { /* for each block in the block row */ 1304 v_i = *v + i*bs; 1305 aa_i = aa + bs2*(ai[bn] + i); 1306 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1307 } 1308 } 1309 } 1310 1311 if (idx) { 1312 *idx = 0; 1313 if (*nz) { 1314 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1315 for (i=0; i<M; i++) { /* for each block in the block row */ 1316 idx_i = *idx + i*bs; 1317 itmp = bs*aj[ai[bn] + i]; 1318 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1319 } 1320 } 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1327 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1339 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1340 { 1341 PetscErrorCode ierr; 1342 1343 PetscFunctionBegin; 1344 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1345 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1346 PetscFunctionReturn(0); 1347 } 1348 1349 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1354 { 1355 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1356 Mat C; 1357 PetscErrorCode ierr; 1358 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1359 PetscInt *rows,*cols,bs2=a->bs2; 1360 MatScalar *array; 1361 1362 PetscFunctionBegin; 1363 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1364 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1365 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1366 1367 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1368 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1369 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1370 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1371 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1372 ierr = PetscFree(col);CHKERRQ(ierr); 1373 } else { 1374 C = *B; 1375 } 1376 1377 array = a->a; 1378 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1379 for (i=0; i<mbs; i++) { 1380 cols[0] = i*bs; 1381 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1382 len = ai[i+1] - ai[i]; 1383 for (j=0; j<len; j++) { 1384 rows[0] = (*aj++)*bs; 1385 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1386 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1387 array += bs2; 1388 } 1389 } 1390 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1391 1392 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 1395 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1396 *B = C; 1397 } else { 1398 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1405 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1406 { 1407 PetscErrorCode ierr; 1408 Mat Btrans; 1409 1410 PetscFunctionBegin; 1411 *f = PETSC_FALSE; 1412 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1413 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1414 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1420 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1421 { 1422 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1425 int fd; 1426 PetscScalar *aa; 1427 FILE *file; 1428 1429 PetscFunctionBegin; 1430 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1431 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1432 col_lens[0] = MAT_FILE_CLASSID; 1433 1434 col_lens[1] = A->rmap->N; 1435 col_lens[2] = A->cmap->n; 1436 col_lens[3] = a->nz*bs2; 1437 1438 /* store lengths of each row and write (including header) to file */ 1439 count = 0; 1440 for (i=0; i<a->mbs; i++) { 1441 for (j=0; j<bs; j++) { 1442 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1443 } 1444 } 1445 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1446 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1447 1448 /* store column indices (zero start index) */ 1449 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1450 count = 0; 1451 for (i=0; i<a->mbs; i++) { 1452 for (j=0; j<bs; j++) { 1453 for (k=a->i[i]; k<a->i[i+1]; k++) { 1454 for (l=0; l<bs; l++) { 1455 jj[count++] = bs*a->j[k] + l; 1456 } 1457 } 1458 } 1459 } 1460 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1461 ierr = PetscFree(jj);CHKERRQ(ierr); 1462 1463 /* store nonzero values */ 1464 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1465 count = 0; 1466 for (i=0; i<a->mbs; i++) { 1467 for (j=0; j<bs; j++) { 1468 for (k=a->i[i]; k<a->i[i+1]; k++) { 1469 for (l=0; l<bs; l++) { 1470 aa[count++] = a->a[bs2*k + l*bs + j]; 1471 } 1472 } 1473 } 1474 } 1475 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1476 ierr = PetscFree(aa);CHKERRQ(ierr); 1477 1478 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1479 if (file) { 1480 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1487 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1488 { 1489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1490 PetscErrorCode ierr; 1491 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1492 PetscViewerFormat format; 1493 1494 PetscFunctionBegin; 1495 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1496 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1497 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1498 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1499 const char *matname; 1500 Mat aij; 1501 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1502 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1503 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1504 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1505 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1506 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1509 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1510 for (i=0; i<a->mbs; i++) { 1511 for (j=0; j<bs; j++) { 1512 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1513 for (k=a->i[i]; k<a->i[i+1]; k++) { 1514 for (l=0; l<bs; l++) { 1515 #if defined(PETSC_USE_COMPLEX) 1516 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1517 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1518 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1519 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1520 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1521 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1522 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1523 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1524 } 1525 #else 1526 if (a->a[bs2*k + l*bs + j] != 0.0) { 1527 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1528 } 1529 #endif 1530 } 1531 } 1532 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1533 } 1534 } 1535 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1536 } else { 1537 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1538 for (i=0; i<a->mbs; i++) { 1539 for (j=0; j<bs; j++) { 1540 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1541 for (k=a->i[i]; k<a->i[i+1]; k++) { 1542 for (l=0; l<bs; l++) { 1543 #if defined(PETSC_USE_COMPLEX) 1544 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1546 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1547 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1549 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1550 } else { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1552 } 1553 #else 1554 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1555 #endif 1556 } 1557 } 1558 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1559 } 1560 } 1561 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1562 } 1563 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1564 PetscFunctionReturn(0); 1565 } 1566 1567 #include <petscdraw.h> 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1570 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1571 { 1572 Mat A = (Mat) Aa; 1573 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1574 PetscErrorCode ierr; 1575 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1576 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1577 MatScalar *aa; 1578 PetscViewer viewer; 1579 PetscViewerFormat format; 1580 1581 PetscFunctionBegin; 1582 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1583 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1584 1585 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1586 1587 /* loop over matrix elements drawing boxes */ 1588 1589 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1590 color = PETSC_DRAW_BLUE; 1591 for (i=0,row=0; i<mbs; i++,row+=bs) { 1592 for (j=a->i[i]; j<a->i[i+1]; j++) { 1593 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1594 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1595 aa = a->a + j*bs2; 1596 for (k=0; k<bs; k++) { 1597 for (l=0; l<bs; l++) { 1598 if (PetscRealPart(*aa++) >= 0.) continue; 1599 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1600 } 1601 } 1602 } 1603 } 1604 color = PETSC_DRAW_CYAN; 1605 for (i=0,row=0; i<mbs; i++,row+=bs) { 1606 for (j=a->i[i]; j<a->i[i+1]; j++) { 1607 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1608 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1609 aa = a->a + j*bs2; 1610 for (k=0; k<bs; k++) { 1611 for (l=0; l<bs; l++) { 1612 if (PetscRealPart(*aa++) != 0.) continue; 1613 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1614 } 1615 } 1616 } 1617 } 1618 color = PETSC_DRAW_RED; 1619 for (i=0,row=0; i<mbs; i++,row+=bs) { 1620 for (j=a->i[i]; j<a->i[i+1]; j++) { 1621 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1622 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1623 aa = a->a + j*bs2; 1624 for (k=0; k<bs; k++) { 1625 for (l=0; l<bs; l++) { 1626 if (PetscRealPart(*aa++) <= 0.) continue; 1627 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1628 } 1629 } 1630 } 1631 } 1632 } else { 1633 /* use contour shading to indicate magnitude of values */ 1634 /* first determine max of all nonzero values */ 1635 PetscDraw popup; 1636 PetscReal scale,maxv = 0.0; 1637 1638 for (i=0; i<a->nz*a->bs2; i++) { 1639 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1640 } 1641 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1642 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1643 if (popup) { 1644 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1645 } 1646 for (i=0,row=0; i<mbs; i++,row+=bs) { 1647 for (j=a->i[i]; j<a->i[i+1]; j++) { 1648 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1649 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1650 aa = a->a + j*bs2; 1651 for (k=0; k<bs; k++) { 1652 for (l=0; l<bs; l++) { 1653 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1654 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1655 } 1656 } 1657 } 1658 } 1659 } 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #undef __FUNCT__ 1664 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1665 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1666 { 1667 PetscErrorCode ierr; 1668 PetscReal xl,yl,xr,yr,w,h; 1669 PetscDraw draw; 1670 PetscBool isnull; 1671 1672 PetscFunctionBegin; 1673 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1674 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1675 1676 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1677 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1678 xr += w; yr += h; xl = -w; yl = -h; 1679 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1680 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1681 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatView_SeqBAIJ" 1687 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1688 { 1689 PetscErrorCode ierr; 1690 PetscBool iascii,isbinary,isdraw; 1691 1692 PetscFunctionBegin; 1693 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1694 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1695 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1696 if (iascii) { 1697 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1698 } else if (isbinary) { 1699 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1700 } else if (isdraw) { 1701 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1702 } else { 1703 Mat B; 1704 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1705 ierr = MatView(B,viewer);CHKERRQ(ierr); 1706 ierr = MatDestroy(&B);CHKERRQ(ierr); 1707 } 1708 PetscFunctionReturn(0); 1709 } 1710 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1714 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1715 { 1716 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1717 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1718 PetscInt *ai = a->i,*ailen = a->ilen; 1719 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1720 MatScalar *ap,*aa = a->a; 1721 1722 PetscFunctionBegin; 1723 for (k=0; k<m; k++) { /* loop over rows */ 1724 row = im[k]; brow = row/bs; 1725 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1726 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1727 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1728 nrow = ailen[brow]; 1729 for (l=0; l<n; l++) { /* loop over columns */ 1730 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1731 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1732 col = in[l]; 1733 bcol = col/bs; 1734 cidx = col%bs; 1735 ridx = row%bs; 1736 high = nrow; 1737 low = 0; /* assume unsorted */ 1738 while (high-low > 5) { 1739 t = (low+high)/2; 1740 if (rp[t] > bcol) high = t; 1741 else low = t; 1742 } 1743 for (i=low; i<high; i++) { 1744 if (rp[i] > bcol) break; 1745 if (rp[i] == bcol) { 1746 *v++ = ap[bs2*i+bs*cidx+ridx]; 1747 goto finished; 1748 } 1749 } 1750 *v++ = 0.0; 1751 finished:; 1752 } 1753 } 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1759 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1760 { 1761 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1762 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1763 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1764 PetscErrorCode ierr; 1765 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1766 PetscBool roworiented=a->roworiented; 1767 const PetscScalar *value = v; 1768 MatScalar *ap,*aa = a->a,*bap; 1769 1770 PetscFunctionBegin; 1771 if (roworiented) { 1772 stepval = (n-1)*bs; 1773 } else { 1774 stepval = (m-1)*bs; 1775 } 1776 for (k=0; k<m; k++) { /* loop over added rows */ 1777 row = im[k]; 1778 if (row < 0) continue; 1779 #if defined(PETSC_USE_DEBUG) 1780 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1781 #endif 1782 rp = aj + ai[row]; 1783 ap = aa + bs2*ai[row]; 1784 rmax = imax[row]; 1785 nrow = ailen[row]; 1786 low = 0; 1787 high = nrow; 1788 for (l=0; l<n; l++) { /* loop over added columns */ 1789 if (in[l] < 0) continue; 1790 #if defined(PETSC_USE_DEBUG) 1791 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1792 #endif 1793 col = in[l]; 1794 if (roworiented) { 1795 value = v + (k*(stepval+bs) + l)*bs; 1796 } else { 1797 value = v + (l*(stepval+bs) + k)*bs; 1798 } 1799 if (col <= lastcol) low = 0; 1800 else high = nrow; 1801 lastcol = col; 1802 while (high-low > 7) { 1803 t = (low+high)/2; 1804 if (rp[t] > col) high = t; 1805 else low = t; 1806 } 1807 for (i=low; i<high; i++) { 1808 if (rp[i] > col) break; 1809 if (rp[i] == col) { 1810 bap = ap + bs2*i; 1811 if (roworiented) { 1812 if (is == ADD_VALUES) { 1813 for (ii=0; ii<bs; ii++,value+=stepval) { 1814 for (jj=ii; jj<bs2; jj+=bs) { 1815 bap[jj] += *value++; 1816 } 1817 } 1818 } else { 1819 for (ii=0; ii<bs; ii++,value+=stepval) { 1820 for (jj=ii; jj<bs2; jj+=bs) { 1821 bap[jj] = *value++; 1822 } 1823 } 1824 } 1825 } else { 1826 if (is == ADD_VALUES) { 1827 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1828 for (jj=0; jj<bs; jj++) { 1829 bap[jj] += value[jj]; 1830 } 1831 bap += bs; 1832 } 1833 } else { 1834 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1835 for (jj=0; jj<bs; jj++) { 1836 bap[jj] = value[jj]; 1837 } 1838 bap += bs; 1839 } 1840 } 1841 } 1842 goto noinsert2; 1843 } 1844 } 1845 if (nonew == 1) goto noinsert2; 1846 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1847 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1848 N = nrow++ - 1; high++; 1849 /* shift up all the later entries in this row */ 1850 for (ii=N; ii>=i; ii--) { 1851 rp[ii+1] = rp[ii]; 1852 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1853 } 1854 if (N >= i) { 1855 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1856 } 1857 rp[i] = col; 1858 bap = ap + bs2*i; 1859 if (roworiented) { 1860 for (ii=0; ii<bs; ii++,value+=stepval) { 1861 for (jj=ii; jj<bs2; jj+=bs) { 1862 bap[jj] = *value++; 1863 } 1864 } 1865 } else { 1866 for (ii=0; ii<bs; ii++,value+=stepval) { 1867 for (jj=0; jj<bs; jj++) { 1868 *bap++ = *value++; 1869 } 1870 } 1871 } 1872 noinsert2:; 1873 low = i; 1874 } 1875 ailen[row] = nrow; 1876 } 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1882 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1883 { 1884 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1885 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1886 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1887 PetscErrorCode ierr; 1888 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1889 MatScalar *aa = a->a,*ap; 1890 PetscReal ratio=0.6; 1891 1892 PetscFunctionBegin; 1893 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1894 1895 if (m) rmax = ailen[0]; 1896 for (i=1; i<mbs; i++) { 1897 /* move each row back by the amount of empty slots (fshift) before it*/ 1898 fshift += imax[i-1] - ailen[i-1]; 1899 rmax = PetscMax(rmax,ailen[i]); 1900 if (fshift) { 1901 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1902 N = ailen[i]; 1903 for (j=0; j<N; j++) { 1904 ip[j-fshift] = ip[j]; 1905 1906 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1907 } 1908 } 1909 ai[i] = ai[i-1] + ailen[i-1]; 1910 } 1911 if (mbs) { 1912 fshift += imax[mbs-1] - ailen[mbs-1]; 1913 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1914 } 1915 1916 /* reset ilen and imax for each row */ 1917 a->nonzerorowcnt = 0; 1918 for (i=0; i<mbs; i++) { 1919 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1920 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1921 } 1922 a->nz = ai[mbs]; 1923 1924 /* diagonals may have moved, so kill the diagonal pointers */ 1925 a->idiagvalid = PETSC_FALSE; 1926 if (fshift && a->diag) { 1927 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1928 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1929 a->diag = 0; 1930 } 1931 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1932 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1933 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1934 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1935 1936 A->info.mallocs += a->reallocs; 1937 a->reallocs = 0; 1938 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1939 a->rmax = rmax; 1940 1941 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 1945 /* 1946 This function returns an array of flags which indicate the locations of contiguous 1947 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1948 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1949 Assume: sizes should be long enough to hold all the values. 1950 */ 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1953 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1954 { 1955 PetscInt i,j,k,row; 1956 PetscBool flg; 1957 1958 PetscFunctionBegin; 1959 for (i=0,j=0; i<n; j++) { 1960 row = idx[i]; 1961 if (row%bs!=0) { /* Not the begining of a block */ 1962 sizes[j] = 1; 1963 i++; 1964 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1965 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1966 i++; 1967 } else { /* Begining of the block, so check if the complete block exists */ 1968 flg = PETSC_TRUE; 1969 for (k=1; k<bs; k++) { 1970 if (row+k != idx[i+k]) { /* break in the block */ 1971 flg = PETSC_FALSE; 1972 break; 1973 } 1974 } 1975 if (flg) { /* No break in the bs */ 1976 sizes[j] = bs; 1977 i += bs; 1978 } else { 1979 sizes[j] = 1; 1980 i++; 1981 } 1982 } 1983 } 1984 *bs_max = j; 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1990 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1991 { 1992 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1993 PetscErrorCode ierr; 1994 PetscInt i,j,k,count,*rows; 1995 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1996 PetscScalar zero = 0.0; 1997 MatScalar *aa; 1998 const PetscScalar *xx; 1999 PetscScalar *bb; 2000 2001 PetscFunctionBegin; 2002 /* fix right hand side if needed */ 2003 if (x && b) { 2004 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2005 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2006 for (i=0; i<is_n; i++) { 2007 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2008 } 2009 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2010 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2011 } 2012 2013 /* Make a copy of the IS and sort it */ 2014 /* allocate memory for rows,sizes */ 2015 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2016 2017 /* copy IS values to rows, and sort them */ 2018 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2019 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2020 2021 if (baij->keepnonzeropattern) { 2022 for (i=0; i<is_n; i++) sizes[i] = 1; 2023 bs_max = is_n; 2024 } else { 2025 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2026 A->nonzerostate++; 2027 } 2028 2029 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2030 row = rows[j]; 2031 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2032 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2033 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2034 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2035 if (diag != (PetscScalar)0.0) { 2036 if (baij->ilen[row/bs] > 0) { 2037 baij->ilen[row/bs] = 1; 2038 baij->j[baij->i[row/bs]] = row/bs; 2039 2040 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2041 } 2042 /* Now insert all the diagonal values for this bs */ 2043 for (k=0; k<bs; k++) { 2044 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2045 } 2046 } else { /* (diag == 0.0) */ 2047 baij->ilen[row/bs] = 0; 2048 } /* end (diag == 0.0) */ 2049 } else { /* (sizes[i] != bs) */ 2050 #if defined(PETSC_USE_DEBUG) 2051 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2052 #endif 2053 for (k=0; k<count; k++) { 2054 aa[0] = zero; 2055 aa += bs; 2056 } 2057 if (diag != (PetscScalar)0.0) { 2058 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 } 2061 } 2062 2063 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2064 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2070 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2071 { 2072 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2073 PetscErrorCode ierr; 2074 PetscInt i,j,k,count; 2075 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2076 PetscScalar zero = 0.0; 2077 MatScalar *aa; 2078 const PetscScalar *xx; 2079 PetscScalar *bb; 2080 PetscBool *zeroed,vecs = PETSC_FALSE; 2081 2082 PetscFunctionBegin; 2083 /* fix right hand side if needed */ 2084 if (x && b) { 2085 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2086 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2087 vecs = PETSC_TRUE; 2088 } 2089 2090 /* zero the columns */ 2091 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2092 for (i=0; i<is_n; i++) { 2093 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2094 zeroed[is_idx[i]] = PETSC_TRUE; 2095 } 2096 for (i=0; i<A->rmap->N; i++) { 2097 if (!zeroed[i]) { 2098 row = i/bs; 2099 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2100 for (k=0; k<bs; k++) { 2101 col = bs*baij->j[j] + k; 2102 if (zeroed[col]) { 2103 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2104 if (vecs) bb[i] -= aa[0]*xx[col]; 2105 aa[0] = 0.0; 2106 } 2107 } 2108 } 2109 } else if (vecs) bb[i] = diag*xx[i]; 2110 } 2111 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2112 if (vecs) { 2113 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2114 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2115 } 2116 2117 /* zero the rows */ 2118 for (i=0; i<is_n; i++) { 2119 row = is_idx[i]; 2120 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2121 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2122 for (k=0; k<count; k++) { 2123 aa[0] = zero; 2124 aa += bs; 2125 } 2126 if (diag != (PetscScalar)0.0) { 2127 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2128 } 2129 } 2130 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2136 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2137 { 2138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2139 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2140 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2141 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2142 PetscErrorCode ierr; 2143 PetscInt ridx,cidx,bs2=a->bs2; 2144 PetscBool roworiented=a->roworiented; 2145 MatScalar *ap,value,*aa=a->a,*bap; 2146 2147 PetscFunctionBegin; 2148 for (k=0; k<m; k++) { /* loop over added rows */ 2149 row = im[k]; 2150 brow = row/bs; 2151 if (row < 0) continue; 2152 #if defined(PETSC_USE_DEBUG) 2153 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2154 #endif 2155 rp = aj + ai[brow]; 2156 ap = aa + bs2*ai[brow]; 2157 rmax = imax[brow]; 2158 nrow = ailen[brow]; 2159 low = 0; 2160 high = nrow; 2161 for (l=0; l<n; l++) { /* loop over added columns */ 2162 if (in[l] < 0) continue; 2163 #if defined(PETSC_USE_DEBUG) 2164 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2165 #endif 2166 col = in[l]; bcol = col/bs; 2167 ridx = row % bs; cidx = col % bs; 2168 if (roworiented) { 2169 value = v[l + k*n]; 2170 } else { 2171 value = v[k + l*m]; 2172 } 2173 if (col <= lastcol) low = 0; else high = nrow; 2174 lastcol = col; 2175 while (high-low > 7) { 2176 t = (low+high)/2; 2177 if (rp[t] > bcol) high = t; 2178 else low = t; 2179 } 2180 for (i=low; i<high; i++) { 2181 if (rp[i] > bcol) break; 2182 if (rp[i] == bcol) { 2183 bap = ap + bs2*i + bs*cidx + ridx; 2184 if (is == ADD_VALUES) *bap += value; 2185 else *bap = value; 2186 goto noinsert1; 2187 } 2188 } 2189 if (nonew == 1) goto noinsert1; 2190 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2191 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2192 N = nrow++ - 1; high++; 2193 /* shift up all the later entries in this row */ 2194 for (ii=N; ii>=i; ii--) { 2195 rp[ii+1] = rp[ii]; 2196 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2197 } 2198 if (N>=i) { 2199 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2200 } 2201 rp[i] = bcol; 2202 ap[bs2*i + bs*cidx + ridx] = value; 2203 a->nz++; 2204 A->nonzerostate++; 2205 noinsert1:; 2206 low = i; 2207 } 2208 ailen[brow] = nrow; 2209 } 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2215 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2216 { 2217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2218 Mat outA; 2219 PetscErrorCode ierr; 2220 PetscBool row_identity,col_identity; 2221 2222 PetscFunctionBegin; 2223 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2224 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2225 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2226 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2227 2228 outA = inA; 2229 inA->factortype = MAT_FACTOR_LU; 2230 2231 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2232 2233 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2234 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2235 a->row = row; 2236 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2237 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2238 a->col = col; 2239 2240 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2241 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2242 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2243 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2244 2245 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2246 if (!a->solve_work) { 2247 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2248 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2249 } 2250 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2256 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2257 { 2258 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2259 PetscInt i,nz,mbs; 2260 2261 PetscFunctionBegin; 2262 nz = baij->maxnz; 2263 mbs = baij->mbs; 2264 for (i=0; i<nz; i++) { 2265 baij->j[i] = indices[i]; 2266 } 2267 baij->nz = nz; 2268 for (i=0; i<mbs; i++) { 2269 baij->ilen[i] = baij->imax[i]; 2270 } 2271 PetscFunctionReturn(0); 2272 } 2273 2274 #undef __FUNCT__ 2275 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2276 /*@ 2277 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2278 in the matrix. 2279 2280 Input Parameters: 2281 + mat - the SeqBAIJ matrix 2282 - indices - the column indices 2283 2284 Level: advanced 2285 2286 Notes: 2287 This can be called if you have precomputed the nonzero structure of the 2288 matrix and want to provide it to the matrix object to improve the performance 2289 of the MatSetValues() operation. 2290 2291 You MUST have set the correct numbers of nonzeros per row in the call to 2292 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2293 2294 MUST be called before any calls to MatSetValues(); 2295 2296 @*/ 2297 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2298 { 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2303 PetscValidPointer(indices,2); 2304 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #undef __FUNCT__ 2309 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2310 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2311 { 2312 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2313 PetscErrorCode ierr; 2314 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2315 PetscReal atmp; 2316 PetscScalar *x,zero = 0.0; 2317 MatScalar *aa; 2318 PetscInt ncols,brow,krow,kcol; 2319 2320 PetscFunctionBegin; 2321 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2322 bs = A->rmap->bs; 2323 aa = a->a; 2324 ai = a->i; 2325 aj = a->j; 2326 mbs = a->mbs; 2327 2328 ierr = VecSet(v,zero);CHKERRQ(ierr); 2329 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2330 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2331 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2332 for (i=0; i<mbs; i++) { 2333 ncols = ai[1] - ai[0]; ai++; 2334 brow = bs*i; 2335 for (j=0; j<ncols; j++) { 2336 for (kcol=0; kcol<bs; kcol++) { 2337 for (krow=0; krow<bs; krow++) { 2338 atmp = PetscAbsScalar(*aa);aa++; 2339 row = brow + krow; /* row index */ 2340 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2341 } 2342 } 2343 aj++; 2344 } 2345 } 2346 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2347 PetscFunctionReturn(0); 2348 } 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatCopy_SeqBAIJ" 2352 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2353 { 2354 PetscErrorCode ierr; 2355 2356 PetscFunctionBegin; 2357 /* If the two matrices have the same copy implementation, use fast copy. */ 2358 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2360 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2361 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2362 2363 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2364 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2365 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2366 } else { 2367 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2368 } 2369 PetscFunctionReturn(0); 2370 } 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2374 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2375 { 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2385 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2386 { 2387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2388 2389 PetscFunctionBegin; 2390 *array = a->a; 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2396 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2397 { 2398 PetscFunctionBegin; 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2404 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2405 { 2406 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2407 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2408 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2409 PetscErrorCode ierr; 2410 2411 PetscFunctionBegin; 2412 /* Set the number of nonzeros in the new matrix */ 2413 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 #undef __FUNCT__ 2418 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2420 { 2421 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2422 PetscErrorCode ierr; 2423 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2424 PetscBLASInt one=1; 2425 2426 PetscFunctionBegin; 2427 if (str == SAME_NONZERO_PATTERN) { 2428 PetscScalar alpha = a; 2429 PetscBLASInt bnz; 2430 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2431 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2432 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2433 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2434 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2435 } else { 2436 Mat B; 2437 PetscInt *nnz; 2438 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2439 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2440 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2441 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2442 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2444 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2445 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2446 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2447 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2448 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2449 ierr = PetscFree(nnz);CHKERRQ(ierr); 2450 } 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2457 { 2458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2459 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2460 MatScalar *aa = a->a; 2461 2462 PetscFunctionBegin; 2463 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2470 { 2471 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2472 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2473 MatScalar *aa = a->a; 2474 2475 PetscFunctionBegin; 2476 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2482 /* 2483 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2484 */ 2485 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2486 { 2487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2488 PetscErrorCode ierr; 2489 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2490 PetscInt nz = a->i[m],row,*jj,mr,col; 2491 2492 PetscFunctionBegin; 2493 *nn = n; 2494 if (!ia) PetscFunctionReturn(0); 2495 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2496 else { 2497 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2498 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2499 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2500 jj = a->j; 2501 for (i=0; i<nz; i++) { 2502 collengths[jj[i]]++; 2503 } 2504 cia[0] = oshift; 2505 for (i=0; i<n; i++) { 2506 cia[i+1] = cia[i] + collengths[i]; 2507 } 2508 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2509 jj = a->j; 2510 for (row=0; row<m; row++) { 2511 mr = a->i[row+1] - a->i[row]; 2512 for (i=0; i<mr; i++) { 2513 col = *jj++; 2514 2515 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2516 } 2517 } 2518 ierr = PetscFree(collengths);CHKERRQ(ierr); 2519 *ia = cia; *ja = cja; 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2526 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2527 { 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 if (!ia) PetscFunctionReturn(0); 2532 ierr = PetscFree(*ia);CHKERRQ(ierr); 2533 ierr = PetscFree(*ja);CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 /* 2538 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2539 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2540 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2541 */ 2542 #undef __FUNCT__ 2543 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2544 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2545 { 2546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2547 PetscErrorCode ierr; 2548 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2549 PetscInt nz = a->i[m],row,*jj,mr,col; 2550 PetscInt *cspidx; 2551 2552 PetscFunctionBegin; 2553 *nn = n; 2554 if (!ia) PetscFunctionReturn(0); 2555 2556 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2557 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2558 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2559 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2560 jj = a->j; 2561 for (i=0; i<nz; i++) { 2562 collengths[jj[i]]++; 2563 } 2564 cia[0] = oshift; 2565 for (i=0; i<n; i++) { 2566 cia[i+1] = cia[i] + collengths[i]; 2567 } 2568 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2569 jj = a->j; 2570 for (row=0; row<m; row++) { 2571 mr = a->i[row+1] - a->i[row]; 2572 for (i=0; i<mr; i++) { 2573 col = *jj++; 2574 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2575 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2576 } 2577 } 2578 ierr = PetscFree(collengths);CHKERRQ(ierr); 2579 *ia = cia; *ja = cja; 2580 *spidx = cspidx; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2586 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2587 { 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2592 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatShift_SeqBAIJ" 2598 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2599 { 2600 PetscErrorCode ierr; 2601 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2602 2603 PetscFunctionBegin; 2604 if (!aij->nz) { 2605 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2606 } 2607 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2608 PetscFunctionReturn(0); 2609 } 2610 2611 /* -------------------------------------------------------------------*/ 2612 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2613 MatGetRow_SeqBAIJ, 2614 MatRestoreRow_SeqBAIJ, 2615 MatMult_SeqBAIJ_N, 2616 /* 4*/ MatMultAdd_SeqBAIJ_N, 2617 MatMultTranspose_SeqBAIJ, 2618 MatMultTransposeAdd_SeqBAIJ, 2619 0, 2620 0, 2621 0, 2622 /* 10*/ 0, 2623 MatLUFactor_SeqBAIJ, 2624 0, 2625 0, 2626 MatTranspose_SeqBAIJ, 2627 /* 15*/ MatGetInfo_SeqBAIJ, 2628 MatEqual_SeqBAIJ, 2629 MatGetDiagonal_SeqBAIJ, 2630 MatDiagonalScale_SeqBAIJ, 2631 MatNorm_SeqBAIJ, 2632 /* 20*/ 0, 2633 MatAssemblyEnd_SeqBAIJ, 2634 MatSetOption_SeqBAIJ, 2635 MatZeroEntries_SeqBAIJ, 2636 /* 24*/ MatZeroRows_SeqBAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /* 29*/ MatSetUp_SeqBAIJ, 2642 0, 2643 0, 2644 0, 2645 0, 2646 /* 34*/ MatDuplicate_SeqBAIJ, 2647 0, 2648 0, 2649 MatILUFactor_SeqBAIJ, 2650 0, 2651 /* 39*/ MatAXPY_SeqBAIJ, 2652 MatGetSubMatrices_SeqBAIJ, 2653 MatIncreaseOverlap_SeqBAIJ, 2654 MatGetValues_SeqBAIJ, 2655 MatCopy_SeqBAIJ, 2656 /* 44*/ 0, 2657 MatScale_SeqBAIJ, 2658 MatShift_SeqBAIJ, 2659 0, 2660 MatZeroRowsColumns_SeqBAIJ, 2661 /* 49*/ 0, 2662 MatGetRowIJ_SeqBAIJ, 2663 MatRestoreRowIJ_SeqBAIJ, 2664 MatGetColumnIJ_SeqBAIJ, 2665 MatRestoreColumnIJ_SeqBAIJ, 2666 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2667 0, 2668 0, 2669 0, 2670 MatSetValuesBlocked_SeqBAIJ, 2671 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2672 MatDestroy_SeqBAIJ, 2673 MatView_SeqBAIJ, 2674 0, 2675 0, 2676 /* 64*/ 0, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2682 0, 2683 MatConvert_Basic, 2684 0, 2685 0, 2686 /* 74*/ 0, 2687 MatFDColoringApply_BAIJ, 2688 0, 2689 0, 2690 0, 2691 /* 79*/ 0, 2692 0, 2693 0, 2694 0, 2695 MatLoad_SeqBAIJ, 2696 /* 84*/ 0, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /* 89*/ 0, 2702 0, 2703 0, 2704 0, 2705 0, 2706 /* 94*/ 0, 2707 0, 2708 0, 2709 0, 2710 0, 2711 /* 99*/ 0, 2712 0, 2713 0, 2714 0, 2715 0, 2716 /*104*/ 0, 2717 MatRealPart_SeqBAIJ, 2718 MatImaginaryPart_SeqBAIJ, 2719 0, 2720 0, 2721 /*109*/ 0, 2722 0, 2723 0, 2724 0, 2725 MatMissingDiagonal_SeqBAIJ, 2726 /*114*/ 0, 2727 0, 2728 0, 2729 0, 2730 0, 2731 /*119*/ 0, 2732 0, 2733 MatMultHermitianTranspose_SeqBAIJ, 2734 MatMultHermitianTransposeAdd_SeqBAIJ, 2735 0, 2736 /*124*/ 0, 2737 0, 2738 MatInvertBlockDiagonal_SeqBAIJ, 2739 0, 2740 0, 2741 /*129*/ 0, 2742 0, 2743 0, 2744 0, 2745 0, 2746 /*134*/ 0, 2747 0, 2748 0, 2749 0, 2750 0, 2751 /*139*/ 0, 2752 0, 2753 0, 2754 MatFDColoringSetUp_SeqXAIJ, 2755 0, 2756 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2757 }; 2758 2759 #undef __FUNCT__ 2760 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2761 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2762 { 2763 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2764 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2769 2770 /* allocate space for values if not already there */ 2771 if (!aij->saved_values) { 2772 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2773 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2774 } 2775 2776 /* copy values over */ 2777 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2778 PetscFunctionReturn(0); 2779 } 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2783 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2784 { 2785 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2786 PetscErrorCode ierr; 2787 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2788 2789 PetscFunctionBegin; 2790 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2791 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2792 2793 /* copy values over */ 2794 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2799 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2800 2801 #undef __FUNCT__ 2802 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2803 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2804 { 2805 Mat_SeqBAIJ *b; 2806 PetscErrorCode ierr; 2807 PetscInt i,mbs,nbs,bs2; 2808 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2809 2810 PetscFunctionBegin; 2811 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2812 if (nz == MAT_SKIP_ALLOCATION) { 2813 skipallocation = PETSC_TRUE; 2814 nz = 0; 2815 } 2816 2817 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2818 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2819 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2821 2822 B->preallocated = PETSC_TRUE; 2823 2824 mbs = B->rmap->n/bs; 2825 nbs = B->cmap->n/bs; 2826 bs2 = bs*bs; 2827 2828 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2829 2830 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2831 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2832 if (nnz) { 2833 for (i=0; i<mbs; i++) { 2834 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2835 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2836 } 2837 } 2838 2839 b = (Mat_SeqBAIJ*)B->data; 2840 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2841 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2842 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2843 2844 if (!flg) { 2845 switch (bs) { 2846 case 1: 2847 B->ops->mult = MatMult_SeqBAIJ_1; 2848 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2849 break; 2850 case 2: 2851 B->ops->mult = MatMult_SeqBAIJ_2; 2852 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2853 break; 2854 case 3: 2855 B->ops->mult = MatMult_SeqBAIJ_3; 2856 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2857 break; 2858 case 4: 2859 B->ops->mult = MatMult_SeqBAIJ_4; 2860 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2861 break; 2862 case 5: 2863 B->ops->mult = MatMult_SeqBAIJ_5; 2864 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2865 break; 2866 case 6: 2867 B->ops->mult = MatMult_SeqBAIJ_6; 2868 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2869 break; 2870 case 7: 2871 B->ops->mult = MatMult_SeqBAIJ_7; 2872 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2873 break; 2874 case 15: 2875 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2876 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2877 break; 2878 default: 2879 B->ops->mult = MatMult_SeqBAIJ_N; 2880 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2881 break; 2882 } 2883 } 2884 B->ops->sor = MatSOR_SeqBAIJ; 2885 b->mbs = mbs; 2886 b->nbs = nbs; 2887 if (!skipallocation) { 2888 if (!b->imax) { 2889 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2890 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2891 2892 b->free_imax_ilen = PETSC_TRUE; 2893 } 2894 /* b->ilen will count nonzeros in each block row so far. */ 2895 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2896 if (!nnz) { 2897 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2898 else if (nz < 0) nz = 1; 2899 for (i=0; i<mbs; i++) b->imax[i] = nz; 2900 nz = nz*mbs; 2901 } else { 2902 nz = 0; 2903 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2904 } 2905 2906 /* allocate the matrix space */ 2907 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2908 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2909 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2910 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2911 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2912 2913 b->singlemalloc = PETSC_TRUE; 2914 b->i[0] = 0; 2915 for (i=1; i<mbs+1; i++) { 2916 b->i[i] = b->i[i-1] + b->imax[i-1]; 2917 } 2918 b->free_a = PETSC_TRUE; 2919 b->free_ij = PETSC_TRUE; 2920 #if defined(PETSC_THREADCOMM_ACTIVE) 2921 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2922 #endif 2923 } else { 2924 b->free_a = PETSC_FALSE; 2925 b->free_ij = PETSC_FALSE; 2926 } 2927 2928 b->bs2 = bs2; 2929 b->mbs = mbs; 2930 b->nz = 0; 2931 b->maxnz = nz; 2932 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2933 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2934 PetscFunctionReturn(0); 2935 } 2936 2937 #undef __FUNCT__ 2938 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2939 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2940 { 2941 PetscInt i,m,nz,nz_max=0,*nnz; 2942 PetscScalar *values=0; 2943 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2944 PetscErrorCode ierr; 2945 2946 PetscFunctionBegin; 2947 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2948 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2949 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2950 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2951 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2952 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2953 m = B->rmap->n/bs; 2954 2955 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2956 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2957 for (i=0; i<m; i++) { 2958 nz = ii[i+1]- ii[i]; 2959 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2960 nz_max = PetscMax(nz_max, nz); 2961 nnz[i] = nz; 2962 } 2963 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2964 ierr = PetscFree(nnz);CHKERRQ(ierr); 2965 2966 values = (PetscScalar*)V; 2967 if (!values) { 2968 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2969 } 2970 for (i=0; i<m; i++) { 2971 PetscInt ncols = ii[i+1] - ii[i]; 2972 const PetscInt *icols = jj + ii[i]; 2973 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2974 if (!roworiented) { 2975 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2976 } else { 2977 PetscInt j; 2978 for (j=0; j<ncols; j++) { 2979 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2980 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2981 } 2982 } 2983 } 2984 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2985 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2986 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2987 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 /*MC 2992 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2993 block sparse compressed row format. 2994 2995 Options Database Keys: 2996 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2997 2998 Level: beginner 2999 3000 .seealso: MatCreateSeqBAIJ() 3001 M*/ 3002 3003 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3004 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "MatCreate_SeqBAIJ" 3007 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3008 { 3009 PetscErrorCode ierr; 3010 PetscMPIInt size; 3011 Mat_SeqBAIJ *b; 3012 3013 PetscFunctionBegin; 3014 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3015 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3016 3017 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3018 B->data = (void*)b; 3019 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3020 3021 b->row = 0; 3022 b->col = 0; 3023 b->icol = 0; 3024 b->reallocs = 0; 3025 b->saved_values = 0; 3026 3027 b->roworiented = PETSC_TRUE; 3028 b->nonew = 0; 3029 b->diag = 0; 3030 B->spptr = 0; 3031 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3032 b->keepnonzeropattern = PETSC_FALSE; 3033 3034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3045 PetscFunctionReturn(0); 3046 } 3047 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3050 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3051 { 3052 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3053 PetscErrorCode ierr; 3054 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3055 3056 PetscFunctionBegin; 3057 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3058 3059 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3060 c->imax = a->imax; 3061 c->ilen = a->ilen; 3062 c->free_imax_ilen = PETSC_FALSE; 3063 } else { 3064 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3065 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3066 for (i=0; i<mbs; i++) { 3067 c->imax[i] = a->imax[i]; 3068 c->ilen[i] = a->ilen[i]; 3069 } 3070 c->free_imax_ilen = PETSC_TRUE; 3071 } 3072 3073 /* allocate the matrix space */ 3074 if (mallocmatspace) { 3075 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3076 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3077 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3078 3079 c->i = a->i; 3080 c->j = a->j; 3081 c->singlemalloc = PETSC_FALSE; 3082 c->free_a = PETSC_TRUE; 3083 c->free_ij = PETSC_FALSE; 3084 c->parent = A; 3085 C->preallocated = PETSC_TRUE; 3086 C->assembled = PETSC_TRUE; 3087 3088 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3089 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3090 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3091 } else { 3092 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3093 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3094 3095 c->singlemalloc = PETSC_TRUE; 3096 c->free_a = PETSC_TRUE; 3097 c->free_ij = PETSC_TRUE; 3098 3099 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3100 if (mbs > 0) { 3101 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3102 if (cpvalues == MAT_COPY_VALUES) { 3103 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3106 } 3107 } 3108 C->preallocated = PETSC_TRUE; 3109 C->assembled = PETSC_TRUE; 3110 } 3111 } 3112 3113 c->roworiented = a->roworiented; 3114 c->nonew = a->nonew; 3115 3116 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3117 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3118 3119 c->bs2 = a->bs2; 3120 c->mbs = a->mbs; 3121 c->nbs = a->nbs; 3122 3123 if (a->diag) { 3124 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3125 c->diag = a->diag; 3126 c->free_diag = PETSC_FALSE; 3127 } else { 3128 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3129 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3130 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3131 c->free_diag = PETSC_TRUE; 3132 } 3133 } else c->diag = 0; 3134 3135 c->nz = a->nz; 3136 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3137 c->solve_work = NULL; 3138 c->mult_work = NULL; 3139 c->sor_workt = NULL; 3140 c->sor_work = NULL; 3141 3142 c->compressedrow.use = a->compressedrow.use; 3143 c->compressedrow.nrows = a->compressedrow.nrows; 3144 if (a->compressedrow.use) { 3145 i = a->compressedrow.nrows; 3146 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3147 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3148 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3149 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3150 } else { 3151 c->compressedrow.use = PETSC_FALSE; 3152 c->compressedrow.i = NULL; 3153 c->compressedrow.rindex = NULL; 3154 } 3155 C->nonzerostate = A->nonzerostate; 3156 3157 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3158 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3159 PetscFunctionReturn(0); 3160 } 3161 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3164 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3165 { 3166 PetscErrorCode ierr; 3167 3168 PetscFunctionBegin; 3169 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3170 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3171 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3172 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatLoad_SeqBAIJ" 3178 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3179 { 3180 Mat_SeqBAIJ *a; 3181 PetscErrorCode ierr; 3182 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3183 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3184 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3185 PetscInt *masked,nmask,tmp,bs2,ishift; 3186 PetscMPIInt size; 3187 int fd; 3188 PetscScalar *aa; 3189 MPI_Comm comm; 3190 3191 PetscFunctionBegin; 3192 /* force binary viewer to load .info file if it has not yet done so */ 3193 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3194 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3195 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3196 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3197 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3198 if (bs < 0) bs = 1; 3199 bs2 = bs*bs; 3200 3201 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3202 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3203 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3204 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3205 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3206 M = header[1]; N = header[2]; nz = header[3]; 3207 3208 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3209 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3210 3211 /* 3212 This code adds extra rows to make sure the number of rows is 3213 divisible by the blocksize 3214 */ 3215 mbs = M/bs; 3216 extra_rows = bs - M + bs*(mbs); 3217 if (extra_rows == bs) extra_rows = 0; 3218 else mbs++; 3219 if (extra_rows) { 3220 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3221 } 3222 3223 /* Set global sizes if not already set */ 3224 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3225 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3226 } else { /* Check if the matrix global sizes are correct */ 3227 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3228 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3229 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3230 } 3231 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3232 } 3233 3234 /* read in row lengths */ 3235 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3236 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3237 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3238 3239 /* read in column indices */ 3240 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3241 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3242 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3243 3244 /* loop over row lengths determining block row lengths */ 3245 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3246 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3247 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3248 rowcount = 0; 3249 nzcount = 0; 3250 for (i=0; i<mbs; i++) { 3251 nmask = 0; 3252 for (j=0; j<bs; j++) { 3253 kmax = rowlengths[rowcount]; 3254 for (k=0; k<kmax; k++) { 3255 tmp = jj[nzcount++]/bs; 3256 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3257 } 3258 rowcount++; 3259 } 3260 browlengths[i] += nmask; 3261 /* zero out the mask elements we set */ 3262 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3263 } 3264 3265 /* Do preallocation */ 3266 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3267 a = (Mat_SeqBAIJ*)newmat->data; 3268 3269 /* set matrix "i" values */ 3270 a->i[0] = 0; 3271 for (i=1; i<= mbs; i++) { 3272 a->i[i] = a->i[i-1] + browlengths[i-1]; 3273 a->ilen[i-1] = browlengths[i-1]; 3274 } 3275 a->nz = 0; 3276 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3277 3278 /* read in nonzero values */ 3279 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3280 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3281 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3282 3283 /* set "a" and "j" values into matrix */ 3284 nzcount = 0; jcount = 0; 3285 for (i=0; i<mbs; i++) { 3286 nzcountb = nzcount; 3287 nmask = 0; 3288 for (j=0; j<bs; j++) { 3289 kmax = rowlengths[i*bs+j]; 3290 for (k=0; k<kmax; k++) { 3291 tmp = jj[nzcount++]/bs; 3292 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3293 } 3294 } 3295 /* sort the masked values */ 3296 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3297 3298 /* set "j" values into matrix */ 3299 maskcount = 1; 3300 for (j=0; j<nmask; j++) { 3301 a->j[jcount++] = masked[j]; 3302 mask[masked[j]] = maskcount++; 3303 } 3304 /* set "a" values into matrix */ 3305 ishift = bs2*a->i[i]; 3306 for (j=0; j<bs; j++) { 3307 kmax = rowlengths[i*bs+j]; 3308 for (k=0; k<kmax; k++) { 3309 tmp = jj[nzcountb]/bs; 3310 block = mask[tmp] - 1; 3311 point = jj[nzcountb] - bs*tmp; 3312 idx = ishift + bs2*block + j + bs*point; 3313 a->a[idx] = (MatScalar)aa[nzcountb++]; 3314 } 3315 } 3316 /* zero out the mask elements we set */ 3317 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3318 } 3319 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3320 3321 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3322 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3323 ierr = PetscFree(aa);CHKERRQ(ierr); 3324 ierr = PetscFree(jj);CHKERRQ(ierr); 3325 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3326 3327 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 PetscFunctionReturn(0); 3330 } 3331 3332 #undef __FUNCT__ 3333 #define __FUNCT__ "MatCreateSeqBAIJ" 3334 /*@C 3335 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3336 compressed row) format. For good matrix assembly performance the 3337 user should preallocate the matrix storage by setting the parameter nz 3338 (or the array nnz). By setting these parameters accurately, performance 3339 during matrix assembly can be increased by more than a factor of 50. 3340 3341 Collective on MPI_Comm 3342 3343 Input Parameters: 3344 + comm - MPI communicator, set to PETSC_COMM_SELF 3345 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3346 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3347 . m - number of rows 3348 . n - number of columns 3349 . nz - number of nonzero blocks per block row (same for all rows) 3350 - nnz - array containing the number of nonzero blocks in the various block rows 3351 (possibly different for each block row) or NULL 3352 3353 Output Parameter: 3354 . A - the matrix 3355 3356 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3357 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3358 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3359 3360 Options Database Keys: 3361 . -mat_no_unroll - uses code that does not unroll the loops in the 3362 block calculations (much slower) 3363 . -mat_block_size - size of the blocks to use 3364 3365 Level: intermediate 3366 3367 Notes: 3368 The number of rows and columns must be divisible by blocksize. 3369 3370 If the nnz parameter is given then the nz parameter is ignored 3371 3372 A nonzero block is any block that as 1 or more nonzeros in it 3373 3374 The block AIJ format is fully compatible with standard Fortran 77 3375 storage. That is, the stored row and column indices can begin at 3376 either one (as in Fortran) or zero. See the users' manual for details. 3377 3378 Specify the preallocated storage with either nz or nnz (not both). 3379 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3380 allocation. See Users-Manual: ch_mat for details. 3381 matrices. 3382 3383 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3384 @*/ 3385 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3386 { 3387 PetscErrorCode ierr; 3388 3389 PetscFunctionBegin; 3390 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3391 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3392 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3393 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3394 PetscFunctionReturn(0); 3395 } 3396 3397 #undef __FUNCT__ 3398 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3399 /*@C 3400 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3401 per row in the matrix. For good matrix assembly performance the 3402 user should preallocate the matrix storage by setting the parameter nz 3403 (or the array nnz). By setting these parameters accurately, performance 3404 during matrix assembly can be increased by more than a factor of 50. 3405 3406 Collective on MPI_Comm 3407 3408 Input Parameters: 3409 + B - the matrix 3410 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3411 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3412 . nz - number of block nonzeros per block row (same for all rows) 3413 - nnz - array containing the number of block nonzeros in the various block rows 3414 (possibly different for each block row) or NULL 3415 3416 Options Database Keys: 3417 . -mat_no_unroll - uses code that does not unroll the loops in the 3418 block calculations (much slower) 3419 . -mat_block_size - size of the blocks to use 3420 3421 Level: intermediate 3422 3423 Notes: 3424 If the nnz parameter is given then the nz parameter is ignored 3425 3426 You can call MatGetInfo() to get information on how effective the preallocation was; 3427 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3428 You can also run with the option -info and look for messages with the string 3429 malloc in them to see if additional memory allocation was needed. 3430 3431 The block AIJ format is fully compatible with standard Fortran 77 3432 storage. That is, the stored row and column indices can begin at 3433 either one (as in Fortran) or zero. See the users' manual for details. 3434 3435 Specify the preallocated storage with either nz or nnz (not both). 3436 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3437 allocation. See Users-Manual: ch_mat for details. 3438 3439 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3440 @*/ 3441 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3442 { 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3447 PetscValidType(B,1); 3448 PetscValidLogicalCollectiveInt(B,bs,2); 3449 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } 3452 3453 #undef __FUNCT__ 3454 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3455 /*@C 3456 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3457 (the default sequential PETSc format). 3458 3459 Collective on MPI_Comm 3460 3461 Input Parameters: 3462 + B - the matrix 3463 . i - the indices into j for the start of each local row (starts with zero) 3464 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3465 - v - optional values in the matrix 3466 3467 Level: developer 3468 3469 Notes: 3470 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3471 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3472 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3473 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3474 block column and the second index is over columns within a block. 3475 3476 .keywords: matrix, aij, compressed row, sparse 3477 3478 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3479 @*/ 3480 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3481 { 3482 PetscErrorCode ierr; 3483 3484 PetscFunctionBegin; 3485 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3486 PetscValidType(B,1); 3487 PetscValidLogicalCollectiveInt(B,bs,2); 3488 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3489 PetscFunctionReturn(0); 3490 } 3491 3492 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3495 /*@ 3496 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3497 3498 Collective on MPI_Comm 3499 3500 Input Parameters: 3501 + comm - must be an MPI communicator of size 1 3502 . bs - size of block 3503 . m - number of rows 3504 . n - number of columns 3505 . i - row indices 3506 . j - column indices 3507 - a - matrix values 3508 3509 Output Parameter: 3510 . mat - the matrix 3511 3512 Level: advanced 3513 3514 Notes: 3515 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3516 once the matrix is destroyed 3517 3518 You cannot set new nonzero locations into this matrix, that will generate an error. 3519 3520 The i and j indices are 0 based 3521 3522 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3523 3524 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3525 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3526 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3527 with column-major ordering within blocks. 3528 3529 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3530 3531 @*/ 3532 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3533 { 3534 PetscErrorCode ierr; 3535 PetscInt ii; 3536 Mat_SeqBAIJ *baij; 3537 3538 PetscFunctionBegin; 3539 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3540 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3541 3542 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3543 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3544 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3545 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3546 baij = (Mat_SeqBAIJ*)(*mat)->data; 3547 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3548 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3549 3550 baij->i = i; 3551 baij->j = j; 3552 baij->a = a; 3553 3554 baij->singlemalloc = PETSC_FALSE; 3555 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3556 baij->free_a = PETSC_FALSE; 3557 baij->free_ij = PETSC_FALSE; 3558 3559 for (ii=0; ii<m; ii++) { 3560 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3561 #if defined(PETSC_USE_DEBUG) 3562 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3563 #endif 3564 } 3565 #if defined(PETSC_USE_DEBUG) 3566 for (ii=0; ii<baij->i[m]; ii++) { 3567 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3568 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3569 } 3570 #endif 3571 3572 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3573 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3574 PetscFunctionReturn(0); 3575 } 3576 3577 #undef __FUNCT__ 3578 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3579 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3580 { 3581 PetscErrorCode ierr; 3582 3583 PetscFunctionBegin; 3584 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3585 PetscFunctionReturn(0); 3586 } 3587