1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 142 its = its*lits; 143 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 144 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 145 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 146 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 147 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 148 149 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 150 151 if (!m) PetscFunctionReturn(0); 152 diag = a->diag; 153 idiag = a->idiag; 154 k = PetscMax(A->rmap->n,A->cmap->n); 155 if (!a->mult_work) { 156 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 157 } 158 if (!a->sor_workt) { 159 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 160 } 161 if (!a->sor_work) { 162 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 163 } 164 work = a->mult_work; 165 t = a->sor_workt; 166 w = a->sor_work; 167 168 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 169 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 170 171 if (flag & SOR_ZERO_INITIAL_GUESS) { 172 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 173 switch (bs) { 174 case 1: 175 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 176 t[0] = b[0]; 177 i2 = 1; 178 idiag += 1; 179 for (i=1; i<m; i++) { 180 v = aa + ai[i]; 181 vi = aj + ai[i]; 182 nz = diag[i] - ai[i]; 183 s[0] = b[i2]; 184 for (j=0; j<nz; j++) { 185 xw[0] = x[vi[j]]; 186 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 187 } 188 t[i2] = s[0]; 189 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 190 x[i2] = xw[0]; 191 idiag += 1; 192 i2 += 1; 193 } 194 break; 195 case 2: 196 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 197 t[0] = b[0]; t[1] = b[1]; 198 i2 = 2; 199 idiag += 4; 200 for (i=1; i<m; i++) { 201 v = aa + 4*ai[i]; 202 vi = aj + ai[i]; 203 nz = diag[i] - ai[i]; 204 s[0] = b[i2]; s[1] = b[i2+1]; 205 for (j=0; j<nz; j++) { 206 idx = 2*vi[j]; 207 it = 4*j; 208 xw[0] = x[idx]; xw[1] = x[1+idx]; 209 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 210 } 211 t[i2] = s[0]; t[i2+1] = s[1]; 212 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 213 x[i2] = xw[0]; x[i2+1] = xw[1]; 214 idiag += 4; 215 i2 += 2; 216 } 217 break; 218 case 3: 219 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 220 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 221 i2 = 3; 222 idiag += 9; 223 for (i=1; i<m; i++) { 224 v = aa + 9*ai[i]; 225 vi = aj + ai[i]; 226 nz = diag[i] - ai[i]; 227 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 228 while (nz--) { 229 idx = 3*(*vi++); 230 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 231 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 232 v += 9; 233 } 234 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 235 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 236 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 237 idiag += 9; 238 i2 += 3; 239 } 240 break; 241 case 4: 242 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 243 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 244 i2 = 4; 245 idiag += 16; 246 for (i=1; i<m; i++) { 247 v = aa + 16*ai[i]; 248 vi = aj + ai[i]; 249 nz = diag[i] - ai[i]; 250 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 251 while (nz--) { 252 idx = 4*(*vi++); 253 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 254 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 255 v += 16; 256 } 257 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 258 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 259 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 260 idiag += 16; 261 i2 += 4; 262 } 263 break; 264 case 5: 265 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 266 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 267 i2 = 5; 268 idiag += 25; 269 for (i=1; i<m; i++) { 270 v = aa + 25*ai[i]; 271 vi = aj + ai[i]; 272 nz = diag[i] - ai[i]; 273 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 274 while (nz--) { 275 idx = 5*(*vi++); 276 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 277 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 278 v += 25; 279 } 280 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 281 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 282 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 283 idiag += 25; 284 i2 += 5; 285 } 286 break; 287 case 6: 288 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 289 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 290 i2 = 6; 291 idiag += 36; 292 for (i=1; i<m; i++) { 293 v = aa + 36*ai[i]; 294 vi = aj + ai[i]; 295 nz = diag[i] - ai[i]; 296 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 297 while (nz--) { 298 idx = 6*(*vi++); 299 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 300 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 301 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 302 v += 36; 303 } 304 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 305 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 306 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 307 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 308 idiag += 36; 309 i2 += 6; 310 } 311 break; 312 case 7: 313 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 314 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 315 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 316 i2 = 7; 317 idiag += 49; 318 for (i=1; i<m; i++) { 319 v = aa + 49*ai[i]; 320 vi = aj + ai[i]; 321 nz = diag[i] - ai[i]; 322 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 323 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 324 while (nz--) { 325 idx = 7*(*vi++); 326 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 327 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 328 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 329 v += 49; 330 } 331 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 332 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 333 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 334 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 335 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 336 idiag += 49; 337 i2 += 7; 338 } 339 break; 340 default: 341 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 342 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 343 i2 = bs; 344 idiag += bs2; 345 for (i=1; i<m; i++) { 346 v = aa + bs2*ai[i]; 347 vi = aj + ai[i]; 348 nz = diag[i] - ai[i]; 349 350 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 /* copy all rows of x that are needed into contiguous space */ 352 workt = work; 353 for (j=0; j<nz; j++) { 354 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 workt += bs; 356 } 357 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 358 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 359 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 360 361 idiag += bs2; 362 i2 += bs; 363 } 364 break; 365 } 366 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 367 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 368 xb = t; 369 } 370 else xb = b; 371 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 372 idiag = a->idiag+bs2*(a->mbs-1); 373 i2 = bs * (m-1); 374 switch (bs) { 375 case 1: 376 s[0] = xb[i2]; 377 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 378 x[i2] = xw[0]; 379 i2 -= 1; 380 for (i=m-2; i>=0; i--) { 381 v = aa + (diag[i]+1); 382 vi = aj + diag[i] + 1; 383 nz = ai[i+1] - diag[i] - 1; 384 s[0] = xb[i2]; 385 for (j=0; j<nz; j++) { 386 xw[0] = x[vi[j]]; 387 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 388 } 389 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 390 x[i2] = xw[0]; 391 idiag -= 1; 392 i2 -= 1; 393 } 394 break; 395 case 2: 396 s[0] = xb[i2]; s[1] = xb[i2+1]; 397 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 398 x[i2] = xw[0]; x[i2+1] = xw[1]; 399 i2 -= 2; 400 idiag -= 4; 401 for (i=m-2; i>=0; i--) { 402 v = aa + 4*(diag[i] + 1); 403 vi = aj + diag[i] + 1; 404 nz = ai[i+1] - diag[i] - 1; 405 s[0] = xb[i2]; s[1] = xb[i2+1]; 406 for (j=0; j<nz; j++) { 407 idx = 2*vi[j]; 408 it = 4*j; 409 xw[0] = x[idx]; xw[1] = x[1+idx]; 410 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 411 } 412 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 413 x[i2] = xw[0]; x[i2+1] = xw[1]; 414 idiag -= 4; 415 i2 -= 2; 416 } 417 break; 418 case 3: 419 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 420 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 421 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 422 i2 -= 3; 423 idiag -= 9; 424 for (i=m-2; i>=0; i--) { 425 v = aa + 9*(diag[i]+1); 426 vi = aj + diag[i] + 1; 427 nz = ai[i+1] - diag[i] - 1; 428 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 429 while (nz--) { 430 idx = 3*(*vi++); 431 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 432 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 433 v += 9; 434 } 435 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 436 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 437 idiag -= 9; 438 i2 -= 3; 439 } 440 break; 441 case 4: 442 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 443 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 444 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 445 i2 -= 4; 446 idiag -= 16; 447 for (i=m-2; i>=0; i--) { 448 v = aa + 16*(diag[i]+1); 449 vi = aj + diag[i] + 1; 450 nz = ai[i+1] - diag[i] - 1; 451 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 452 while (nz--) { 453 idx = 4*(*vi++); 454 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 455 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 456 v += 16; 457 } 458 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 459 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 460 idiag -= 16; 461 i2 -= 4; 462 } 463 break; 464 case 5: 465 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 466 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 467 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 468 i2 -= 5; 469 idiag -= 25; 470 for (i=m-2; i>=0; i--) { 471 v = aa + 25*(diag[i]+1); 472 vi = aj + diag[i] + 1; 473 nz = ai[i+1] - diag[i] - 1; 474 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 475 while (nz--) { 476 idx = 5*(*vi++); 477 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 478 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 479 v += 25; 480 } 481 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 482 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 483 idiag -= 25; 484 i2 -= 5; 485 } 486 break; 487 case 6: 488 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 489 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 490 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 491 i2 -= 6; 492 idiag -= 36; 493 for (i=m-2; i>=0; i--) { 494 v = aa + 36*(diag[i]+1); 495 vi = aj + diag[i] + 1; 496 nz = ai[i+1] - diag[i] - 1; 497 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 498 while (nz--) { 499 idx = 6*(*vi++); 500 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 501 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 502 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 503 v += 36; 504 } 505 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 506 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 507 idiag -= 36; 508 i2 -= 6; 509 } 510 break; 511 case 7: 512 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 513 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 514 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 515 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 516 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 517 i2 -= 7; 518 idiag -= 49; 519 for (i=m-2; i>=0; i--) { 520 v = aa + 49*(diag[i]+1); 521 vi = aj + diag[i] + 1; 522 nz = ai[i+1] - diag[i] - 1; 523 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 524 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 525 while (nz--) { 526 idx = 7*(*vi++); 527 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 528 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 529 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 530 v += 49; 531 } 532 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 533 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 534 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 535 idiag -= 49; 536 i2 -= 7; 537 } 538 break; 539 default: 540 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 541 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 542 i2 -= bs; 543 idiag -= bs2; 544 for (i=m-2; i>=0; i--) { 545 v = aa + bs2*(diag[i]+1); 546 vi = aj + diag[i] + 1; 547 nz = ai[i+1] - diag[i] - 1; 548 549 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 /* copy all rows of x that are needed into contiguous space */ 551 workt = work; 552 for (j=0; j<nz; j++) { 553 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 554 workt += bs; 555 } 556 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 557 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 558 559 idiag -= bs2; 560 i2 -= bs; 561 } 562 break; 563 } 564 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 565 } 566 its--; 567 } 568 while (its--) { 569 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 570 idiag = a->idiag; 571 i2 = 0; 572 switch (bs) { 573 case 1: 574 for (i=0; i<m; i++) { 575 v = aa + ai[i]; 576 vi = aj + ai[i]; 577 nz = ai[i+1] - ai[i]; 578 s[0] = b[i2]; 579 for (j=0; j<nz; j++) { 580 xw[0] = x[vi[j]]; 581 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 582 } 583 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 584 x[i2] += xw[0]; 585 idiag += 1; 586 i2 += 1; 587 } 588 break; 589 case 2: 590 for (i=0; i<m; i++) { 591 v = aa + 4*ai[i]; 592 vi = aj + ai[i]; 593 nz = ai[i+1] - ai[i]; 594 s[0] = b[i2]; s[1] = b[i2+1]; 595 for (j=0; j<nz; j++) { 596 idx = 2*vi[j]; 597 it = 4*j; 598 xw[0] = x[idx]; xw[1] = x[1+idx]; 599 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 600 } 601 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 602 x[i2] += xw[0]; x[i2+1] += xw[1]; 603 idiag += 4; 604 i2 += 2; 605 } 606 break; 607 case 3: 608 for (i=0; i<m; i++) { 609 v = aa + 9*ai[i]; 610 vi = aj + ai[i]; 611 nz = ai[i+1] - ai[i]; 612 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 613 while (nz--) { 614 idx = 3*(*vi++); 615 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 616 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 617 v += 9; 618 } 619 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 620 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 621 idiag += 9; 622 i2 += 3; 623 } 624 break; 625 case 4: 626 for (i=0; i<m; i++) { 627 v = aa + 16*ai[i]; 628 vi = aj + ai[i]; 629 nz = ai[i+1] - ai[i]; 630 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 631 while (nz--) { 632 idx = 4*(*vi++); 633 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 634 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 635 v += 16; 636 } 637 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 638 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 639 idiag += 16; 640 i2 += 4; 641 } 642 break; 643 case 5: 644 for (i=0; i<m; i++) { 645 v = aa + 25*ai[i]; 646 vi = aj + ai[i]; 647 nz = ai[i+1] - ai[i]; 648 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 649 while (nz--) { 650 idx = 5*(*vi++); 651 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 652 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 653 v += 25; 654 } 655 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 656 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 657 idiag += 25; 658 i2 += 5; 659 } 660 break; 661 case 6: 662 for (i=0; i<m; i++) { 663 v = aa + 36*ai[i]; 664 vi = aj + ai[i]; 665 nz = ai[i+1] - ai[i]; 666 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 667 while (nz--) { 668 idx = 6*(*vi++); 669 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 670 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 671 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 672 v += 36; 673 } 674 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 675 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 676 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 677 idiag += 36; 678 i2 += 6; 679 } 680 break; 681 case 7: 682 for (i=0; i<m; i++) { 683 v = aa + 49*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 687 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 688 while (nz--) { 689 idx = 7*(*vi++); 690 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 691 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 692 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 693 v += 49; 694 } 695 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 696 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 697 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 698 idiag += 49; 699 i2 += 7; 700 } 701 break; 702 default: 703 for (i=0; i<m; i++) { 704 v = aa + bs2*ai[i]; 705 vi = aj + ai[i]; 706 nz = ai[i+1] - ai[i]; 707 708 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 /* copy all rows of x that are needed into contiguous space */ 710 workt = work; 711 for (j=0; j<nz; j++) { 712 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 713 workt += bs; 714 } 715 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 716 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 717 718 idiag += bs2; 719 i2 += bs; 720 } 721 break; 722 } 723 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 724 } 725 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 726 idiag = a->idiag+bs2*(a->mbs-1); 727 i2 = bs * (m-1); 728 switch (bs) { 729 case 1: 730 for (i=m-1; i>=0; i--) { 731 v = aa + ai[i]; 732 vi = aj + ai[i]; 733 nz = ai[i+1] - ai[i]; 734 s[0] = b[i2]; 735 for (j=0; j<nz; j++) { 736 xw[0] = x[vi[j]]; 737 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 738 } 739 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 740 x[i2] += xw[0]; 741 idiag -= 1; 742 i2 -= 1; 743 } 744 break; 745 case 2: 746 for (i=m-1; i>=0; i--) { 747 v = aa + 4*ai[i]; 748 vi = aj + ai[i]; 749 nz = ai[i+1] - ai[i]; 750 s[0] = b[i2]; s[1] = b[i2+1]; 751 for (j=0; j<nz; j++) { 752 idx = 2*vi[j]; 753 it = 4*j; 754 xw[0] = x[idx]; xw[1] = x[1+idx]; 755 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 756 } 757 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 758 x[i2] += xw[0]; x[i2+1] += xw[1]; 759 idiag -= 4; 760 i2 -= 2; 761 } 762 break; 763 case 3: 764 for (i=m-1; i>=0; i--) { 765 v = aa + 9*ai[i]; 766 vi = aj + ai[i]; 767 nz = ai[i+1] - ai[i]; 768 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 769 while (nz--) { 770 idx = 3*(*vi++); 771 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 772 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 773 v += 9; 774 } 775 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 776 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 777 idiag -= 9; 778 i2 -= 3; 779 } 780 break; 781 case 4: 782 for (i=m-1; i>=0; i--) { 783 v = aa + 16*ai[i]; 784 vi = aj + ai[i]; 785 nz = ai[i+1] - ai[i]; 786 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 787 while (nz--) { 788 idx = 4*(*vi++); 789 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 790 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 791 v += 16; 792 } 793 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 794 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 795 idiag -= 16; 796 i2 -= 4; 797 } 798 break; 799 case 5: 800 for (i=m-1; i>=0; i--) { 801 v = aa + 25*ai[i]; 802 vi = aj + ai[i]; 803 nz = ai[i+1] - ai[i]; 804 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 805 while (nz--) { 806 idx = 5*(*vi++); 807 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 808 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 809 v += 25; 810 } 811 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 812 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 813 idiag -= 25; 814 i2 -= 5; 815 } 816 break; 817 case 6: 818 for (i=m-1; i>=0; i--) { 819 v = aa + 36*ai[i]; 820 vi = aj + ai[i]; 821 nz = ai[i+1] - ai[i]; 822 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 823 while (nz--) { 824 idx = 6*(*vi++); 825 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 826 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 827 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 828 v += 36; 829 } 830 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 831 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 832 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 833 idiag -= 36; 834 i2 -= 6; 835 } 836 break; 837 case 7: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 49*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 843 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 844 while (nz--) { 845 idx = 7*(*vi++); 846 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 847 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 848 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 849 v += 49; 850 } 851 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 852 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 853 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 854 idiag -= 49; 855 i2 -= 7; 856 } 857 break; 858 default: 859 for (i=m-1; i>=0; i--) { 860 v = aa + bs2*ai[i]; 861 vi = aj + ai[i]; 862 nz = ai[i+1] - ai[i]; 863 864 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 /* copy all rows of x that are needed into contiguous space */ 866 workt = work; 867 for (j=0; j<nz; j++) { 868 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 869 workt += bs; 870 } 871 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 872 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 873 874 idiag -= bs2; 875 i2 -= bs; 876 } 877 break; 878 } 879 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 880 } 881 } 882 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 883 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 884 PetscFunctionReturn(0); 885 } 886 887 888 /* 889 Special version for direct calls from Fortran (Used in PETSc-fun3d) 890 */ 891 #if defined(PETSC_HAVE_FORTRAN_CAPS) 892 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 893 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 894 #define matsetvaluesblocked4_ matsetvaluesblocked4 895 #endif 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "matsetvaluesblocked4_" 899 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 900 { 901 Mat A = *AA; 902 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 903 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 904 PetscInt *ai =a->i,*ailen=a->ilen; 905 PetscInt *aj =a->j,stepval,lastcol = -1; 906 const PetscScalar *value = v; 907 MatScalar *ap,*aa = a->a,*bap; 908 909 PetscFunctionBegin; 910 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 911 stepval = (n-1)*4; 912 for (k=0; k<m; k++) { /* loop over added rows */ 913 row = im[k]; 914 rp = aj + ai[row]; 915 ap = aa + 16*ai[row]; 916 nrow = ailen[row]; 917 low = 0; 918 high = nrow; 919 for (l=0; l<n; l++) { /* loop over added columns */ 920 col = in[l]; 921 if (col <= lastcol) low = 0; 922 else high = nrow; 923 lastcol = col; 924 value = v + k*(stepval+4 + l)*4; 925 while (high-low > 7) { 926 t = (low+high)/2; 927 if (rp[t] > col) high = t; 928 else low = t; 929 } 930 for (i=low; i<high; i++) { 931 if (rp[i] > col) break; 932 if (rp[i] == col) { 933 bap = ap + 16*i; 934 for (ii=0; ii<4; ii++,value+=stepval) { 935 for (jj=ii; jj<16; jj+=4) { 936 bap[jj] += *value++; 937 } 938 } 939 goto noinsert2; 940 } 941 } 942 N = nrow++ - 1; 943 high++; /* added new column index thus must search to one higher than before */ 944 /* shift up all the later entries in this row */ 945 for (ii=N; ii>=i; ii--) { 946 rp[ii+1] = rp[ii]; 947 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 948 } 949 if (N >= i) { 950 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 951 } 952 rp[i] = col; 953 bap = ap + 16*i; 954 for (ii=0; ii<4; ii++,value+=stepval) { 955 for (jj=ii; jj<16; jj+=4) { 956 bap[jj] = *value++; 957 } 958 } 959 noinsert2:; 960 low = i; 961 } 962 ailen[row] = nrow; 963 } 964 PetscFunctionReturnVoid(); 965 } 966 967 #if defined(PETSC_HAVE_FORTRAN_CAPS) 968 #define matsetvalues4_ MATSETVALUES4 969 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 970 #define matsetvalues4_ matsetvalues4 971 #endif 972 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatSetValues4_" 975 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 976 { 977 Mat A = *AA; 978 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 979 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 980 PetscInt *ai=a->i,*ailen=a->ilen; 981 PetscInt *aj=a->j,brow,bcol; 982 PetscInt ridx,cidx,lastcol = -1; 983 MatScalar *ap,value,*aa=a->a,*bap; 984 985 PetscFunctionBegin; 986 for (k=0; k<m; k++) { /* loop over added rows */ 987 row = im[k]; brow = row/4; 988 rp = aj + ai[brow]; 989 ap = aa + 16*ai[brow]; 990 nrow = ailen[brow]; 991 low = 0; 992 high = nrow; 993 for (l=0; l<n; l++) { /* loop over added columns */ 994 col = in[l]; bcol = col/4; 995 ridx = row % 4; cidx = col % 4; 996 value = v[l + k*n]; 997 if (col <= lastcol) low = 0; 998 else high = nrow; 999 lastcol = col; 1000 while (high-low > 7) { 1001 t = (low+high)/2; 1002 if (rp[t] > bcol) high = t; 1003 else low = t; 1004 } 1005 for (i=low; i<high; i++) { 1006 if (rp[i] > bcol) break; 1007 if (rp[i] == bcol) { 1008 bap = ap + 16*i + 4*cidx + ridx; 1009 *bap += value; 1010 goto noinsert1; 1011 } 1012 } 1013 N = nrow++ - 1; 1014 high++; /* added new column thus must search to one higher than before */ 1015 /* shift up all the later entries in this row */ 1016 for (ii=N; ii>=i; ii--) { 1017 rp[ii+1] = rp[ii]; 1018 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1019 } 1020 if (N>=i) { 1021 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1022 } 1023 rp[i] = bcol; 1024 ap[16*i + 4*cidx + ridx] = value; 1025 noinsert1:; 1026 low = i; 1027 } 1028 ailen[brow] = nrow; 1029 } 1030 PetscFunctionReturnVoid(); 1031 } 1032 1033 /* 1034 Checks for missing diagonals 1035 */ 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1039 { 1040 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1041 PetscErrorCode ierr; 1042 PetscInt *diag,*ii = a->i,i; 1043 1044 PetscFunctionBegin; 1045 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1046 *missing = PETSC_FALSE; 1047 if (A->rmap->n > 0 && !ii) { 1048 *missing = PETSC_TRUE; 1049 if (d) *d = 0; 1050 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1051 } else { 1052 diag = a->diag; 1053 for (i=0; i<a->mbs; i++) { 1054 if (diag[i] >= ii[i+1]) { 1055 *missing = PETSC_TRUE; 1056 if (d) *d = i; 1057 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1058 break; 1059 } 1060 } 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1067 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1068 { 1069 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscInt i,j,m = a->mbs; 1072 1073 PetscFunctionBegin; 1074 if (!a->diag) { 1075 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1076 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1077 a->free_diag = PETSC_TRUE; 1078 } 1079 for (i=0; i<m; i++) { 1080 a->diag[i] = a->i[i+1]; 1081 for (j=a->i[i]; j<a->i[i+1]; j++) { 1082 if (a->j[j] == i) { 1083 a->diag[i] = j; 1084 break; 1085 } 1086 } 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1094 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1095 { 1096 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1097 PetscErrorCode ierr; 1098 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1099 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1100 1101 PetscFunctionBegin; 1102 *nn = n; 1103 if (!ia) PetscFunctionReturn(0); 1104 if (symmetric) { 1105 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1106 nz = tia[n]; 1107 } else { 1108 tia = a->i; tja = a->j; 1109 } 1110 1111 if (!blockcompressed && bs > 1) { 1112 (*nn) *= bs; 1113 /* malloc & create the natural set of indices */ 1114 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1115 if (n) { 1116 (*ia)[0] = 0; 1117 for (j=1; j<bs; j++) { 1118 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1119 } 1120 } 1121 1122 for (i=1; i<n; i++) { 1123 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1124 for (j=1; j<bs; j++) { 1125 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1126 } 1127 } 1128 if (n) { 1129 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1130 } 1131 1132 if (inja) { 1133 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1134 cnt = 0; 1135 for (i=0; i<n; i++) { 1136 for (j=0; j<bs; j++) { 1137 for (k=tia[i]; k<tia[i+1]; k++) { 1138 for (l=0; l<bs; l++) { 1139 (*ja)[cnt++] = bs*tja[k] + l; 1140 } 1141 } 1142 } 1143 } 1144 } 1145 1146 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1147 ierr = PetscFree(tia);CHKERRQ(ierr); 1148 ierr = PetscFree(tja);CHKERRQ(ierr); 1149 } 1150 } else if (oshift == 1) { 1151 if (symmetric) { 1152 nz = tia[A->rmap->n/bs]; 1153 /* add 1 to i and j indices */ 1154 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1155 *ia = tia; 1156 if (ja) { 1157 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1158 *ja = tja; 1159 } 1160 } else { 1161 nz = a->i[A->rmap->n/bs]; 1162 /* malloc space and add 1 to i and j indices */ 1163 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1164 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1165 if (ja) { 1166 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1167 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1168 } 1169 } 1170 } else { 1171 *ia = tia; 1172 if (ja) *ja = tja; 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1179 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1180 { 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 if (!ia) PetscFunctionReturn(0); 1185 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1186 ierr = PetscFree(*ia);CHKERRQ(ierr); 1187 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1194 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1195 { 1196 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 #if defined(PETSC_USE_LOG) 1201 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1202 #endif 1203 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1206 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1207 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1208 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1209 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1210 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1211 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1212 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1213 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1214 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1215 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1216 1217 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1218 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1219 ierr = PetscFree(A->data);CHKERRQ(ierr); 1220 1221 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1237 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1238 { 1239 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 switch (op) { 1244 case MAT_ROW_ORIENTED: 1245 a->roworiented = flg; 1246 break; 1247 case MAT_KEEP_NONZERO_PATTERN: 1248 a->keepnonzeropattern = flg; 1249 break; 1250 case MAT_NEW_NONZERO_LOCATIONS: 1251 a->nonew = (flg ? 0 : 1); 1252 break; 1253 case MAT_NEW_NONZERO_LOCATION_ERR: 1254 a->nonew = (flg ? -1 : 0); 1255 break; 1256 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1257 a->nonew = (flg ? -2 : 0); 1258 break; 1259 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1260 a->nounused = (flg ? -1 : 0); 1261 break; 1262 case MAT_NEW_DIAGONALS: 1263 case MAT_IGNORE_OFF_PROC_ENTRIES: 1264 case MAT_USE_HASH_TABLE: 1265 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1266 break; 1267 case MAT_SPD: 1268 case MAT_SYMMETRIC: 1269 case MAT_STRUCTURALLY_SYMMETRIC: 1270 case MAT_HERMITIAN: 1271 case MAT_SYMMETRY_ETERNAL: 1272 /* These options are handled directly by MatSetOption() */ 1273 break; 1274 default: 1275 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1283 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1284 { 1285 PetscErrorCode ierr; 1286 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1287 MatScalar *aa_i; 1288 PetscScalar *v_i; 1289 1290 PetscFunctionBegin; 1291 bs = A->rmap->bs; 1292 bs2 = bs*bs; 1293 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1294 1295 bn = row/bs; /* Block number */ 1296 bp = row % bs; /* Block Position */ 1297 M = ai[bn+1] - ai[bn]; 1298 *nz = bs*M; 1299 1300 if (v) { 1301 *v = 0; 1302 if (*nz) { 1303 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1304 for (i=0; i<M; i++) { /* for each block in the block row */ 1305 v_i = *v + i*bs; 1306 aa_i = aa + bs2*(ai[bn] + i); 1307 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1308 } 1309 } 1310 } 1311 1312 if (idx) { 1313 *idx = 0; 1314 if (*nz) { 1315 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1316 for (i=0; i<M; i++) { /* for each block in the block row */ 1317 idx_i = *idx + i*bs; 1318 itmp = bs*aj[ai[bn] + i]; 1319 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1320 } 1321 } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1328 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1340 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1341 { 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1346 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1347 PetscFunctionReturn(0); 1348 } 1349 1350 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1354 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1355 { 1356 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1357 Mat C; 1358 PetscErrorCode ierr; 1359 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1360 PetscInt *rows,*cols,bs2=a->bs2; 1361 MatScalar *array; 1362 1363 PetscFunctionBegin; 1364 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1365 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1366 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1367 1368 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1369 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1370 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1371 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1372 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1373 ierr = PetscFree(col);CHKERRQ(ierr); 1374 } else { 1375 C = *B; 1376 } 1377 1378 array = a->a; 1379 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1380 for (i=0; i<mbs; i++) { 1381 cols[0] = i*bs; 1382 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1383 len = ai[i+1] - ai[i]; 1384 for (j=0; j<len; j++) { 1385 rows[0] = (*aj++)*bs; 1386 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1387 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1388 array += bs2; 1389 } 1390 } 1391 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1392 1393 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 1396 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1397 *B = C; 1398 } else { 1399 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1407 { 1408 PetscErrorCode ierr; 1409 Mat Btrans; 1410 1411 PetscFunctionBegin; 1412 *f = PETSC_FALSE; 1413 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1414 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1415 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1421 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1422 { 1423 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1424 PetscErrorCode ierr; 1425 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1426 int fd; 1427 PetscScalar *aa; 1428 FILE *file; 1429 1430 PetscFunctionBegin; 1431 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1432 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1433 col_lens[0] = MAT_FILE_CLASSID; 1434 1435 col_lens[1] = A->rmap->N; 1436 col_lens[2] = A->cmap->n; 1437 col_lens[3] = a->nz*bs2; 1438 1439 /* store lengths of each row and write (including header) to file */ 1440 count = 0; 1441 for (i=0; i<a->mbs; i++) { 1442 for (j=0; j<bs; j++) { 1443 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1444 } 1445 } 1446 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1447 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1448 1449 /* store column indices (zero start index) */ 1450 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1451 count = 0; 1452 for (i=0; i<a->mbs; i++) { 1453 for (j=0; j<bs; j++) { 1454 for (k=a->i[i]; k<a->i[i+1]; k++) { 1455 for (l=0; l<bs; l++) { 1456 jj[count++] = bs*a->j[k] + l; 1457 } 1458 } 1459 } 1460 } 1461 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscFree(jj);CHKERRQ(ierr); 1463 1464 /* store nonzero values */ 1465 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1466 count = 0; 1467 for (i=0; i<a->mbs; i++) { 1468 for (j=0; j<bs; j++) { 1469 for (k=a->i[i]; k<a->i[i+1]; k++) { 1470 for (l=0; l<bs; l++) { 1471 aa[count++] = a->a[bs2*k + l*bs + j]; 1472 } 1473 } 1474 } 1475 } 1476 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1477 ierr = PetscFree(aa);CHKERRQ(ierr); 1478 1479 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1480 if (file) { 1481 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1488 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1489 { 1490 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1491 PetscErrorCode ierr; 1492 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1493 PetscViewerFormat format; 1494 1495 PetscFunctionBegin; 1496 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1497 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1498 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1499 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1500 const char *matname; 1501 Mat aij; 1502 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1503 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1504 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1505 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1507 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1508 PetscFunctionReturn(0); 1509 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1510 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1511 for (i=0; i<a->mbs; i++) { 1512 for (j=0; j<bs; j++) { 1513 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1514 for (k=a->i[i]; k<a->i[i+1]; k++) { 1515 for (l=0; l<bs; l++) { 1516 #if defined(PETSC_USE_COMPLEX) 1517 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1518 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1519 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1520 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1521 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1522 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1523 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1524 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1525 } 1526 #else 1527 if (a->a[bs2*k + l*bs + j] != 0.0) { 1528 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1529 } 1530 #endif 1531 } 1532 } 1533 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1534 } 1535 } 1536 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1537 } else { 1538 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1539 for (i=0; i<a->mbs; i++) { 1540 for (j=0; j<bs; j++) { 1541 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1542 for (k=a->i[i]; k<a->i[i+1]; k++) { 1543 for (l=0; l<bs; l++) { 1544 #if defined(PETSC_USE_COMPLEX) 1545 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1547 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1548 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1550 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1551 } else { 1552 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } 1554 #else 1555 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1556 #endif 1557 } 1558 } 1559 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1560 } 1561 } 1562 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1563 } 1564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #include <petscdraw.h> 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1571 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1572 { 1573 Mat A = (Mat) Aa; 1574 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1575 PetscErrorCode ierr; 1576 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1577 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1578 MatScalar *aa; 1579 PetscViewer viewer; 1580 PetscViewerFormat format; 1581 1582 PetscFunctionBegin; 1583 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1584 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1585 1586 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1587 1588 /* loop over matrix elements drawing boxes */ 1589 1590 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1591 color = PETSC_DRAW_BLUE; 1592 for (i=0,row=0; i<mbs; i++,row+=bs) { 1593 for (j=a->i[i]; j<a->i[i+1]; j++) { 1594 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1595 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1596 aa = a->a + j*bs2; 1597 for (k=0; k<bs; k++) { 1598 for (l=0; l<bs; l++) { 1599 if (PetscRealPart(*aa++) >= 0.) continue; 1600 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1601 } 1602 } 1603 } 1604 } 1605 color = PETSC_DRAW_CYAN; 1606 for (i=0,row=0; i<mbs; i++,row+=bs) { 1607 for (j=a->i[i]; j<a->i[i+1]; j++) { 1608 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1609 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1610 aa = a->a + j*bs2; 1611 for (k=0; k<bs; k++) { 1612 for (l=0; l<bs; l++) { 1613 if (PetscRealPart(*aa++) != 0.) continue; 1614 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1615 } 1616 } 1617 } 1618 } 1619 color = PETSC_DRAW_RED; 1620 for (i=0,row=0; i<mbs; i++,row+=bs) { 1621 for (j=a->i[i]; j<a->i[i+1]; j++) { 1622 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1623 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1624 aa = a->a + j*bs2; 1625 for (k=0; k<bs; k++) { 1626 for (l=0; l<bs; l++) { 1627 if (PetscRealPart(*aa++) <= 0.) continue; 1628 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1629 } 1630 } 1631 } 1632 } 1633 } else { 1634 /* use contour shading to indicate magnitude of values */ 1635 /* first determine max of all nonzero values */ 1636 PetscDraw popup; 1637 PetscReal scale,maxv = 0.0; 1638 1639 for (i=0; i<a->nz*a->bs2; i++) { 1640 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1641 } 1642 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1643 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1644 if (popup) { 1645 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1646 } 1647 for (i=0,row=0; i<mbs; i++,row+=bs) { 1648 for (j=a->i[i]; j<a->i[i+1]; j++) { 1649 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1650 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1651 aa = a->a + j*bs2; 1652 for (k=0; k<bs; k++) { 1653 for (l=0; l<bs; l++) { 1654 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1655 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1656 } 1657 } 1658 } 1659 } 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1666 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1667 { 1668 PetscErrorCode ierr; 1669 PetscReal xl,yl,xr,yr,w,h; 1670 PetscDraw draw; 1671 PetscBool isnull; 1672 1673 PetscFunctionBegin; 1674 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1675 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1676 1677 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1678 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1679 xr += w; yr += h; xl = -w; yl = -h; 1680 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1681 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1682 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatView_SeqBAIJ" 1688 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1689 { 1690 PetscErrorCode ierr; 1691 PetscBool iascii,isbinary,isdraw; 1692 1693 PetscFunctionBegin; 1694 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1695 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1696 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1697 if (iascii) { 1698 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1699 } else if (isbinary) { 1700 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1701 } else if (isdraw) { 1702 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1703 } else { 1704 Mat B; 1705 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1706 ierr = MatView(B,viewer);CHKERRQ(ierr); 1707 ierr = MatDestroy(&B);CHKERRQ(ierr); 1708 } 1709 PetscFunctionReturn(0); 1710 } 1711 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1715 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1716 { 1717 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1718 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1719 PetscInt *ai = a->i,*ailen = a->ilen; 1720 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1721 MatScalar *ap,*aa = a->a; 1722 1723 PetscFunctionBegin; 1724 for (k=0; k<m; k++) { /* loop over rows */ 1725 row = im[k]; brow = row/bs; 1726 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1727 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1728 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1729 nrow = ailen[brow]; 1730 for (l=0; l<n; l++) { /* loop over columns */ 1731 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1732 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1733 col = in[l]; 1734 bcol = col/bs; 1735 cidx = col%bs; 1736 ridx = row%bs; 1737 high = nrow; 1738 low = 0; /* assume unsorted */ 1739 while (high-low > 5) { 1740 t = (low+high)/2; 1741 if (rp[t] > bcol) high = t; 1742 else low = t; 1743 } 1744 for (i=low; i<high; i++) { 1745 if (rp[i] > bcol) break; 1746 if (rp[i] == bcol) { 1747 *v++ = ap[bs2*i+bs*cidx+ridx]; 1748 goto finished; 1749 } 1750 } 1751 *v++ = 0.0; 1752 finished:; 1753 } 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1760 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1761 { 1762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1763 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1764 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1765 PetscErrorCode ierr; 1766 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1767 PetscBool roworiented=a->roworiented; 1768 const PetscScalar *value = v; 1769 MatScalar *ap,*aa = a->a,*bap; 1770 1771 PetscFunctionBegin; 1772 if (roworiented) { 1773 stepval = (n-1)*bs; 1774 } else { 1775 stepval = (m-1)*bs; 1776 } 1777 for (k=0; k<m; k++) { /* loop over added rows */ 1778 row = im[k]; 1779 if (row < 0) continue; 1780 #if defined(PETSC_USE_DEBUG) 1781 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1782 #endif 1783 rp = aj + ai[row]; 1784 ap = aa + bs2*ai[row]; 1785 rmax = imax[row]; 1786 nrow = ailen[row]; 1787 low = 0; 1788 high = nrow; 1789 for (l=0; l<n; l++) { /* loop over added columns */ 1790 if (in[l] < 0) continue; 1791 #if defined(PETSC_USE_DEBUG) 1792 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1793 #endif 1794 col = in[l]; 1795 if (roworiented) { 1796 value = v + (k*(stepval+bs) + l)*bs; 1797 } else { 1798 value = v + (l*(stepval+bs) + k)*bs; 1799 } 1800 if (col <= lastcol) low = 0; 1801 else high = nrow; 1802 lastcol = col; 1803 while (high-low > 7) { 1804 t = (low+high)/2; 1805 if (rp[t] > col) high = t; 1806 else low = t; 1807 } 1808 for (i=low; i<high; i++) { 1809 if (rp[i] > col) break; 1810 if (rp[i] == col) { 1811 bap = ap + bs2*i; 1812 if (roworiented) { 1813 if (is == ADD_VALUES) { 1814 for (ii=0; ii<bs; ii++,value+=stepval) { 1815 for (jj=ii; jj<bs2; jj+=bs) { 1816 bap[jj] += *value++; 1817 } 1818 } 1819 } else { 1820 for (ii=0; ii<bs; ii++,value+=stepval) { 1821 for (jj=ii; jj<bs2; jj+=bs) { 1822 bap[jj] = *value++; 1823 } 1824 } 1825 } 1826 } else { 1827 if (is == ADD_VALUES) { 1828 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1829 for (jj=0; jj<bs; jj++) { 1830 bap[jj] += value[jj]; 1831 } 1832 bap += bs; 1833 } 1834 } else { 1835 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1836 for (jj=0; jj<bs; jj++) { 1837 bap[jj] = value[jj]; 1838 } 1839 bap += bs; 1840 } 1841 } 1842 } 1843 goto noinsert2; 1844 } 1845 } 1846 if (nonew == 1) goto noinsert2; 1847 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1848 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1849 N = nrow++ - 1; high++; 1850 /* shift up all the later entries in this row */ 1851 for (ii=N; ii>=i; ii--) { 1852 rp[ii+1] = rp[ii]; 1853 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1854 } 1855 if (N >= i) { 1856 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1857 } 1858 rp[i] = col; 1859 bap = ap + bs2*i; 1860 if (roworiented) { 1861 for (ii=0; ii<bs; ii++,value+=stepval) { 1862 for (jj=ii; jj<bs2; jj+=bs) { 1863 bap[jj] = *value++; 1864 } 1865 } 1866 } else { 1867 for (ii=0; ii<bs; ii++,value+=stepval) { 1868 for (jj=0; jj<bs; jj++) { 1869 *bap++ = *value++; 1870 } 1871 } 1872 } 1873 noinsert2:; 1874 low = i; 1875 } 1876 ailen[row] = nrow; 1877 } 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1883 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1884 { 1885 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1886 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1887 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1888 PetscErrorCode ierr; 1889 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1890 MatScalar *aa = a->a,*ap; 1891 PetscReal ratio=0.6; 1892 1893 PetscFunctionBegin; 1894 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1895 1896 if (m) rmax = ailen[0]; 1897 for (i=1; i<mbs; i++) { 1898 /* move each row back by the amount of empty slots (fshift) before it*/ 1899 fshift += imax[i-1] - ailen[i-1]; 1900 rmax = PetscMax(rmax,ailen[i]); 1901 if (fshift) { 1902 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1903 N = ailen[i]; 1904 for (j=0; j<N; j++) { 1905 ip[j-fshift] = ip[j]; 1906 1907 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1908 } 1909 } 1910 ai[i] = ai[i-1] + ailen[i-1]; 1911 } 1912 if (mbs) { 1913 fshift += imax[mbs-1] - ailen[mbs-1]; 1914 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1915 } 1916 1917 /* reset ilen and imax for each row */ 1918 a->nonzerorowcnt = 0; 1919 for (i=0; i<mbs; i++) { 1920 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1921 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1922 } 1923 a->nz = ai[mbs]; 1924 1925 /* diagonals may have moved, so kill the diagonal pointers */ 1926 a->idiagvalid = PETSC_FALSE; 1927 if (fshift && a->diag) { 1928 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1929 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1930 a->diag = 0; 1931 } 1932 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1933 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1934 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1935 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1936 1937 A->info.mallocs += a->reallocs; 1938 a->reallocs = 0; 1939 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1940 a->rmax = rmax; 1941 1942 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 /* 1947 This function returns an array of flags which indicate the locations of contiguous 1948 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1949 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1950 Assume: sizes should be long enough to hold all the values. 1951 */ 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1954 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1955 { 1956 PetscInt i,j,k,row; 1957 PetscBool flg; 1958 1959 PetscFunctionBegin; 1960 for (i=0,j=0; i<n; j++) { 1961 row = idx[i]; 1962 if (row%bs!=0) { /* Not the begining of a block */ 1963 sizes[j] = 1; 1964 i++; 1965 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1966 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1967 i++; 1968 } else { /* Begining of the block, so check if the complete block exists */ 1969 flg = PETSC_TRUE; 1970 for (k=1; k<bs; k++) { 1971 if (row+k != idx[i+k]) { /* break in the block */ 1972 flg = PETSC_FALSE; 1973 break; 1974 } 1975 } 1976 if (flg) { /* No break in the bs */ 1977 sizes[j] = bs; 1978 i += bs; 1979 } else { 1980 sizes[j] = 1; 1981 i++; 1982 } 1983 } 1984 } 1985 *bs_max = j; 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1991 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1992 { 1993 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1994 PetscErrorCode ierr; 1995 PetscInt i,j,k,count,*rows; 1996 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1997 PetscScalar zero = 0.0; 1998 MatScalar *aa; 1999 const PetscScalar *xx; 2000 PetscScalar *bb; 2001 2002 PetscFunctionBegin; 2003 /* fix right hand side if needed */ 2004 if (x && b) { 2005 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2006 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2007 for (i=0; i<is_n; i++) { 2008 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2009 } 2010 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2011 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2012 } 2013 2014 /* Make a copy of the IS and sort it */ 2015 /* allocate memory for rows,sizes */ 2016 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2017 2018 /* copy IS values to rows, and sort them */ 2019 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2020 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2021 2022 if (baij->keepnonzeropattern) { 2023 for (i=0; i<is_n; i++) sizes[i] = 1; 2024 bs_max = is_n; 2025 } else { 2026 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2027 A->nonzerostate++; 2028 } 2029 2030 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2031 row = rows[j]; 2032 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2033 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2034 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2035 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2036 if (diag != (PetscScalar)0.0) { 2037 if (baij->ilen[row/bs] > 0) { 2038 baij->ilen[row/bs] = 1; 2039 baij->j[baij->i[row/bs]] = row/bs; 2040 2041 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2042 } 2043 /* Now insert all the diagonal values for this bs */ 2044 for (k=0; k<bs; k++) { 2045 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2046 } 2047 } else { /* (diag == 0.0) */ 2048 baij->ilen[row/bs] = 0; 2049 } /* end (diag == 0.0) */ 2050 } else { /* (sizes[i] != bs) */ 2051 #if defined(PETSC_USE_DEBUG) 2052 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2053 #endif 2054 for (k=0; k<count; k++) { 2055 aa[0] = zero; 2056 aa += bs; 2057 } 2058 if (diag != (PetscScalar)0.0) { 2059 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2060 } 2061 } 2062 } 2063 2064 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2065 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2071 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2072 { 2073 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2074 PetscErrorCode ierr; 2075 PetscInt i,j,k,count; 2076 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2077 PetscScalar zero = 0.0; 2078 MatScalar *aa; 2079 const PetscScalar *xx; 2080 PetscScalar *bb; 2081 PetscBool *zeroed,vecs = PETSC_FALSE; 2082 2083 PetscFunctionBegin; 2084 /* fix right hand side if needed */ 2085 if (x && b) { 2086 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2087 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2088 vecs = PETSC_TRUE; 2089 } 2090 2091 /* zero the columns */ 2092 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2093 for (i=0; i<is_n; i++) { 2094 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2095 zeroed[is_idx[i]] = PETSC_TRUE; 2096 } 2097 for (i=0; i<A->rmap->N; i++) { 2098 if (!zeroed[i]) { 2099 row = i/bs; 2100 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2101 for (k=0; k<bs; k++) { 2102 col = bs*baij->j[j] + k; 2103 if (zeroed[col]) { 2104 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2105 if (vecs) bb[i] -= aa[0]*xx[col]; 2106 aa[0] = 0.0; 2107 } 2108 } 2109 } 2110 } else if (vecs) bb[i] = diag*xx[i]; 2111 } 2112 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2113 if (vecs) { 2114 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2115 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2116 } 2117 2118 /* zero the rows */ 2119 for (i=0; i<is_n; i++) { 2120 row = is_idx[i]; 2121 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2122 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2123 for (k=0; k<count; k++) { 2124 aa[0] = zero; 2125 aa += bs; 2126 } 2127 if (diag != (PetscScalar)0.0) { 2128 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2129 } 2130 } 2131 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2132 PetscFunctionReturn(0); 2133 } 2134 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2137 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2138 { 2139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2140 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2141 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2142 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2143 PetscErrorCode ierr; 2144 PetscInt ridx,cidx,bs2=a->bs2; 2145 PetscBool roworiented=a->roworiented; 2146 MatScalar *ap,value,*aa=a->a,*bap; 2147 2148 PetscFunctionBegin; 2149 for (k=0; k<m; k++) { /* loop over added rows */ 2150 row = im[k]; 2151 brow = row/bs; 2152 if (row < 0) continue; 2153 #if defined(PETSC_USE_DEBUG) 2154 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2155 #endif 2156 rp = aj + ai[brow]; 2157 ap = aa + bs2*ai[brow]; 2158 rmax = imax[brow]; 2159 nrow = ailen[brow]; 2160 low = 0; 2161 high = nrow; 2162 for (l=0; l<n; l++) { /* loop over added columns */ 2163 if (in[l] < 0) continue; 2164 #if defined(PETSC_USE_DEBUG) 2165 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2166 #endif 2167 col = in[l]; bcol = col/bs; 2168 ridx = row % bs; cidx = col % bs; 2169 if (roworiented) { 2170 value = v[l + k*n]; 2171 } else { 2172 value = v[k + l*m]; 2173 } 2174 if (col <= lastcol) low = 0; else high = nrow; 2175 lastcol = col; 2176 while (high-low > 7) { 2177 t = (low+high)/2; 2178 if (rp[t] > bcol) high = t; 2179 else low = t; 2180 } 2181 for (i=low; i<high; i++) { 2182 if (rp[i] > bcol) break; 2183 if (rp[i] == bcol) { 2184 bap = ap + bs2*i + bs*cidx + ridx; 2185 if (is == ADD_VALUES) *bap += value; 2186 else *bap = value; 2187 goto noinsert1; 2188 } 2189 } 2190 if (nonew == 1) goto noinsert1; 2191 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2192 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2193 N = nrow++ - 1; high++; 2194 /* shift up all the later entries in this row */ 2195 for (ii=N; ii>=i; ii--) { 2196 rp[ii+1] = rp[ii]; 2197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2198 } 2199 if (N>=i) { 2200 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2201 } 2202 rp[i] = bcol; 2203 ap[bs2*i + bs*cidx + ridx] = value; 2204 a->nz++; 2205 A->nonzerostate++; 2206 noinsert1:; 2207 low = i; 2208 } 2209 ailen[brow] = nrow; 2210 } 2211 PetscFunctionReturn(0); 2212 } 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2216 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2217 { 2218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2219 Mat outA; 2220 PetscErrorCode ierr; 2221 PetscBool row_identity,col_identity; 2222 2223 PetscFunctionBegin; 2224 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2225 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2226 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2227 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2228 2229 outA = inA; 2230 inA->factortype = MAT_FACTOR_LU; 2231 2232 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2233 2234 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2235 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2236 a->row = row; 2237 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2238 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2239 a->col = col; 2240 2241 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2242 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2243 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2244 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2245 2246 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2247 if (!a->solve_work) { 2248 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2249 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2250 } 2251 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2257 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2258 { 2259 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2260 PetscInt i,nz,mbs; 2261 2262 PetscFunctionBegin; 2263 nz = baij->maxnz; 2264 mbs = baij->mbs; 2265 for (i=0; i<nz; i++) { 2266 baij->j[i] = indices[i]; 2267 } 2268 baij->nz = nz; 2269 for (i=0; i<mbs; i++) { 2270 baij->ilen[i] = baij->imax[i]; 2271 } 2272 PetscFunctionReturn(0); 2273 } 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2277 /*@ 2278 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2279 in the matrix. 2280 2281 Input Parameters: 2282 + mat - the SeqBAIJ matrix 2283 - indices - the column indices 2284 2285 Level: advanced 2286 2287 Notes: 2288 This can be called if you have precomputed the nonzero structure of the 2289 matrix and want to provide it to the matrix object to improve the performance 2290 of the MatSetValues() operation. 2291 2292 You MUST have set the correct numbers of nonzeros per row in the call to 2293 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2294 2295 MUST be called before any calls to MatSetValues(); 2296 2297 @*/ 2298 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2299 { 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2304 PetscValidPointer(indices,2); 2305 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 2309 #undef __FUNCT__ 2310 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2311 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2312 { 2313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2314 PetscErrorCode ierr; 2315 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2316 PetscReal atmp; 2317 PetscScalar *x,zero = 0.0; 2318 MatScalar *aa; 2319 PetscInt ncols,brow,krow,kcol; 2320 2321 PetscFunctionBegin; 2322 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2323 bs = A->rmap->bs; 2324 aa = a->a; 2325 ai = a->i; 2326 aj = a->j; 2327 mbs = a->mbs; 2328 2329 ierr = VecSet(v,zero);CHKERRQ(ierr); 2330 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2331 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2332 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2333 for (i=0; i<mbs; i++) { 2334 ncols = ai[1] - ai[0]; ai++; 2335 brow = bs*i; 2336 for (j=0; j<ncols; j++) { 2337 for (kcol=0; kcol<bs; kcol++) { 2338 for (krow=0; krow<bs; krow++) { 2339 atmp = PetscAbsScalar(*aa);aa++; 2340 row = brow + krow; /* row index */ 2341 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2342 } 2343 } 2344 aj++; 2345 } 2346 } 2347 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2348 PetscFunctionReturn(0); 2349 } 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatCopy_SeqBAIJ" 2353 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2354 { 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358 /* If the two matrices have the same copy implementation, use fast copy. */ 2359 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2361 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2362 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2363 2364 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2365 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2366 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2367 } else { 2368 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2369 } 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2375 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2376 { 2377 PetscErrorCode ierr; 2378 2379 PetscFunctionBegin; 2380 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2386 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2387 { 2388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2389 2390 PetscFunctionBegin; 2391 *array = a->a; 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2397 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2398 { 2399 PetscFunctionBegin; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2406 { 2407 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2408 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2409 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2410 PetscErrorCode ierr; 2411 2412 PetscFunctionBegin; 2413 /* Set the number of nonzeros in the new matrix */ 2414 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2420 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2421 { 2422 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2423 PetscErrorCode ierr; 2424 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2425 PetscBLASInt one=1; 2426 2427 PetscFunctionBegin; 2428 if (str == SAME_NONZERO_PATTERN) { 2429 PetscScalar alpha = a; 2430 PetscBLASInt bnz; 2431 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2432 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2433 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2434 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2435 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2436 } else { 2437 Mat B; 2438 PetscInt *nnz; 2439 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2440 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2441 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2442 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2443 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2445 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2446 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2447 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2448 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2449 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2450 ierr = PetscFree(nnz);CHKERRQ(ierr); 2451 } 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2457 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2458 { 2459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2460 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2461 MatScalar *aa = a->a; 2462 2463 PetscFunctionBegin; 2464 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2470 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2471 { 2472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2473 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2474 MatScalar *aa = a->a; 2475 2476 PetscFunctionBegin; 2477 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2483 /* 2484 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2485 */ 2486 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2487 { 2488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2489 PetscErrorCode ierr; 2490 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2491 PetscInt nz = a->i[m],row,*jj,mr,col; 2492 2493 PetscFunctionBegin; 2494 *nn = n; 2495 if (!ia) PetscFunctionReturn(0); 2496 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2497 else { 2498 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2499 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2500 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2501 jj = a->j; 2502 for (i=0; i<nz; i++) { 2503 collengths[jj[i]]++; 2504 } 2505 cia[0] = oshift; 2506 for (i=0; i<n; i++) { 2507 cia[i+1] = cia[i] + collengths[i]; 2508 } 2509 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2510 jj = a->j; 2511 for (row=0; row<m; row++) { 2512 mr = a->i[row+1] - a->i[row]; 2513 for (i=0; i<mr; i++) { 2514 col = *jj++; 2515 2516 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2517 } 2518 } 2519 ierr = PetscFree(collengths);CHKERRQ(ierr); 2520 *ia = cia; *ja = cja; 2521 } 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2527 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2528 { 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 if (!ia) PetscFunctionReturn(0); 2533 ierr = PetscFree(*ia);CHKERRQ(ierr); 2534 ierr = PetscFree(*ja);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 /* 2539 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2540 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2541 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2542 */ 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2545 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2546 { 2547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2548 PetscErrorCode ierr; 2549 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2550 PetscInt nz = a->i[m],row,*jj,mr,col; 2551 PetscInt *cspidx; 2552 2553 PetscFunctionBegin; 2554 *nn = n; 2555 if (!ia) PetscFunctionReturn(0); 2556 2557 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2558 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2559 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2560 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2561 jj = a->j; 2562 for (i=0; i<nz; i++) { 2563 collengths[jj[i]]++; 2564 } 2565 cia[0] = oshift; 2566 for (i=0; i<n; i++) { 2567 cia[i+1] = cia[i] + collengths[i]; 2568 } 2569 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2570 jj = a->j; 2571 for (row=0; row<m; row++) { 2572 mr = a->i[row+1] - a->i[row]; 2573 for (i=0; i<mr; i++) { 2574 col = *jj++; 2575 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2576 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2577 } 2578 } 2579 ierr = PetscFree(collengths);CHKERRQ(ierr); 2580 *ia = cia; *ja = cja; 2581 *spidx = cspidx; 2582 PetscFunctionReturn(0); 2583 } 2584 2585 #undef __FUNCT__ 2586 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2587 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2588 { 2589 PetscErrorCode ierr; 2590 2591 PetscFunctionBegin; 2592 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2593 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatShift_SeqBAIJ" 2599 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2600 { 2601 PetscErrorCode ierr; 2602 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2603 2604 PetscFunctionBegin; 2605 if (!aij->nz) { 2606 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2607 } 2608 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 /* -------------------------------------------------------------------*/ 2613 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2614 MatGetRow_SeqBAIJ, 2615 MatRestoreRow_SeqBAIJ, 2616 MatMult_SeqBAIJ_N, 2617 /* 4*/ MatMultAdd_SeqBAIJ_N, 2618 MatMultTranspose_SeqBAIJ, 2619 MatMultTransposeAdd_SeqBAIJ, 2620 0, 2621 0, 2622 0, 2623 /* 10*/ 0, 2624 MatLUFactor_SeqBAIJ, 2625 0, 2626 0, 2627 MatTranspose_SeqBAIJ, 2628 /* 15*/ MatGetInfo_SeqBAIJ, 2629 MatEqual_SeqBAIJ, 2630 MatGetDiagonal_SeqBAIJ, 2631 MatDiagonalScale_SeqBAIJ, 2632 MatNorm_SeqBAIJ, 2633 /* 20*/ 0, 2634 MatAssemblyEnd_SeqBAIJ, 2635 MatSetOption_SeqBAIJ, 2636 MatZeroEntries_SeqBAIJ, 2637 /* 24*/ MatZeroRows_SeqBAIJ, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /* 29*/ MatSetUp_SeqBAIJ, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /* 34*/ MatDuplicate_SeqBAIJ, 2648 0, 2649 0, 2650 MatILUFactor_SeqBAIJ, 2651 0, 2652 /* 39*/ MatAXPY_SeqBAIJ, 2653 MatGetSubMatrices_SeqBAIJ, 2654 MatIncreaseOverlap_SeqBAIJ, 2655 MatGetValues_SeqBAIJ, 2656 MatCopy_SeqBAIJ, 2657 /* 44*/ 0, 2658 MatScale_SeqBAIJ, 2659 MatShift_SeqBAIJ, 2660 0, 2661 MatZeroRowsColumns_SeqBAIJ, 2662 /* 49*/ 0, 2663 MatGetRowIJ_SeqBAIJ, 2664 MatRestoreRowIJ_SeqBAIJ, 2665 MatGetColumnIJ_SeqBAIJ, 2666 MatRestoreColumnIJ_SeqBAIJ, 2667 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2668 0, 2669 0, 2670 0, 2671 MatSetValuesBlocked_SeqBAIJ, 2672 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2673 MatDestroy_SeqBAIJ, 2674 MatView_SeqBAIJ, 2675 0, 2676 0, 2677 /* 64*/ 0, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2683 0, 2684 MatConvert_Basic, 2685 0, 2686 0, 2687 /* 74*/ 0, 2688 MatFDColoringApply_BAIJ, 2689 0, 2690 0, 2691 0, 2692 /* 79*/ 0, 2693 0, 2694 0, 2695 0, 2696 MatLoad_SeqBAIJ, 2697 /* 84*/ 0, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /* 89*/ 0, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /* 94*/ 0, 2708 0, 2709 0, 2710 0, 2711 0, 2712 /* 99*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /*104*/ 0, 2718 MatRealPart_SeqBAIJ, 2719 MatImaginaryPart_SeqBAIJ, 2720 0, 2721 0, 2722 /*109*/ 0, 2723 0, 2724 0, 2725 0, 2726 MatMissingDiagonal_SeqBAIJ, 2727 /*114*/ 0, 2728 0, 2729 0, 2730 0, 2731 0, 2732 /*119*/ 0, 2733 0, 2734 MatMultHermitianTranspose_SeqBAIJ, 2735 MatMultHermitianTransposeAdd_SeqBAIJ, 2736 0, 2737 /*124*/ 0, 2738 0, 2739 MatInvertBlockDiagonal_SeqBAIJ, 2740 0, 2741 0, 2742 /*129*/ 0, 2743 0, 2744 0, 2745 0, 2746 0, 2747 /*134*/ 0, 2748 0, 2749 0, 2750 0, 2751 0, 2752 /*139*/ 0, 2753 0, 2754 0, 2755 MatFDColoringSetUp_SeqXAIJ, 2756 0, 2757 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2758 }; 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2762 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2763 { 2764 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2765 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2766 PetscErrorCode ierr; 2767 2768 PetscFunctionBegin; 2769 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2770 2771 /* allocate space for values if not already there */ 2772 if (!aij->saved_values) { 2773 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2774 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2775 } 2776 2777 /* copy values over */ 2778 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2784 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2785 { 2786 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2787 PetscErrorCode ierr; 2788 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2789 2790 PetscFunctionBegin; 2791 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2792 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2793 2794 /* copy values over */ 2795 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2796 PetscFunctionReturn(0); 2797 } 2798 2799 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2800 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2804 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2805 { 2806 Mat_SeqBAIJ *b; 2807 PetscErrorCode ierr; 2808 PetscInt i,mbs,nbs,bs2; 2809 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2810 2811 PetscFunctionBegin; 2812 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2813 if (nz == MAT_SKIP_ALLOCATION) { 2814 skipallocation = PETSC_TRUE; 2815 nz = 0; 2816 } 2817 2818 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2819 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2821 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2822 2823 B->preallocated = PETSC_TRUE; 2824 2825 mbs = B->rmap->n/bs; 2826 nbs = B->cmap->n/bs; 2827 bs2 = bs*bs; 2828 2829 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2830 2831 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2832 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2833 if (nnz) { 2834 for (i=0; i<mbs; i++) { 2835 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2836 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2837 } 2838 } 2839 2840 b = (Mat_SeqBAIJ*)B->data; 2841 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2842 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2843 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2844 2845 if (!flg) { 2846 switch (bs) { 2847 case 1: 2848 B->ops->mult = MatMult_SeqBAIJ_1; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2850 break; 2851 case 2: 2852 B->ops->mult = MatMult_SeqBAIJ_2; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2854 break; 2855 case 3: 2856 B->ops->mult = MatMult_SeqBAIJ_3; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2858 break; 2859 case 4: 2860 B->ops->mult = MatMult_SeqBAIJ_4; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2862 break; 2863 case 5: 2864 B->ops->mult = MatMult_SeqBAIJ_5; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2866 break; 2867 case 6: 2868 B->ops->mult = MatMult_SeqBAIJ_6; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2870 break; 2871 case 7: 2872 B->ops->mult = MatMult_SeqBAIJ_7; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2874 break; 2875 case 15: 2876 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2878 break; 2879 default: 2880 B->ops->mult = MatMult_SeqBAIJ_N; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2882 break; 2883 } 2884 } 2885 B->ops->sor = MatSOR_SeqBAIJ; 2886 b->mbs = mbs; 2887 b->nbs = nbs; 2888 if (!skipallocation) { 2889 if (!b->imax) { 2890 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2891 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2892 2893 b->free_imax_ilen = PETSC_TRUE; 2894 } 2895 /* b->ilen will count nonzeros in each block row so far. */ 2896 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2897 if (!nnz) { 2898 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2899 else if (nz < 0) nz = 1; 2900 for (i=0; i<mbs; i++) b->imax[i] = nz; 2901 nz = nz*mbs; 2902 } else { 2903 nz = 0; 2904 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2905 } 2906 2907 /* allocate the matrix space */ 2908 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2909 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2910 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2911 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2912 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2913 2914 b->singlemalloc = PETSC_TRUE; 2915 b->i[0] = 0; 2916 for (i=1; i<mbs+1; i++) { 2917 b->i[i] = b->i[i-1] + b->imax[i-1]; 2918 } 2919 b->free_a = PETSC_TRUE; 2920 b->free_ij = PETSC_TRUE; 2921 } else { 2922 b->free_a = PETSC_FALSE; 2923 b->free_ij = PETSC_FALSE; 2924 } 2925 2926 b->bs2 = bs2; 2927 b->mbs = mbs; 2928 b->nz = 0; 2929 b->maxnz = nz; 2930 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2931 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2937 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2938 { 2939 PetscInt i,m,nz,nz_max=0,*nnz; 2940 PetscScalar *values=0; 2941 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2942 PetscErrorCode ierr; 2943 2944 PetscFunctionBegin; 2945 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2946 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2947 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2948 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2949 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2950 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2951 m = B->rmap->n/bs; 2952 2953 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2954 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2955 for (i=0; i<m; i++) { 2956 nz = ii[i+1]- ii[i]; 2957 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2958 nz_max = PetscMax(nz_max, nz); 2959 nnz[i] = nz; 2960 } 2961 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2962 ierr = PetscFree(nnz);CHKERRQ(ierr); 2963 2964 values = (PetscScalar*)V; 2965 if (!values) { 2966 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2967 } 2968 for (i=0; i<m; i++) { 2969 PetscInt ncols = ii[i+1] - ii[i]; 2970 const PetscInt *icols = jj + ii[i]; 2971 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2972 if (!roworiented) { 2973 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2974 } else { 2975 PetscInt j; 2976 for (j=0; j<ncols; j++) { 2977 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2978 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2979 } 2980 } 2981 } 2982 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2983 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2985 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2986 PetscFunctionReturn(0); 2987 } 2988 2989 /*MC 2990 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2991 block sparse compressed row format. 2992 2993 Options Database Keys: 2994 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2995 2996 Level: beginner 2997 2998 .seealso: MatCreateSeqBAIJ() 2999 M*/ 3000 3001 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "MatCreate_SeqBAIJ" 3005 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3006 { 3007 PetscErrorCode ierr; 3008 PetscMPIInt size; 3009 Mat_SeqBAIJ *b; 3010 3011 PetscFunctionBegin; 3012 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3013 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3014 3015 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3016 B->data = (void*)b; 3017 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3018 3019 b->row = 0; 3020 b->col = 0; 3021 b->icol = 0; 3022 b->reallocs = 0; 3023 b->saved_values = 0; 3024 3025 b->roworiented = PETSC_TRUE; 3026 b->nonew = 0; 3027 b->diag = 0; 3028 B->spptr = 0; 3029 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3030 b->keepnonzeropattern = PETSC_FALSE; 3031 3032 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3033 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3043 PetscFunctionReturn(0); 3044 } 3045 3046 #undef __FUNCT__ 3047 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3048 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3049 { 3050 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3051 PetscErrorCode ierr; 3052 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3053 3054 PetscFunctionBegin; 3055 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3056 3057 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3058 c->imax = a->imax; 3059 c->ilen = a->ilen; 3060 c->free_imax_ilen = PETSC_FALSE; 3061 } else { 3062 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3063 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3064 for (i=0; i<mbs; i++) { 3065 c->imax[i] = a->imax[i]; 3066 c->ilen[i] = a->ilen[i]; 3067 } 3068 c->free_imax_ilen = PETSC_TRUE; 3069 } 3070 3071 /* allocate the matrix space */ 3072 if (mallocmatspace) { 3073 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3074 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3075 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3076 3077 c->i = a->i; 3078 c->j = a->j; 3079 c->singlemalloc = PETSC_FALSE; 3080 c->free_a = PETSC_TRUE; 3081 c->free_ij = PETSC_FALSE; 3082 c->parent = A; 3083 C->preallocated = PETSC_TRUE; 3084 C->assembled = PETSC_TRUE; 3085 3086 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3087 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3088 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3089 } else { 3090 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3091 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3092 3093 c->singlemalloc = PETSC_TRUE; 3094 c->free_a = PETSC_TRUE; 3095 c->free_ij = PETSC_TRUE; 3096 3097 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3098 if (mbs > 0) { 3099 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3100 if (cpvalues == MAT_COPY_VALUES) { 3101 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3102 } else { 3103 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3104 } 3105 } 3106 C->preallocated = PETSC_TRUE; 3107 C->assembled = PETSC_TRUE; 3108 } 3109 } 3110 3111 c->roworiented = a->roworiented; 3112 c->nonew = a->nonew; 3113 3114 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3115 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3116 3117 c->bs2 = a->bs2; 3118 c->mbs = a->mbs; 3119 c->nbs = a->nbs; 3120 3121 if (a->diag) { 3122 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3123 c->diag = a->diag; 3124 c->free_diag = PETSC_FALSE; 3125 } else { 3126 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3127 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3128 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3129 c->free_diag = PETSC_TRUE; 3130 } 3131 } else c->diag = 0; 3132 3133 c->nz = a->nz; 3134 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3135 c->solve_work = NULL; 3136 c->mult_work = NULL; 3137 c->sor_workt = NULL; 3138 c->sor_work = NULL; 3139 3140 c->compressedrow.use = a->compressedrow.use; 3141 c->compressedrow.nrows = a->compressedrow.nrows; 3142 if (a->compressedrow.use) { 3143 i = a->compressedrow.nrows; 3144 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3145 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3146 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3147 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3148 } else { 3149 c->compressedrow.use = PETSC_FALSE; 3150 c->compressedrow.i = NULL; 3151 c->compressedrow.rindex = NULL; 3152 } 3153 C->nonzerostate = A->nonzerostate; 3154 3155 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3156 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3162 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3163 { 3164 PetscErrorCode ierr; 3165 3166 PetscFunctionBegin; 3167 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3168 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3169 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3170 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3171 PetscFunctionReturn(0); 3172 } 3173 3174 #undef __FUNCT__ 3175 #define __FUNCT__ "MatLoad_SeqBAIJ" 3176 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3177 { 3178 Mat_SeqBAIJ *a; 3179 PetscErrorCode ierr; 3180 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3181 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3182 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3183 PetscInt *masked,nmask,tmp,bs2,ishift; 3184 PetscMPIInt size; 3185 int fd; 3186 PetscScalar *aa; 3187 MPI_Comm comm; 3188 3189 PetscFunctionBegin; 3190 /* force binary viewer to load .info file if it has not yet done so */ 3191 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3192 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3193 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3194 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3195 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3196 if (bs < 0) bs = 1; 3197 bs2 = bs*bs; 3198 3199 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3200 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3201 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3202 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3203 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3204 M = header[1]; N = header[2]; nz = header[3]; 3205 3206 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3207 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3208 3209 /* 3210 This code adds extra rows to make sure the number of rows is 3211 divisible by the blocksize 3212 */ 3213 mbs = M/bs; 3214 extra_rows = bs - M + bs*(mbs); 3215 if (extra_rows == bs) extra_rows = 0; 3216 else mbs++; 3217 if (extra_rows) { 3218 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3219 } 3220 3221 /* Set global sizes if not already set */ 3222 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3223 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3224 } else { /* Check if the matrix global sizes are correct */ 3225 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3226 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3227 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3228 } 3229 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3230 } 3231 3232 /* read in row lengths */ 3233 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3234 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3235 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3236 3237 /* read in column indices */ 3238 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3239 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3240 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3241 3242 /* loop over row lengths determining block row lengths */ 3243 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3244 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3245 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3246 rowcount = 0; 3247 nzcount = 0; 3248 for (i=0; i<mbs; i++) { 3249 nmask = 0; 3250 for (j=0; j<bs; j++) { 3251 kmax = rowlengths[rowcount]; 3252 for (k=0; k<kmax; k++) { 3253 tmp = jj[nzcount++]/bs; 3254 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3255 } 3256 rowcount++; 3257 } 3258 browlengths[i] += nmask; 3259 /* zero out the mask elements we set */ 3260 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3261 } 3262 3263 /* Do preallocation */ 3264 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3265 a = (Mat_SeqBAIJ*)newmat->data; 3266 3267 /* set matrix "i" values */ 3268 a->i[0] = 0; 3269 for (i=1; i<= mbs; i++) { 3270 a->i[i] = a->i[i-1] + browlengths[i-1]; 3271 a->ilen[i-1] = browlengths[i-1]; 3272 } 3273 a->nz = 0; 3274 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3275 3276 /* read in nonzero values */ 3277 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3278 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3279 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3280 3281 /* set "a" and "j" values into matrix */ 3282 nzcount = 0; jcount = 0; 3283 for (i=0; i<mbs; i++) { 3284 nzcountb = nzcount; 3285 nmask = 0; 3286 for (j=0; j<bs; j++) { 3287 kmax = rowlengths[i*bs+j]; 3288 for (k=0; k<kmax; k++) { 3289 tmp = jj[nzcount++]/bs; 3290 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3291 } 3292 } 3293 /* sort the masked values */ 3294 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3295 3296 /* set "j" values into matrix */ 3297 maskcount = 1; 3298 for (j=0; j<nmask; j++) { 3299 a->j[jcount++] = masked[j]; 3300 mask[masked[j]] = maskcount++; 3301 } 3302 /* set "a" values into matrix */ 3303 ishift = bs2*a->i[i]; 3304 for (j=0; j<bs; j++) { 3305 kmax = rowlengths[i*bs+j]; 3306 for (k=0; k<kmax; k++) { 3307 tmp = jj[nzcountb]/bs; 3308 block = mask[tmp] - 1; 3309 point = jj[nzcountb] - bs*tmp; 3310 idx = ishift + bs2*block + j + bs*point; 3311 a->a[idx] = (MatScalar)aa[nzcountb++]; 3312 } 3313 } 3314 /* zero out the mask elements we set */ 3315 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3316 } 3317 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3318 3319 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3320 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3321 ierr = PetscFree(aa);CHKERRQ(ierr); 3322 ierr = PetscFree(jj);CHKERRQ(ierr); 3323 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3324 3325 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatCreateSeqBAIJ" 3332 /*@C 3333 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3334 compressed row) format. For good matrix assembly performance the 3335 user should preallocate the matrix storage by setting the parameter nz 3336 (or the array nnz). By setting these parameters accurately, performance 3337 during matrix assembly can be increased by more than a factor of 50. 3338 3339 Collective on MPI_Comm 3340 3341 Input Parameters: 3342 + comm - MPI communicator, set to PETSC_COMM_SELF 3343 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3344 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3345 . m - number of rows 3346 . n - number of columns 3347 . nz - number of nonzero blocks per block row (same for all rows) 3348 - nnz - array containing the number of nonzero blocks in the various block rows 3349 (possibly different for each block row) or NULL 3350 3351 Output Parameter: 3352 . A - the matrix 3353 3354 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3355 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3356 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3357 3358 Options Database Keys: 3359 . -mat_no_unroll - uses code that does not unroll the loops in the 3360 block calculations (much slower) 3361 . -mat_block_size - size of the blocks to use 3362 3363 Level: intermediate 3364 3365 Notes: 3366 The number of rows and columns must be divisible by blocksize. 3367 3368 If the nnz parameter is given then the nz parameter is ignored 3369 3370 A nonzero block is any block that as 1 or more nonzeros in it 3371 3372 The block AIJ format is fully compatible with standard Fortran 77 3373 storage. That is, the stored row and column indices can begin at 3374 either one (as in Fortran) or zero. See the users' manual for details. 3375 3376 Specify the preallocated storage with either nz or nnz (not both). 3377 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3378 allocation. See Users-Manual: ch_mat for details. 3379 matrices. 3380 3381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3382 @*/ 3383 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3384 { 3385 PetscErrorCode ierr; 3386 3387 PetscFunctionBegin; 3388 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3389 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3390 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3391 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3392 PetscFunctionReturn(0); 3393 } 3394 3395 #undef __FUNCT__ 3396 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3397 /*@C 3398 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3399 per row in the matrix. For good matrix assembly performance the 3400 user should preallocate the matrix storage by setting the parameter nz 3401 (or the array nnz). By setting these parameters accurately, performance 3402 during matrix assembly can be increased by more than a factor of 50. 3403 3404 Collective on MPI_Comm 3405 3406 Input Parameters: 3407 + B - the matrix 3408 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3409 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3410 . nz - number of block nonzeros per block row (same for all rows) 3411 - nnz - array containing the number of block nonzeros in the various block rows 3412 (possibly different for each block row) or NULL 3413 3414 Options Database Keys: 3415 . -mat_no_unroll - uses code that does not unroll the loops in the 3416 block calculations (much slower) 3417 . -mat_block_size - size of the blocks to use 3418 3419 Level: intermediate 3420 3421 Notes: 3422 If the nnz parameter is given then the nz parameter is ignored 3423 3424 You can call MatGetInfo() to get information on how effective the preallocation was; 3425 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3426 You can also run with the option -info and look for messages with the string 3427 malloc in them to see if additional memory allocation was needed. 3428 3429 The block AIJ format is fully compatible with standard Fortran 77 3430 storage. That is, the stored row and column indices can begin at 3431 either one (as in Fortran) or zero. See the users' manual for details. 3432 3433 Specify the preallocated storage with either nz or nnz (not both). 3434 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3435 allocation. See Users-Manual: ch_mat for details. 3436 3437 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3438 @*/ 3439 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3440 { 3441 PetscErrorCode ierr; 3442 3443 PetscFunctionBegin; 3444 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3445 PetscValidType(B,1); 3446 PetscValidLogicalCollectiveInt(B,bs,2); 3447 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3448 PetscFunctionReturn(0); 3449 } 3450 3451 #undef __FUNCT__ 3452 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3453 /*@C 3454 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3455 (the default sequential PETSc format). 3456 3457 Collective on MPI_Comm 3458 3459 Input Parameters: 3460 + B - the matrix 3461 . i - the indices into j for the start of each local row (starts with zero) 3462 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3463 - v - optional values in the matrix 3464 3465 Level: developer 3466 3467 Notes: 3468 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3469 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3470 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3471 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3472 block column and the second index is over columns within a block. 3473 3474 .keywords: matrix, aij, compressed row, sparse 3475 3476 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3477 @*/ 3478 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3479 { 3480 PetscErrorCode ierr; 3481 3482 PetscFunctionBegin; 3483 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3484 PetscValidType(B,1); 3485 PetscValidLogicalCollectiveInt(B,bs,2); 3486 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3487 PetscFunctionReturn(0); 3488 } 3489 3490 3491 #undef __FUNCT__ 3492 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3493 /*@ 3494 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3495 3496 Collective on MPI_Comm 3497 3498 Input Parameters: 3499 + comm - must be an MPI communicator of size 1 3500 . bs - size of block 3501 . m - number of rows 3502 . n - number of columns 3503 . i - row indices 3504 . j - column indices 3505 - a - matrix values 3506 3507 Output Parameter: 3508 . mat - the matrix 3509 3510 Level: advanced 3511 3512 Notes: 3513 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3514 once the matrix is destroyed 3515 3516 You cannot set new nonzero locations into this matrix, that will generate an error. 3517 3518 The i and j indices are 0 based 3519 3520 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3521 3522 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3523 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3524 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3525 with column-major ordering within blocks. 3526 3527 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3528 3529 @*/ 3530 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3531 { 3532 PetscErrorCode ierr; 3533 PetscInt ii; 3534 Mat_SeqBAIJ *baij; 3535 3536 PetscFunctionBegin; 3537 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3538 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3539 3540 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3541 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3542 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3543 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3544 baij = (Mat_SeqBAIJ*)(*mat)->data; 3545 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3546 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3547 3548 baij->i = i; 3549 baij->j = j; 3550 baij->a = a; 3551 3552 baij->singlemalloc = PETSC_FALSE; 3553 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3554 baij->free_a = PETSC_FALSE; 3555 baij->free_ij = PETSC_FALSE; 3556 3557 for (ii=0; ii<m; ii++) { 3558 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3559 #if defined(PETSC_USE_DEBUG) 3560 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3561 #endif 3562 } 3563 #if defined(PETSC_USE_DEBUG) 3564 for (ii=0; ii<baij->i[m]; ii++) { 3565 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3566 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3567 } 3568 #endif 3569 3570 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3571 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3572 PetscFunctionReturn(0); 3573 } 3574 3575 #undef __FUNCT__ 3576 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3577 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3578 { 3579 PetscErrorCode ierr; 3580 3581 PetscFunctionBegin; 3582 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3583 PetscFunctionReturn(0); 3584 } 3585