1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 PetscBool wouldcrash; 67 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,A->erroriffailure,&wouldcrash);CHKERRQ(ierr); 68 diag += 9; 69 mdiag += 9; 70 } 71 break; 72 case 4: 73 for (i=0; i<mbs; i++) { 74 odiag = v + 16*diag_offset[i]; 75 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 77 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 78 diag += 16; 79 mdiag += 16; 80 } 81 break; 82 case 5: 83 for (i=0; i<mbs; i++) { 84 odiag = v + 25*diag_offset[i]; 85 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 87 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 88 diag += 25; 89 mdiag += 25; 90 } 91 break; 92 case 6: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 36*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 97 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 98 diag += 36; 99 mdiag += 36; 100 } 101 break; 102 case 7: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 49*diag_offset[i]; 105 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 108 diag += 49; 109 mdiag += 49; 110 } 111 break; 112 default: 113 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 114 for (i=0; i<mbs; i++) { 115 odiag = v + bs2*diag_offset[i]; 116 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 118 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 119 diag += bs2; 120 mdiag += bs2; 121 } 122 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 123 } 124 a->idiagvalid = PETSC_TRUE; 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSOR_SeqBAIJ" 130 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 131 { 132 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 133 PetscScalar *x,*work,*w,*workt,*t; 134 const MatScalar *v,*aa = a->a, *idiag; 135 const PetscScalar *b,*xb; 136 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 137 PetscErrorCode ierr; 138 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 139 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 140 141 PetscFunctionBegin; 142 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 143 its = its*lits; 144 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 145 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 146 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 147 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 148 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 149 150 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 151 152 if (!m) PetscFunctionReturn(0); 153 diag = a->diag; 154 idiag = a->idiag; 155 k = PetscMax(A->rmap->n,A->cmap->n); 156 if (!a->mult_work) { 157 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 158 } 159 if (!a->sor_workt) { 160 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 161 } 162 if (!a->sor_work) { 163 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 164 } 165 work = a->mult_work; 166 t = a->sor_workt; 167 w = a->sor_work; 168 169 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 170 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 171 172 if (flag & SOR_ZERO_INITIAL_GUESS) { 173 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 174 switch (bs) { 175 case 1: 176 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 177 t[0] = b[0]; 178 i2 = 1; 179 idiag += 1; 180 for (i=1; i<m; i++) { 181 v = aa + ai[i]; 182 vi = aj + ai[i]; 183 nz = diag[i] - ai[i]; 184 s[0] = b[i2]; 185 for (j=0; j<nz; j++) { 186 xw[0] = x[vi[j]]; 187 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 188 } 189 t[i2] = s[0]; 190 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 191 x[i2] = xw[0]; 192 idiag += 1; 193 i2 += 1; 194 } 195 break; 196 case 2: 197 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 198 t[0] = b[0]; t[1] = b[1]; 199 i2 = 2; 200 idiag += 4; 201 for (i=1; i<m; i++) { 202 v = aa + 4*ai[i]; 203 vi = aj + ai[i]; 204 nz = diag[i] - ai[i]; 205 s[0] = b[i2]; s[1] = b[i2+1]; 206 for (j=0; j<nz; j++) { 207 idx = 2*vi[j]; 208 it = 4*j; 209 xw[0] = x[idx]; xw[1] = x[1+idx]; 210 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 211 } 212 t[i2] = s[0]; t[i2+1] = s[1]; 213 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 214 x[i2] = xw[0]; x[i2+1] = xw[1]; 215 idiag += 4; 216 i2 += 2; 217 } 218 break; 219 case 3: 220 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 221 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 222 i2 = 3; 223 idiag += 9; 224 for (i=1; i<m; i++) { 225 v = aa + 9*ai[i]; 226 vi = aj + ai[i]; 227 nz = diag[i] - ai[i]; 228 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 229 while (nz--) { 230 idx = 3*(*vi++); 231 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 232 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 233 v += 9; 234 } 235 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 236 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 237 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 238 idiag += 9; 239 i2 += 3; 240 } 241 break; 242 case 4: 243 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 244 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 245 i2 = 4; 246 idiag += 16; 247 for (i=1; i<m; i++) { 248 v = aa + 16*ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 252 while (nz--) { 253 idx = 4*(*vi++); 254 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 255 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 256 v += 16; 257 } 258 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 259 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 260 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 261 idiag += 16; 262 i2 += 4; 263 } 264 break; 265 case 5: 266 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 267 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 268 i2 = 5; 269 idiag += 25; 270 for (i=1; i<m; i++) { 271 v = aa + 25*ai[i]; 272 vi = aj + ai[i]; 273 nz = diag[i] - ai[i]; 274 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 275 while (nz--) { 276 idx = 5*(*vi++); 277 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 278 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 279 v += 25; 280 } 281 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 282 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 283 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 284 idiag += 25; 285 i2 += 5; 286 } 287 break; 288 case 6: 289 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 290 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 291 i2 = 6; 292 idiag += 36; 293 for (i=1; i<m; i++) { 294 v = aa + 36*ai[i]; 295 vi = aj + ai[i]; 296 nz = diag[i] - ai[i]; 297 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 298 while (nz--) { 299 idx = 6*(*vi++); 300 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 301 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 302 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 303 v += 36; 304 } 305 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 306 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 307 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 308 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 309 idiag += 36; 310 i2 += 6; 311 } 312 break; 313 case 7: 314 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 315 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 316 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 317 i2 = 7; 318 idiag += 49; 319 for (i=1; i<m; i++) { 320 v = aa + 49*ai[i]; 321 vi = aj + ai[i]; 322 nz = diag[i] - ai[i]; 323 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 324 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 325 while (nz--) { 326 idx = 7*(*vi++); 327 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 328 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 329 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 330 v += 49; 331 } 332 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 333 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 334 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 335 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 336 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 337 idiag += 49; 338 i2 += 7; 339 } 340 break; 341 default: 342 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 343 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 344 i2 = bs; 345 idiag += bs2; 346 for (i=1; i<m; i++) { 347 v = aa + bs2*ai[i]; 348 vi = aj + ai[i]; 349 nz = diag[i] - ai[i]; 350 351 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 352 /* copy all rows of x that are needed into contiguous space */ 353 workt = work; 354 for (j=0; j<nz; j++) { 355 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 356 workt += bs; 357 } 358 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 359 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 360 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 361 362 idiag += bs2; 363 i2 += bs; 364 } 365 break; 366 } 367 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 368 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 369 xb = t; 370 } 371 else xb = b; 372 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 373 idiag = a->idiag+bs2*(a->mbs-1); 374 i2 = bs * (m-1); 375 switch (bs) { 376 case 1: 377 s[0] = xb[i2]; 378 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 379 x[i2] = xw[0]; 380 i2 -= 1; 381 for (i=m-2; i>=0; i--) { 382 v = aa + (diag[i]+1); 383 vi = aj + diag[i] + 1; 384 nz = ai[i+1] - diag[i] - 1; 385 s[0] = xb[i2]; 386 for (j=0; j<nz; j++) { 387 xw[0] = x[vi[j]]; 388 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 389 } 390 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 391 x[i2] = xw[0]; 392 idiag -= 1; 393 i2 -= 1; 394 } 395 break; 396 case 2: 397 s[0] = xb[i2]; s[1] = xb[i2+1]; 398 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 399 x[i2] = xw[0]; x[i2+1] = xw[1]; 400 i2 -= 2; 401 idiag -= 4; 402 for (i=m-2; i>=0; i--) { 403 v = aa + 4*(diag[i] + 1); 404 vi = aj + diag[i] + 1; 405 nz = ai[i+1] - diag[i] - 1; 406 s[0] = xb[i2]; s[1] = xb[i2+1]; 407 for (j=0; j<nz; j++) { 408 idx = 2*vi[j]; 409 it = 4*j; 410 xw[0] = x[idx]; xw[1] = x[1+idx]; 411 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 412 } 413 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 414 x[i2] = xw[0]; x[i2+1] = xw[1]; 415 idiag -= 4; 416 i2 -= 2; 417 } 418 break; 419 case 3: 420 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 421 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 422 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 423 i2 -= 3; 424 idiag -= 9; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 9*(diag[i]+1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 430 while (nz--) { 431 idx = 3*(*vi++); 432 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 433 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 434 v += 9; 435 } 436 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 437 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 438 idiag -= 9; 439 i2 -= 3; 440 } 441 break; 442 case 4: 443 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 444 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 445 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 446 i2 -= 4; 447 idiag -= 16; 448 for (i=m-2; i>=0; i--) { 449 v = aa + 16*(diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 453 while (nz--) { 454 idx = 4*(*vi++); 455 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 456 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 457 v += 16; 458 } 459 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 460 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 461 idiag -= 16; 462 i2 -= 4; 463 } 464 break; 465 case 5: 466 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 467 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 468 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 469 i2 -= 5; 470 idiag -= 25; 471 for (i=m-2; i>=0; i--) { 472 v = aa + 25*(diag[i]+1); 473 vi = aj + diag[i] + 1; 474 nz = ai[i+1] - diag[i] - 1; 475 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 476 while (nz--) { 477 idx = 5*(*vi++); 478 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 479 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 480 v += 25; 481 } 482 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 483 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 484 idiag -= 25; 485 i2 -= 5; 486 } 487 break; 488 case 6: 489 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 490 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 491 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 492 i2 -= 6; 493 idiag -= 36; 494 for (i=m-2; i>=0; i--) { 495 v = aa + 36*(diag[i]+1); 496 vi = aj + diag[i] + 1; 497 nz = ai[i+1] - diag[i] - 1; 498 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 499 while (nz--) { 500 idx = 6*(*vi++); 501 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 502 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 503 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 504 v += 36; 505 } 506 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 507 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 508 idiag -= 36; 509 i2 -= 6; 510 } 511 break; 512 case 7: 513 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 514 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 515 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 516 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 517 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 518 i2 -= 7; 519 idiag -= 49; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 49*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 525 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 526 while (nz--) { 527 idx = 7*(*vi++); 528 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 529 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 530 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 531 v += 49; 532 } 533 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 534 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 535 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 536 idiag -= 49; 537 i2 -= 7; 538 } 539 break; 540 default: 541 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 542 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 543 i2 -= bs; 544 idiag -= bs2; 545 for (i=m-2; i>=0; i--) { 546 v = aa + bs2*(diag[i]+1); 547 vi = aj + diag[i] + 1; 548 nz = ai[i+1] - diag[i] - 1; 549 550 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 551 /* copy all rows of x that are needed into contiguous space */ 552 workt = work; 553 for (j=0; j<nz; j++) { 554 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 555 workt += bs; 556 } 557 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 558 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 559 560 idiag -= bs2; 561 i2 -= bs; 562 } 563 break; 564 } 565 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 566 } 567 its--; 568 } 569 while (its--) { 570 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 571 idiag = a->idiag; 572 i2 = 0; 573 switch (bs) { 574 case 1: 575 for (i=0; i<m; i++) { 576 v = aa + ai[i]; 577 vi = aj + ai[i]; 578 nz = ai[i+1] - ai[i]; 579 s[0] = b[i2]; 580 for (j=0; j<nz; j++) { 581 xw[0] = x[vi[j]]; 582 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 583 } 584 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 585 x[i2] += xw[0]; 586 idiag += 1; 587 i2 += 1; 588 } 589 break; 590 case 2: 591 for (i=0; i<m; i++) { 592 v = aa + 4*ai[i]; 593 vi = aj + ai[i]; 594 nz = ai[i+1] - ai[i]; 595 s[0] = b[i2]; s[1] = b[i2+1]; 596 for (j=0; j<nz; j++) { 597 idx = 2*vi[j]; 598 it = 4*j; 599 xw[0] = x[idx]; xw[1] = x[1+idx]; 600 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 601 } 602 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 603 x[i2] += xw[0]; x[i2+1] += xw[1]; 604 idiag += 4; 605 i2 += 2; 606 } 607 break; 608 case 3: 609 for (i=0; i<m; i++) { 610 v = aa + 9*ai[i]; 611 vi = aj + ai[i]; 612 nz = ai[i+1] - ai[i]; 613 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 614 while (nz--) { 615 idx = 3*(*vi++); 616 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 617 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 618 v += 9; 619 } 620 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 621 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 622 idiag += 9; 623 i2 += 3; 624 } 625 break; 626 case 4: 627 for (i=0; i<m; i++) { 628 v = aa + 16*ai[i]; 629 vi = aj + ai[i]; 630 nz = ai[i+1] - ai[i]; 631 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 632 while (nz--) { 633 idx = 4*(*vi++); 634 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 635 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 636 v += 16; 637 } 638 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 639 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 640 idiag += 16; 641 i2 += 4; 642 } 643 break; 644 case 5: 645 for (i=0; i<m; i++) { 646 v = aa + 25*ai[i]; 647 vi = aj + ai[i]; 648 nz = ai[i+1] - ai[i]; 649 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 650 while (nz--) { 651 idx = 5*(*vi++); 652 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 653 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 654 v += 25; 655 } 656 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 657 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 658 idiag += 25; 659 i2 += 5; 660 } 661 break; 662 case 6: 663 for (i=0; i<m; i++) { 664 v = aa + 36*ai[i]; 665 vi = aj + ai[i]; 666 nz = ai[i+1] - ai[i]; 667 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 668 while (nz--) { 669 idx = 6*(*vi++); 670 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 671 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 673 v += 36; 674 } 675 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 677 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 678 idiag += 36; 679 i2 += 6; 680 } 681 break; 682 case 7: 683 for (i=0; i<m; i++) { 684 v = aa + 49*ai[i]; 685 vi = aj + ai[i]; 686 nz = ai[i+1] - ai[i]; 687 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 688 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 689 while (nz--) { 690 idx = 7*(*vi++); 691 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 692 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 693 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 694 v += 49; 695 } 696 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 697 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 698 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 699 idiag += 49; 700 i2 += 7; 701 } 702 break; 703 default: 704 for (i=0; i<m; i++) { 705 v = aa + bs2*ai[i]; 706 vi = aj + ai[i]; 707 nz = ai[i+1] - ai[i]; 708 709 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 710 /* copy all rows of x that are needed into contiguous space */ 711 workt = work; 712 for (j=0; j<nz; j++) { 713 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 714 workt += bs; 715 } 716 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 717 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 718 719 idiag += bs2; 720 i2 += bs; 721 } 722 break; 723 } 724 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 725 } 726 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 727 idiag = a->idiag+bs2*(a->mbs-1); 728 i2 = bs * (m-1); 729 switch (bs) { 730 case 1: 731 for (i=m-1; i>=0; i--) { 732 v = aa + ai[i]; 733 vi = aj + ai[i]; 734 nz = ai[i+1] - ai[i]; 735 s[0] = b[i2]; 736 for (j=0; j<nz; j++) { 737 xw[0] = x[vi[j]]; 738 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 739 } 740 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 741 x[i2] += xw[0]; 742 idiag -= 1; 743 i2 -= 1; 744 } 745 break; 746 case 2: 747 for (i=m-1; i>=0; i--) { 748 v = aa + 4*ai[i]; 749 vi = aj + ai[i]; 750 nz = ai[i+1] - ai[i]; 751 s[0] = b[i2]; s[1] = b[i2+1]; 752 for (j=0; j<nz; j++) { 753 idx = 2*vi[j]; 754 it = 4*j; 755 xw[0] = x[idx]; xw[1] = x[1+idx]; 756 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 757 } 758 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 759 x[i2] += xw[0]; x[i2+1] += xw[1]; 760 idiag -= 4; 761 i2 -= 2; 762 } 763 break; 764 case 3: 765 for (i=m-1; i>=0; i--) { 766 v = aa + 9*ai[i]; 767 vi = aj + ai[i]; 768 nz = ai[i+1] - ai[i]; 769 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 770 while (nz--) { 771 idx = 3*(*vi++); 772 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 773 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 774 v += 9; 775 } 776 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 777 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 778 idiag -= 9; 779 i2 -= 3; 780 } 781 break; 782 case 4: 783 for (i=m-1; i>=0; i--) { 784 v = aa + 16*ai[i]; 785 vi = aj + ai[i]; 786 nz = ai[i+1] - ai[i]; 787 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 788 while (nz--) { 789 idx = 4*(*vi++); 790 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 791 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 792 v += 16; 793 } 794 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 795 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 796 idiag -= 16; 797 i2 -= 4; 798 } 799 break; 800 case 5: 801 for (i=m-1; i>=0; i--) { 802 v = aa + 25*ai[i]; 803 vi = aj + ai[i]; 804 nz = ai[i+1] - ai[i]; 805 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 806 while (nz--) { 807 idx = 5*(*vi++); 808 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 809 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 810 v += 25; 811 } 812 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 813 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 814 idiag -= 25; 815 i2 -= 5; 816 } 817 break; 818 case 6: 819 for (i=m-1; i>=0; i--) { 820 v = aa + 36*ai[i]; 821 vi = aj + ai[i]; 822 nz = ai[i+1] - ai[i]; 823 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 824 while (nz--) { 825 idx = 6*(*vi++); 826 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 827 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 829 v += 36; 830 } 831 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 833 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 834 idiag -= 36; 835 i2 -= 6; 836 } 837 break; 838 case 7: 839 for (i=m-1; i>=0; i--) { 840 v = aa + 49*ai[i]; 841 vi = aj + ai[i]; 842 nz = ai[i+1] - ai[i]; 843 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 844 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 845 while (nz--) { 846 idx = 7*(*vi++); 847 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 848 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 849 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 850 v += 49; 851 } 852 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 853 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 854 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 855 idiag -= 49; 856 i2 -= 7; 857 } 858 break; 859 default: 860 for (i=m-1; i>=0; i--) { 861 v = aa + bs2*ai[i]; 862 vi = aj + ai[i]; 863 nz = ai[i+1] - ai[i]; 864 865 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 866 /* copy all rows of x that are needed into contiguous space */ 867 workt = work; 868 for (j=0; j<nz; j++) { 869 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 870 workt += bs; 871 } 872 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 873 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 874 875 idiag -= bs2; 876 i2 -= bs; 877 } 878 break; 879 } 880 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 881 } 882 } 883 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 884 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 885 PetscFunctionReturn(0); 886 } 887 888 889 /* 890 Special version for direct calls from Fortran (Used in PETSc-fun3d) 891 */ 892 #if defined(PETSC_HAVE_FORTRAN_CAPS) 893 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 894 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 895 #define matsetvaluesblocked4_ matsetvaluesblocked4 896 #endif 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "matsetvaluesblocked4_" 900 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 901 { 902 Mat A = *AA; 903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 904 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 905 PetscInt *ai =a->i,*ailen=a->ilen; 906 PetscInt *aj =a->j,stepval,lastcol = -1; 907 const PetscScalar *value = v; 908 MatScalar *ap,*aa = a->a,*bap; 909 910 PetscFunctionBegin; 911 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 912 stepval = (n-1)*4; 913 for (k=0; k<m; k++) { /* loop over added rows */ 914 row = im[k]; 915 rp = aj + ai[row]; 916 ap = aa + 16*ai[row]; 917 nrow = ailen[row]; 918 low = 0; 919 high = nrow; 920 for (l=0; l<n; l++) { /* loop over added columns */ 921 col = in[l]; 922 if (col <= lastcol) low = 0; 923 else high = nrow; 924 lastcol = col; 925 value = v + k*(stepval+4 + l)*4; 926 while (high-low > 7) { 927 t = (low+high)/2; 928 if (rp[t] > col) high = t; 929 else low = t; 930 } 931 for (i=low; i<high; i++) { 932 if (rp[i] > col) break; 933 if (rp[i] == col) { 934 bap = ap + 16*i; 935 for (ii=0; ii<4; ii++,value+=stepval) { 936 for (jj=ii; jj<16; jj+=4) { 937 bap[jj] += *value++; 938 } 939 } 940 goto noinsert2; 941 } 942 } 943 N = nrow++ - 1; 944 high++; /* added new column index thus must search to one higher than before */ 945 /* shift up all the later entries in this row */ 946 for (ii=N; ii>=i; ii--) { 947 rp[ii+1] = rp[ii]; 948 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 949 } 950 if (N >= i) { 951 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 952 } 953 rp[i] = col; 954 bap = ap + 16*i; 955 for (ii=0; ii<4; ii++,value+=stepval) { 956 for (jj=ii; jj<16; jj+=4) { 957 bap[jj] = *value++; 958 } 959 } 960 noinsert2:; 961 low = i; 962 } 963 ailen[row] = nrow; 964 } 965 PetscFunctionReturnVoid(); 966 } 967 968 #if defined(PETSC_HAVE_FORTRAN_CAPS) 969 #define matsetvalues4_ MATSETVALUES4 970 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 971 #define matsetvalues4_ matsetvalues4 972 #endif 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatSetValues4_" 976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 977 { 978 Mat A = *AA; 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 981 PetscInt *ai=a->i,*ailen=a->ilen; 982 PetscInt *aj=a->j,brow,bcol; 983 PetscInt ridx,cidx,lastcol = -1; 984 MatScalar *ap,value,*aa=a->a,*bap; 985 986 PetscFunctionBegin; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; brow = row/4; 989 rp = aj + ai[brow]; 990 ap = aa + 16*ai[brow]; 991 nrow = ailen[brow]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; bcol = col/4; 996 ridx = row % 4; cidx = col % 4; 997 value = v[l + k*n]; 998 if (col <= lastcol) low = 0; 999 else high = nrow; 1000 lastcol = col; 1001 while (high-low > 7) { 1002 t = (low+high)/2; 1003 if (rp[t] > bcol) high = t; 1004 else low = t; 1005 } 1006 for (i=low; i<high; i++) { 1007 if (rp[i] > bcol) break; 1008 if (rp[i] == bcol) { 1009 bap = ap + 16*i + 4*cidx + ridx; 1010 *bap += value; 1011 goto noinsert1; 1012 } 1013 } 1014 N = nrow++ - 1; 1015 high++; /* added new column thus must search to one higher than before */ 1016 /* shift up all the later entries in this row */ 1017 for (ii=N; ii>=i; ii--) { 1018 rp[ii+1] = rp[ii]; 1019 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1020 } 1021 if (N>=i) { 1022 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1023 } 1024 rp[i] = bcol; 1025 ap[16*i + 4*cidx + ridx] = value; 1026 noinsert1:; 1027 low = i; 1028 } 1029 ailen[brow] = nrow; 1030 } 1031 PetscFunctionReturnVoid(); 1032 } 1033 1034 /* 1035 Checks for missing diagonals 1036 */ 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1039 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1040 { 1041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1042 PetscErrorCode ierr; 1043 PetscInt *diag,*ii = a->i,i; 1044 1045 PetscFunctionBegin; 1046 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1047 *missing = PETSC_FALSE; 1048 if (A->rmap->n > 0 && !ii) { 1049 *missing = PETSC_TRUE; 1050 if (d) *d = 0; 1051 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1052 } else { 1053 diag = a->diag; 1054 for (i=0; i<a->mbs; i++) { 1055 if (diag[i] >= ii[i+1]) { 1056 *missing = PETSC_TRUE; 1057 if (d) *d = i; 1058 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1059 break; 1060 } 1061 } 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1068 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1069 { 1070 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1071 PetscErrorCode ierr; 1072 PetscInt i,j,m = a->mbs; 1073 1074 PetscFunctionBegin; 1075 if (!a->diag) { 1076 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1077 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1078 a->free_diag = PETSC_TRUE; 1079 } 1080 for (i=0; i<m; i++) { 1081 a->diag[i] = a->i[i+1]; 1082 for (j=a->i[i]; j<a->i[i+1]; j++) { 1083 if (a->j[j] == i) { 1084 a->diag[i] = j; 1085 break; 1086 } 1087 } 1088 } 1089 PetscFunctionReturn(0); 1090 } 1091 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1095 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1100 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1101 1102 PetscFunctionBegin; 1103 *nn = n; 1104 if (!ia) PetscFunctionReturn(0); 1105 if (symmetric) { 1106 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1107 nz = tia[n]; 1108 } else { 1109 tia = a->i; tja = a->j; 1110 } 1111 1112 if (!blockcompressed && bs > 1) { 1113 (*nn) *= bs; 1114 /* malloc & create the natural set of indices */ 1115 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1116 if (n) { 1117 (*ia)[0] = 0; 1118 for (j=1; j<bs; j++) { 1119 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1120 } 1121 } 1122 1123 for (i=1; i<n; i++) { 1124 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1125 for (j=1; j<bs; j++) { 1126 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1127 } 1128 } 1129 if (n) { 1130 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1131 } 1132 1133 if (inja) { 1134 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1135 cnt = 0; 1136 for (i=0; i<n; i++) { 1137 for (j=0; j<bs; j++) { 1138 for (k=tia[i]; k<tia[i+1]; k++) { 1139 for (l=0; l<bs; l++) { 1140 (*ja)[cnt++] = bs*tja[k] + l; 1141 } 1142 } 1143 } 1144 } 1145 } 1146 1147 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1148 ierr = PetscFree(tia);CHKERRQ(ierr); 1149 ierr = PetscFree(tja);CHKERRQ(ierr); 1150 } 1151 } else if (oshift == 1) { 1152 if (symmetric) { 1153 nz = tia[A->rmap->n/bs]; 1154 /* add 1 to i and j indices */ 1155 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1156 *ia = tia; 1157 if (ja) { 1158 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1159 *ja = tja; 1160 } 1161 } else { 1162 nz = a->i[A->rmap->n/bs]; 1163 /* malloc space and add 1 to i and j indices */ 1164 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1165 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1166 if (ja) { 1167 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1168 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1169 } 1170 } 1171 } else { 1172 *ia = tia; 1173 if (ja) *ja = tja; 1174 } 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1180 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1181 { 1182 PetscErrorCode ierr; 1183 1184 PetscFunctionBegin; 1185 if (!ia) PetscFunctionReturn(0); 1186 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1187 ierr = PetscFree(*ia);CHKERRQ(ierr); 1188 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1195 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1196 { 1197 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1198 PetscErrorCode ierr; 1199 1200 PetscFunctionBegin; 1201 #if defined(PETSC_USE_LOG) 1202 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1203 #endif 1204 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1206 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1207 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1208 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1209 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1210 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1211 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1212 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1213 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1214 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1215 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1216 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1217 1218 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1219 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1220 ierr = PetscFree(A->data);CHKERRQ(ierr); 1221 1222 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1238 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1239 { 1240 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 switch (op) { 1245 case MAT_ROW_ORIENTED: 1246 a->roworiented = flg; 1247 break; 1248 case MAT_KEEP_NONZERO_PATTERN: 1249 a->keepnonzeropattern = flg; 1250 break; 1251 case MAT_NEW_NONZERO_LOCATIONS: 1252 a->nonew = (flg ? 0 : 1); 1253 break; 1254 case MAT_NEW_NONZERO_LOCATION_ERR: 1255 a->nonew = (flg ? -1 : 0); 1256 break; 1257 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1258 a->nonew = (flg ? -2 : 0); 1259 break; 1260 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1261 a->nounused = (flg ? -1 : 0); 1262 break; 1263 case MAT_NEW_DIAGONALS: 1264 case MAT_IGNORE_OFF_PROC_ENTRIES: 1265 case MAT_USE_HASH_TABLE: 1266 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1267 break; 1268 case MAT_SPD: 1269 case MAT_SYMMETRIC: 1270 case MAT_STRUCTURALLY_SYMMETRIC: 1271 case MAT_HERMITIAN: 1272 case MAT_SYMMETRY_ETERNAL: 1273 /* These options are handled directly by MatSetOption() */ 1274 break; 1275 default: 1276 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1277 } 1278 PetscFunctionReturn(0); 1279 } 1280 1281 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1284 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1285 { 1286 PetscErrorCode ierr; 1287 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1288 MatScalar *aa_i; 1289 PetscScalar *v_i; 1290 1291 PetscFunctionBegin; 1292 bs = A->rmap->bs; 1293 bs2 = bs*bs; 1294 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1295 1296 bn = row/bs; /* Block number */ 1297 bp = row % bs; /* Block Position */ 1298 M = ai[bn+1] - ai[bn]; 1299 *nz = bs*M; 1300 1301 if (v) { 1302 *v = 0; 1303 if (*nz) { 1304 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1305 for (i=0; i<M; i++) { /* for each block in the block row */ 1306 v_i = *v + i*bs; 1307 aa_i = aa + bs2*(ai[bn] + i); 1308 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1309 } 1310 } 1311 } 1312 1313 if (idx) { 1314 *idx = 0; 1315 if (*nz) { 1316 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1317 for (i=0; i<M; i++) { /* for each block in the block row */ 1318 idx_i = *idx + i*bs; 1319 itmp = bs*aj[ai[bn] + i]; 1320 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1321 } 1322 } 1323 } 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1329 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1330 { 1331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1341 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 PetscErrorCode ierr; 1344 1345 PetscFunctionBegin; 1346 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1347 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1348 PetscFunctionReturn(0); 1349 } 1350 1351 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1355 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1356 { 1357 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1358 Mat C; 1359 PetscErrorCode ierr; 1360 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1361 PetscInt *rows,*cols,bs2=a->bs2; 1362 MatScalar *array; 1363 1364 PetscFunctionBegin; 1365 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1366 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1367 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1368 1369 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1370 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1371 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1372 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1373 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1374 ierr = PetscFree(col);CHKERRQ(ierr); 1375 } else { 1376 C = *B; 1377 } 1378 1379 array = a->a; 1380 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1381 for (i=0; i<mbs; i++) { 1382 cols[0] = i*bs; 1383 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1384 len = ai[i+1] - ai[i]; 1385 for (j=0; j<len; j++) { 1386 rows[0] = (*aj++)*bs; 1387 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1388 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1389 array += bs2; 1390 } 1391 } 1392 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1393 1394 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1396 1397 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1398 *B = C; 1399 } else { 1400 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1407 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1408 { 1409 PetscErrorCode ierr; 1410 Mat Btrans; 1411 1412 PetscFunctionBegin; 1413 *f = PETSC_FALSE; 1414 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1415 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1416 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1422 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1423 { 1424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1425 PetscErrorCode ierr; 1426 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1427 int fd; 1428 PetscScalar *aa; 1429 FILE *file; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1433 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1434 col_lens[0] = MAT_FILE_CLASSID; 1435 1436 col_lens[1] = A->rmap->N; 1437 col_lens[2] = A->cmap->n; 1438 col_lens[3] = a->nz*bs2; 1439 1440 /* store lengths of each row and write (including header) to file */ 1441 count = 0; 1442 for (i=0; i<a->mbs; i++) { 1443 for (j=0; j<bs; j++) { 1444 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1445 } 1446 } 1447 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1448 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1449 1450 /* store column indices (zero start index) */ 1451 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1452 count = 0; 1453 for (i=0; i<a->mbs; i++) { 1454 for (j=0; j<bs; j++) { 1455 for (k=a->i[i]; k<a->i[i+1]; k++) { 1456 for (l=0; l<bs; l++) { 1457 jj[count++] = bs*a->j[k] + l; 1458 } 1459 } 1460 } 1461 } 1462 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1463 ierr = PetscFree(jj);CHKERRQ(ierr); 1464 1465 /* store nonzero values */ 1466 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1467 count = 0; 1468 for (i=0; i<a->mbs; i++) { 1469 for (j=0; j<bs; j++) { 1470 for (k=a->i[i]; k<a->i[i+1]; k++) { 1471 for (l=0; l<bs; l++) { 1472 aa[count++] = a->a[bs2*k + l*bs + j]; 1473 } 1474 } 1475 } 1476 } 1477 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1478 ierr = PetscFree(aa);CHKERRQ(ierr); 1479 1480 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1481 if (file) { 1482 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1489 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1490 { 1491 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1492 PetscErrorCode ierr; 1493 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1494 PetscViewerFormat format; 1495 1496 PetscFunctionBegin; 1497 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1498 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1499 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1500 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1501 const char *matname; 1502 Mat aij; 1503 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1504 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1505 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1506 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1507 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1511 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1512 for (i=0; i<a->mbs; i++) { 1513 for (j=0; j<bs; j++) { 1514 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1515 for (k=a->i[i]; k<a->i[i+1]; k++) { 1516 for (l=0; l<bs; l++) { 1517 #if defined(PETSC_USE_COMPLEX) 1518 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1519 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1520 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1521 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1522 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1523 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1524 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1525 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1526 } 1527 #else 1528 if (a->a[bs2*k + l*bs + j] != 0.0) { 1529 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1530 } 1531 #endif 1532 } 1533 } 1534 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1535 } 1536 } 1537 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1538 } else { 1539 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1540 for (i=0; i<a->mbs; i++) { 1541 for (j=0; j<bs; j++) { 1542 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1543 for (k=a->i[i]; k<a->i[i+1]; k++) { 1544 for (l=0; l<bs; l++) { 1545 #if defined(PETSC_USE_COMPLEX) 1546 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1547 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1548 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1549 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1550 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1551 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1552 } else { 1553 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1554 } 1555 #else 1556 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1557 #endif 1558 } 1559 } 1560 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1561 } 1562 } 1563 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #include <petscdraw.h> 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1572 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1573 { 1574 Mat A = (Mat) Aa; 1575 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1576 PetscErrorCode ierr; 1577 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1578 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1579 MatScalar *aa; 1580 PetscViewer viewer; 1581 PetscViewerFormat format; 1582 1583 PetscFunctionBegin; 1584 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1585 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1586 1587 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1588 1589 /* loop over matrix elements drawing boxes */ 1590 1591 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1592 color = PETSC_DRAW_BLUE; 1593 for (i=0,row=0; i<mbs; i++,row+=bs) { 1594 for (j=a->i[i]; j<a->i[i+1]; j++) { 1595 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1596 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1597 aa = a->a + j*bs2; 1598 for (k=0; k<bs; k++) { 1599 for (l=0; l<bs; l++) { 1600 if (PetscRealPart(*aa++) >= 0.) continue; 1601 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1602 } 1603 } 1604 } 1605 } 1606 color = PETSC_DRAW_CYAN; 1607 for (i=0,row=0; i<mbs; i++,row+=bs) { 1608 for (j=a->i[i]; j<a->i[i+1]; j++) { 1609 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1610 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1611 aa = a->a + j*bs2; 1612 for (k=0; k<bs; k++) { 1613 for (l=0; l<bs; l++) { 1614 if (PetscRealPart(*aa++) != 0.) continue; 1615 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1616 } 1617 } 1618 } 1619 } 1620 color = PETSC_DRAW_RED; 1621 for (i=0,row=0; i<mbs; i++,row+=bs) { 1622 for (j=a->i[i]; j<a->i[i+1]; j++) { 1623 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1624 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1625 aa = a->a + j*bs2; 1626 for (k=0; k<bs; k++) { 1627 for (l=0; l<bs; l++) { 1628 if (PetscRealPart(*aa++) <= 0.) continue; 1629 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1630 } 1631 } 1632 } 1633 } 1634 } else { 1635 /* use contour shading to indicate magnitude of values */ 1636 /* first determine max of all nonzero values */ 1637 PetscDraw popup; 1638 PetscReal scale,maxv = 0.0; 1639 1640 for (i=0; i<a->nz*a->bs2; i++) { 1641 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1642 } 1643 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1644 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1645 if (popup) { 1646 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1647 } 1648 for (i=0,row=0; i<mbs; i++,row+=bs) { 1649 for (j=a->i[i]; j<a->i[i+1]; j++) { 1650 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1651 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1652 aa = a->a + j*bs2; 1653 for (k=0; k<bs; k++) { 1654 for (l=0; l<bs; l++) { 1655 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1656 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1657 } 1658 } 1659 } 1660 } 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1667 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1668 { 1669 PetscErrorCode ierr; 1670 PetscReal xl,yl,xr,yr,w,h; 1671 PetscDraw draw; 1672 PetscBool isnull; 1673 1674 PetscFunctionBegin; 1675 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1676 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1677 1678 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1679 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1680 xr += w; yr += h; xl = -w; yl = -h; 1681 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1682 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1683 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatView_SeqBAIJ" 1689 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1690 { 1691 PetscErrorCode ierr; 1692 PetscBool iascii,isbinary,isdraw; 1693 1694 PetscFunctionBegin; 1695 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1696 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1697 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1698 if (iascii) { 1699 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1700 } else if (isbinary) { 1701 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1702 } else if (isdraw) { 1703 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1704 } else { 1705 Mat B; 1706 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1707 ierr = MatView(B,viewer);CHKERRQ(ierr); 1708 ierr = MatDestroy(&B);CHKERRQ(ierr); 1709 } 1710 PetscFunctionReturn(0); 1711 } 1712 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1716 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1717 { 1718 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1719 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1720 PetscInt *ai = a->i,*ailen = a->ilen; 1721 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1722 MatScalar *ap,*aa = a->a; 1723 1724 PetscFunctionBegin; 1725 for (k=0; k<m; k++) { /* loop over rows */ 1726 row = im[k]; brow = row/bs; 1727 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1728 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1729 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1730 nrow = ailen[brow]; 1731 for (l=0; l<n; l++) { /* loop over columns */ 1732 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1733 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1734 col = in[l]; 1735 bcol = col/bs; 1736 cidx = col%bs; 1737 ridx = row%bs; 1738 high = nrow; 1739 low = 0; /* assume unsorted */ 1740 while (high-low > 5) { 1741 t = (low+high)/2; 1742 if (rp[t] > bcol) high = t; 1743 else low = t; 1744 } 1745 for (i=low; i<high; i++) { 1746 if (rp[i] > bcol) break; 1747 if (rp[i] == bcol) { 1748 *v++ = ap[bs2*i+bs*cidx+ridx]; 1749 goto finished; 1750 } 1751 } 1752 *v++ = 0.0; 1753 finished:; 1754 } 1755 } 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1761 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1762 { 1763 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1764 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1765 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1766 PetscErrorCode ierr; 1767 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1768 PetscBool roworiented=a->roworiented; 1769 const PetscScalar *value = v; 1770 MatScalar *ap,*aa = a->a,*bap; 1771 1772 PetscFunctionBegin; 1773 if (roworiented) { 1774 stepval = (n-1)*bs; 1775 } else { 1776 stepval = (m-1)*bs; 1777 } 1778 for (k=0; k<m; k++) { /* loop over added rows */ 1779 row = im[k]; 1780 if (row < 0) continue; 1781 #if defined(PETSC_USE_DEBUG) 1782 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1783 #endif 1784 rp = aj + ai[row]; 1785 ap = aa + bs2*ai[row]; 1786 rmax = imax[row]; 1787 nrow = ailen[row]; 1788 low = 0; 1789 high = nrow; 1790 for (l=0; l<n; l++) { /* loop over added columns */ 1791 if (in[l] < 0) continue; 1792 #if defined(PETSC_USE_DEBUG) 1793 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1794 #endif 1795 col = in[l]; 1796 if (roworiented) { 1797 value = v + (k*(stepval+bs) + l)*bs; 1798 } else { 1799 value = v + (l*(stepval+bs) + k)*bs; 1800 } 1801 if (col <= lastcol) low = 0; 1802 else high = nrow; 1803 lastcol = col; 1804 while (high-low > 7) { 1805 t = (low+high)/2; 1806 if (rp[t] > col) high = t; 1807 else low = t; 1808 } 1809 for (i=low; i<high; i++) { 1810 if (rp[i] > col) break; 1811 if (rp[i] == col) { 1812 bap = ap + bs2*i; 1813 if (roworiented) { 1814 if (is == ADD_VALUES) { 1815 for (ii=0; ii<bs; ii++,value+=stepval) { 1816 for (jj=ii; jj<bs2; jj+=bs) { 1817 bap[jj] += *value++; 1818 } 1819 } 1820 } else { 1821 for (ii=0; ii<bs; ii++,value+=stepval) { 1822 for (jj=ii; jj<bs2; jj+=bs) { 1823 bap[jj] = *value++; 1824 } 1825 } 1826 } 1827 } else { 1828 if (is == ADD_VALUES) { 1829 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1830 for (jj=0; jj<bs; jj++) { 1831 bap[jj] += value[jj]; 1832 } 1833 bap += bs; 1834 } 1835 } else { 1836 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1837 for (jj=0; jj<bs; jj++) { 1838 bap[jj] = value[jj]; 1839 } 1840 bap += bs; 1841 } 1842 } 1843 } 1844 goto noinsert2; 1845 } 1846 } 1847 if (nonew == 1) goto noinsert2; 1848 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1849 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1850 N = nrow++ - 1; high++; 1851 /* shift up all the later entries in this row */ 1852 for (ii=N; ii>=i; ii--) { 1853 rp[ii+1] = rp[ii]; 1854 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1855 } 1856 if (N >= i) { 1857 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1858 } 1859 rp[i] = col; 1860 bap = ap + bs2*i; 1861 if (roworiented) { 1862 for (ii=0; ii<bs; ii++,value+=stepval) { 1863 for (jj=ii; jj<bs2; jj+=bs) { 1864 bap[jj] = *value++; 1865 } 1866 } 1867 } else { 1868 for (ii=0; ii<bs; ii++,value+=stepval) { 1869 for (jj=0; jj<bs; jj++) { 1870 *bap++ = *value++; 1871 } 1872 } 1873 } 1874 noinsert2:; 1875 low = i; 1876 } 1877 ailen[row] = nrow; 1878 } 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1884 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1885 { 1886 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1887 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1888 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1889 PetscErrorCode ierr; 1890 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1891 MatScalar *aa = a->a,*ap; 1892 PetscReal ratio=0.6; 1893 1894 PetscFunctionBegin; 1895 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1896 1897 if (m) rmax = ailen[0]; 1898 for (i=1; i<mbs; i++) { 1899 /* move each row back by the amount of empty slots (fshift) before it*/ 1900 fshift += imax[i-1] - ailen[i-1]; 1901 rmax = PetscMax(rmax,ailen[i]); 1902 if (fshift) { 1903 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1904 N = ailen[i]; 1905 for (j=0; j<N; j++) { 1906 ip[j-fshift] = ip[j]; 1907 1908 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1909 } 1910 } 1911 ai[i] = ai[i-1] + ailen[i-1]; 1912 } 1913 if (mbs) { 1914 fshift += imax[mbs-1] - ailen[mbs-1]; 1915 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1916 } 1917 1918 /* reset ilen and imax for each row */ 1919 a->nonzerorowcnt = 0; 1920 for (i=0; i<mbs; i++) { 1921 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1922 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1923 } 1924 a->nz = ai[mbs]; 1925 1926 /* diagonals may have moved, so kill the diagonal pointers */ 1927 a->idiagvalid = PETSC_FALSE; 1928 if (fshift && a->diag) { 1929 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1930 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1931 a->diag = 0; 1932 } 1933 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1934 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1935 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1936 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1937 1938 A->info.mallocs += a->reallocs; 1939 a->reallocs = 0; 1940 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1941 a->rmax = rmax; 1942 1943 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 /* 1948 This function returns an array of flags which indicate the locations of contiguous 1949 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1950 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1951 Assume: sizes should be long enough to hold all the values. 1952 */ 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1955 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1956 { 1957 PetscInt i,j,k,row; 1958 PetscBool flg; 1959 1960 PetscFunctionBegin; 1961 for (i=0,j=0; i<n; j++) { 1962 row = idx[i]; 1963 if (row%bs!=0) { /* Not the begining of a block */ 1964 sizes[j] = 1; 1965 i++; 1966 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1967 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1968 i++; 1969 } else { /* Begining of the block, so check if the complete block exists */ 1970 flg = PETSC_TRUE; 1971 for (k=1; k<bs; k++) { 1972 if (row+k != idx[i+k]) { /* break in the block */ 1973 flg = PETSC_FALSE; 1974 break; 1975 } 1976 } 1977 if (flg) { /* No break in the bs */ 1978 sizes[j] = bs; 1979 i += bs; 1980 } else { 1981 sizes[j] = 1; 1982 i++; 1983 } 1984 } 1985 } 1986 *bs_max = j; 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1992 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1993 { 1994 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1995 PetscErrorCode ierr; 1996 PetscInt i,j,k,count,*rows; 1997 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1998 PetscScalar zero = 0.0; 1999 MatScalar *aa; 2000 const PetscScalar *xx; 2001 PetscScalar *bb; 2002 2003 PetscFunctionBegin; 2004 /* fix right hand side if needed */ 2005 if (x && b) { 2006 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2008 for (i=0; i<is_n; i++) { 2009 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2010 } 2011 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2012 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2013 } 2014 2015 /* Make a copy of the IS and sort it */ 2016 /* allocate memory for rows,sizes */ 2017 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2018 2019 /* copy IS values to rows, and sort them */ 2020 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2021 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2022 2023 if (baij->keepnonzeropattern) { 2024 for (i=0; i<is_n; i++) sizes[i] = 1; 2025 bs_max = is_n; 2026 } else { 2027 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2028 A->nonzerostate++; 2029 } 2030 2031 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2032 row = rows[j]; 2033 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2034 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2035 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2036 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2037 if (diag != (PetscScalar)0.0) { 2038 if (baij->ilen[row/bs] > 0) { 2039 baij->ilen[row/bs] = 1; 2040 baij->j[baij->i[row/bs]] = row/bs; 2041 2042 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2043 } 2044 /* Now insert all the diagonal values for this bs */ 2045 for (k=0; k<bs; k++) { 2046 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2047 } 2048 } else { /* (diag == 0.0) */ 2049 baij->ilen[row/bs] = 0; 2050 } /* end (diag == 0.0) */ 2051 } else { /* (sizes[i] != bs) */ 2052 #if defined(PETSC_USE_DEBUG) 2053 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2054 #endif 2055 for (k=0; k<count; k++) { 2056 aa[0] = zero; 2057 aa += bs; 2058 } 2059 if (diag != (PetscScalar)0.0) { 2060 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2061 } 2062 } 2063 } 2064 2065 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2066 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2072 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2073 { 2074 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2075 PetscErrorCode ierr; 2076 PetscInt i,j,k,count; 2077 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2078 PetscScalar zero = 0.0; 2079 MatScalar *aa; 2080 const PetscScalar *xx; 2081 PetscScalar *bb; 2082 PetscBool *zeroed,vecs = PETSC_FALSE; 2083 2084 PetscFunctionBegin; 2085 /* fix right hand side if needed */ 2086 if (x && b) { 2087 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2088 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2089 vecs = PETSC_TRUE; 2090 } 2091 2092 /* zero the columns */ 2093 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2094 for (i=0; i<is_n; i++) { 2095 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2096 zeroed[is_idx[i]] = PETSC_TRUE; 2097 } 2098 for (i=0; i<A->rmap->N; i++) { 2099 if (!zeroed[i]) { 2100 row = i/bs; 2101 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2102 for (k=0; k<bs; k++) { 2103 col = bs*baij->j[j] + k; 2104 if (zeroed[col]) { 2105 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2106 if (vecs) bb[i] -= aa[0]*xx[col]; 2107 aa[0] = 0.0; 2108 } 2109 } 2110 } 2111 } else if (vecs) bb[i] = diag*xx[i]; 2112 } 2113 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2114 if (vecs) { 2115 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2116 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2117 } 2118 2119 /* zero the rows */ 2120 for (i=0; i<is_n; i++) { 2121 row = is_idx[i]; 2122 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2123 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2124 for (k=0; k<count; k++) { 2125 aa[0] = zero; 2126 aa += bs; 2127 } 2128 if (diag != (PetscScalar)0.0) { 2129 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2130 } 2131 } 2132 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2138 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2139 { 2140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2141 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2142 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2143 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2144 PetscErrorCode ierr; 2145 PetscInt ridx,cidx,bs2=a->bs2; 2146 PetscBool roworiented=a->roworiented; 2147 MatScalar *ap,value,*aa=a->a,*bap; 2148 2149 PetscFunctionBegin; 2150 for (k=0; k<m; k++) { /* loop over added rows */ 2151 row = im[k]; 2152 brow = row/bs; 2153 if (row < 0) continue; 2154 #if defined(PETSC_USE_DEBUG) 2155 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2156 #endif 2157 rp = aj + ai[brow]; 2158 ap = aa + bs2*ai[brow]; 2159 rmax = imax[brow]; 2160 nrow = ailen[brow]; 2161 low = 0; 2162 high = nrow; 2163 for (l=0; l<n; l++) { /* loop over added columns */ 2164 if (in[l] < 0) continue; 2165 #if defined(PETSC_USE_DEBUG) 2166 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2167 #endif 2168 col = in[l]; bcol = col/bs; 2169 ridx = row % bs; cidx = col % bs; 2170 if (roworiented) { 2171 value = v[l + k*n]; 2172 } else { 2173 value = v[k + l*m]; 2174 } 2175 if (col <= lastcol) low = 0; else high = nrow; 2176 lastcol = col; 2177 while (high-low > 7) { 2178 t = (low+high)/2; 2179 if (rp[t] > bcol) high = t; 2180 else low = t; 2181 } 2182 for (i=low; i<high; i++) { 2183 if (rp[i] > bcol) break; 2184 if (rp[i] == bcol) { 2185 bap = ap + bs2*i + bs*cidx + ridx; 2186 if (is == ADD_VALUES) *bap += value; 2187 else *bap = value; 2188 goto noinsert1; 2189 } 2190 } 2191 if (nonew == 1) goto noinsert1; 2192 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2193 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2194 N = nrow++ - 1; high++; 2195 /* shift up all the later entries in this row */ 2196 for (ii=N; ii>=i; ii--) { 2197 rp[ii+1] = rp[ii]; 2198 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2199 } 2200 if (N>=i) { 2201 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2202 } 2203 rp[i] = bcol; 2204 ap[bs2*i + bs*cidx + ridx] = value; 2205 a->nz++; 2206 A->nonzerostate++; 2207 noinsert1:; 2208 low = i; 2209 } 2210 ailen[brow] = nrow; 2211 } 2212 PetscFunctionReturn(0); 2213 } 2214 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2217 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2218 { 2219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2220 Mat outA; 2221 PetscErrorCode ierr; 2222 PetscBool row_identity,col_identity; 2223 2224 PetscFunctionBegin; 2225 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2226 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2227 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2228 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2229 2230 outA = inA; 2231 inA->factortype = MAT_FACTOR_LU; 2232 2233 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2234 2235 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2236 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2237 a->row = row; 2238 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2239 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2240 a->col = col; 2241 2242 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2243 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2244 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2245 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2246 2247 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2248 if (!a->solve_work) { 2249 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2250 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2251 } 2252 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2258 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2259 { 2260 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2261 PetscInt i,nz,mbs; 2262 2263 PetscFunctionBegin; 2264 nz = baij->maxnz; 2265 mbs = baij->mbs; 2266 for (i=0; i<nz; i++) { 2267 baij->j[i] = indices[i]; 2268 } 2269 baij->nz = nz; 2270 for (i=0; i<mbs; i++) { 2271 baij->ilen[i] = baij->imax[i]; 2272 } 2273 PetscFunctionReturn(0); 2274 } 2275 2276 #undef __FUNCT__ 2277 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2278 /*@ 2279 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2280 in the matrix. 2281 2282 Input Parameters: 2283 + mat - the SeqBAIJ matrix 2284 - indices - the column indices 2285 2286 Level: advanced 2287 2288 Notes: 2289 This can be called if you have precomputed the nonzero structure of the 2290 matrix and want to provide it to the matrix object to improve the performance 2291 of the MatSetValues() operation. 2292 2293 You MUST have set the correct numbers of nonzeros per row in the call to 2294 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2295 2296 MUST be called before any calls to MatSetValues(); 2297 2298 @*/ 2299 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2300 { 2301 PetscErrorCode ierr; 2302 2303 PetscFunctionBegin; 2304 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2305 PetscValidPointer(indices,2); 2306 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2312 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2313 { 2314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2315 PetscErrorCode ierr; 2316 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2317 PetscReal atmp; 2318 PetscScalar *x,zero = 0.0; 2319 MatScalar *aa; 2320 PetscInt ncols,brow,krow,kcol; 2321 2322 PetscFunctionBegin; 2323 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2324 bs = A->rmap->bs; 2325 aa = a->a; 2326 ai = a->i; 2327 aj = a->j; 2328 mbs = a->mbs; 2329 2330 ierr = VecSet(v,zero);CHKERRQ(ierr); 2331 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2332 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2333 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2334 for (i=0; i<mbs; i++) { 2335 ncols = ai[1] - ai[0]; ai++; 2336 brow = bs*i; 2337 for (j=0; j<ncols; j++) { 2338 for (kcol=0; kcol<bs; kcol++) { 2339 for (krow=0; krow<bs; krow++) { 2340 atmp = PetscAbsScalar(*aa);aa++; 2341 row = brow + krow; /* row index */ 2342 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2343 } 2344 } 2345 aj++; 2346 } 2347 } 2348 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatCopy_SeqBAIJ" 2354 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2355 { 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 /* If the two matrices have the same copy implementation, use fast copy. */ 2360 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2361 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2362 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2363 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2364 2365 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2366 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2367 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2368 } else { 2369 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2370 } 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2376 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2377 { 2378 PetscErrorCode ierr; 2379 2380 PetscFunctionBegin; 2381 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2387 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2388 { 2389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2390 2391 PetscFunctionBegin; 2392 *array = a->a; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2398 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2399 { 2400 PetscFunctionBegin; 2401 PetscFunctionReturn(0); 2402 } 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2406 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2407 { 2408 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2409 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2410 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2411 PetscErrorCode ierr; 2412 2413 PetscFunctionBegin; 2414 /* Set the number of nonzeros in the new matrix */ 2415 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2421 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2422 { 2423 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2424 PetscErrorCode ierr; 2425 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2426 PetscBLASInt one=1; 2427 2428 PetscFunctionBegin; 2429 if (str == SAME_NONZERO_PATTERN) { 2430 PetscScalar alpha = a; 2431 PetscBLASInt bnz; 2432 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2433 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2434 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2435 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2436 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2437 } else { 2438 Mat B; 2439 PetscInt *nnz; 2440 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2441 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2442 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2443 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2444 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2446 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2447 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2448 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2449 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2450 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2451 ierr = PetscFree(nnz);CHKERRQ(ierr); 2452 } 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2458 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2459 { 2460 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2461 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2462 MatScalar *aa = a->a; 2463 2464 PetscFunctionBegin; 2465 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2471 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2472 { 2473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2474 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2475 MatScalar *aa = a->a; 2476 2477 PetscFunctionBegin; 2478 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2484 /* 2485 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2486 */ 2487 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2488 { 2489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2490 PetscErrorCode ierr; 2491 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2492 PetscInt nz = a->i[m],row,*jj,mr,col; 2493 2494 PetscFunctionBegin; 2495 *nn = n; 2496 if (!ia) PetscFunctionReturn(0); 2497 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2498 else { 2499 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2500 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2501 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2502 jj = a->j; 2503 for (i=0; i<nz; i++) { 2504 collengths[jj[i]]++; 2505 } 2506 cia[0] = oshift; 2507 for (i=0; i<n; i++) { 2508 cia[i+1] = cia[i] + collengths[i]; 2509 } 2510 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2511 jj = a->j; 2512 for (row=0; row<m; row++) { 2513 mr = a->i[row+1] - a->i[row]; 2514 for (i=0; i<mr; i++) { 2515 col = *jj++; 2516 2517 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2518 } 2519 } 2520 ierr = PetscFree(collengths);CHKERRQ(ierr); 2521 *ia = cia; *ja = cja; 2522 } 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2528 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2529 { 2530 PetscErrorCode ierr; 2531 2532 PetscFunctionBegin; 2533 if (!ia) PetscFunctionReturn(0); 2534 ierr = PetscFree(*ia);CHKERRQ(ierr); 2535 ierr = PetscFree(*ja);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 /* 2540 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2541 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2542 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2543 */ 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2546 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2547 { 2548 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2549 PetscErrorCode ierr; 2550 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2551 PetscInt nz = a->i[m],row,*jj,mr,col; 2552 PetscInt *cspidx; 2553 2554 PetscFunctionBegin; 2555 *nn = n; 2556 if (!ia) PetscFunctionReturn(0); 2557 2558 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2559 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2560 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2561 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2562 jj = a->j; 2563 for (i=0; i<nz; i++) { 2564 collengths[jj[i]]++; 2565 } 2566 cia[0] = oshift; 2567 for (i=0; i<n; i++) { 2568 cia[i+1] = cia[i] + collengths[i]; 2569 } 2570 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2571 jj = a->j; 2572 for (row=0; row<m; row++) { 2573 mr = a->i[row+1] - a->i[row]; 2574 for (i=0; i<mr; i++) { 2575 col = *jj++; 2576 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2577 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2578 } 2579 } 2580 ierr = PetscFree(collengths);CHKERRQ(ierr); 2581 *ia = cia; *ja = cja; 2582 *spidx = cspidx; 2583 PetscFunctionReturn(0); 2584 } 2585 2586 #undef __FUNCT__ 2587 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2588 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2589 { 2590 PetscErrorCode ierr; 2591 2592 PetscFunctionBegin; 2593 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2594 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 #undef __FUNCT__ 2599 #define __FUNCT__ "MatShift_SeqBAIJ" 2600 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2601 { 2602 PetscErrorCode ierr; 2603 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2604 2605 PetscFunctionBegin; 2606 if (!Y->preallocated || !aij->nz) { 2607 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2608 } 2609 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 /* -------------------------------------------------------------------*/ 2614 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2615 MatGetRow_SeqBAIJ, 2616 MatRestoreRow_SeqBAIJ, 2617 MatMult_SeqBAIJ_N, 2618 /* 4*/ MatMultAdd_SeqBAIJ_N, 2619 MatMultTranspose_SeqBAIJ, 2620 MatMultTransposeAdd_SeqBAIJ, 2621 0, 2622 0, 2623 0, 2624 /* 10*/ 0, 2625 MatLUFactor_SeqBAIJ, 2626 0, 2627 0, 2628 MatTranspose_SeqBAIJ, 2629 /* 15*/ MatGetInfo_SeqBAIJ, 2630 MatEqual_SeqBAIJ, 2631 MatGetDiagonal_SeqBAIJ, 2632 MatDiagonalScale_SeqBAIJ, 2633 MatNorm_SeqBAIJ, 2634 /* 20*/ 0, 2635 MatAssemblyEnd_SeqBAIJ, 2636 MatSetOption_SeqBAIJ, 2637 MatZeroEntries_SeqBAIJ, 2638 /* 24*/ MatZeroRows_SeqBAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /* 29*/ MatSetUp_SeqBAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /* 34*/ MatDuplicate_SeqBAIJ, 2649 0, 2650 0, 2651 MatILUFactor_SeqBAIJ, 2652 0, 2653 /* 39*/ MatAXPY_SeqBAIJ, 2654 MatGetSubMatrices_SeqBAIJ, 2655 MatIncreaseOverlap_SeqBAIJ, 2656 MatGetValues_SeqBAIJ, 2657 MatCopy_SeqBAIJ, 2658 /* 44*/ 0, 2659 MatScale_SeqBAIJ, 2660 MatShift_SeqBAIJ, 2661 0, 2662 MatZeroRowsColumns_SeqBAIJ, 2663 /* 49*/ 0, 2664 MatGetRowIJ_SeqBAIJ, 2665 MatRestoreRowIJ_SeqBAIJ, 2666 MatGetColumnIJ_SeqBAIJ, 2667 MatRestoreColumnIJ_SeqBAIJ, 2668 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2669 0, 2670 0, 2671 0, 2672 MatSetValuesBlocked_SeqBAIJ, 2673 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2674 MatDestroy_SeqBAIJ, 2675 MatView_SeqBAIJ, 2676 0, 2677 0, 2678 /* 64*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2684 0, 2685 MatConvert_Basic, 2686 0, 2687 0, 2688 /* 74*/ 0, 2689 MatFDColoringApply_BAIJ, 2690 0, 2691 0, 2692 0, 2693 /* 79*/ 0, 2694 0, 2695 0, 2696 0, 2697 MatLoad_SeqBAIJ, 2698 /* 84*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /* 89*/ 0, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /* 94*/ 0, 2709 0, 2710 0, 2711 0, 2712 0, 2713 /* 99*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*104*/ 0, 2719 MatRealPart_SeqBAIJ, 2720 MatImaginaryPart_SeqBAIJ, 2721 0, 2722 0, 2723 /*109*/ 0, 2724 0, 2725 0, 2726 0, 2727 MatMissingDiagonal_SeqBAIJ, 2728 /*114*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*119*/ 0, 2734 0, 2735 MatMultHermitianTranspose_SeqBAIJ, 2736 MatMultHermitianTransposeAdd_SeqBAIJ, 2737 0, 2738 /*124*/ 0, 2739 0, 2740 MatInvertBlockDiagonal_SeqBAIJ, 2741 0, 2742 0, 2743 /*129*/ 0, 2744 0, 2745 0, 2746 0, 2747 0, 2748 /*134*/ 0, 2749 0, 2750 0, 2751 0, 2752 0, 2753 /*139*/ 0, 2754 0, 2755 0, 2756 MatFDColoringSetUp_SeqXAIJ, 2757 0, 2758 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2759 }; 2760 2761 #undef __FUNCT__ 2762 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2763 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2764 { 2765 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2766 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2767 PetscErrorCode ierr; 2768 2769 PetscFunctionBegin; 2770 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2771 2772 /* allocate space for values if not already there */ 2773 if (!aij->saved_values) { 2774 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2775 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2776 } 2777 2778 /* copy values over */ 2779 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2780 PetscFunctionReturn(0); 2781 } 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2785 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2786 { 2787 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2788 PetscErrorCode ierr; 2789 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2790 2791 PetscFunctionBegin; 2792 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2793 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2794 2795 /* copy values over */ 2796 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2797 PetscFunctionReturn(0); 2798 } 2799 2800 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2801 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2805 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2806 { 2807 Mat_SeqBAIJ *b; 2808 PetscErrorCode ierr; 2809 PetscInt i,mbs,nbs,bs2; 2810 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2811 2812 PetscFunctionBegin; 2813 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2814 if (nz == MAT_SKIP_ALLOCATION) { 2815 skipallocation = PETSC_TRUE; 2816 nz = 0; 2817 } 2818 2819 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2820 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2821 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2822 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2823 2824 B->preallocated = PETSC_TRUE; 2825 2826 mbs = B->rmap->n/bs; 2827 nbs = B->cmap->n/bs; 2828 bs2 = bs*bs; 2829 2830 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2831 2832 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2833 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2834 if (nnz) { 2835 for (i=0; i<mbs; i++) { 2836 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2837 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2838 } 2839 } 2840 2841 b = (Mat_SeqBAIJ*)B->data; 2842 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2843 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2844 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2845 2846 if (!flg) { 2847 switch (bs) { 2848 case 1: 2849 B->ops->mult = MatMult_SeqBAIJ_1; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2851 break; 2852 case 2: 2853 B->ops->mult = MatMult_SeqBAIJ_2; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2855 break; 2856 case 3: 2857 B->ops->mult = MatMult_SeqBAIJ_3; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2859 break; 2860 case 4: 2861 B->ops->mult = MatMult_SeqBAIJ_4; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2863 break; 2864 case 5: 2865 B->ops->mult = MatMult_SeqBAIJ_5; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2867 break; 2868 case 6: 2869 B->ops->mult = MatMult_SeqBAIJ_6; 2870 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2871 break; 2872 case 7: 2873 B->ops->mult = MatMult_SeqBAIJ_7; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2875 break; 2876 case 15: 2877 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2878 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2879 break; 2880 default: 2881 B->ops->mult = MatMult_SeqBAIJ_N; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2883 break; 2884 } 2885 } 2886 B->ops->sor = MatSOR_SeqBAIJ; 2887 b->mbs = mbs; 2888 b->nbs = nbs; 2889 if (!skipallocation) { 2890 if (!b->imax) { 2891 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2892 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2893 2894 b->free_imax_ilen = PETSC_TRUE; 2895 } 2896 /* b->ilen will count nonzeros in each block row so far. */ 2897 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2898 if (!nnz) { 2899 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2900 else if (nz < 0) nz = 1; 2901 for (i=0; i<mbs; i++) b->imax[i] = nz; 2902 nz = nz*mbs; 2903 } else { 2904 nz = 0; 2905 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2906 } 2907 2908 /* allocate the matrix space */ 2909 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2910 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2911 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2912 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2913 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2914 2915 b->singlemalloc = PETSC_TRUE; 2916 b->i[0] = 0; 2917 for (i=1; i<mbs+1; i++) { 2918 b->i[i] = b->i[i-1] + b->imax[i-1]; 2919 } 2920 b->free_a = PETSC_TRUE; 2921 b->free_ij = PETSC_TRUE; 2922 } else { 2923 b->free_a = PETSC_FALSE; 2924 b->free_ij = PETSC_FALSE; 2925 } 2926 2927 b->bs2 = bs2; 2928 b->mbs = mbs; 2929 b->nz = 0; 2930 b->maxnz = nz; 2931 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2932 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2933 PetscFunctionReturn(0); 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2938 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2939 { 2940 PetscInt i,m,nz,nz_max=0,*nnz; 2941 PetscScalar *values=0; 2942 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2943 PetscErrorCode ierr; 2944 2945 PetscFunctionBegin; 2946 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2947 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2948 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2949 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2950 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2951 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2952 m = B->rmap->n/bs; 2953 2954 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2955 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2956 for (i=0; i<m; i++) { 2957 nz = ii[i+1]- ii[i]; 2958 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2959 nz_max = PetscMax(nz_max, nz); 2960 nnz[i] = nz; 2961 } 2962 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2963 ierr = PetscFree(nnz);CHKERRQ(ierr); 2964 2965 values = (PetscScalar*)V; 2966 if (!values) { 2967 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2968 } 2969 for (i=0; i<m; i++) { 2970 PetscInt ncols = ii[i+1] - ii[i]; 2971 const PetscInt *icols = jj + ii[i]; 2972 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2973 if (!roworiented) { 2974 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2975 } else { 2976 PetscInt j; 2977 for (j=0; j<ncols; j++) { 2978 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2979 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2980 } 2981 } 2982 } 2983 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2984 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2985 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2986 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 /*MC 2991 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2992 block sparse compressed row format. 2993 2994 Options Database Keys: 2995 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2996 2997 Level: beginner 2998 2999 .seealso: MatCreateSeqBAIJ() 3000 M*/ 3001 3002 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3003 3004 #undef __FUNCT__ 3005 #define __FUNCT__ "MatCreate_SeqBAIJ" 3006 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3007 { 3008 PetscErrorCode ierr; 3009 PetscMPIInt size; 3010 Mat_SeqBAIJ *b; 3011 3012 PetscFunctionBegin; 3013 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3014 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3015 3016 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3017 B->data = (void*)b; 3018 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3019 3020 b->row = 0; 3021 b->col = 0; 3022 b->icol = 0; 3023 b->reallocs = 0; 3024 b->saved_values = 0; 3025 3026 b->roworiented = PETSC_TRUE; 3027 b->nonew = 0; 3028 b->diag = 0; 3029 B->spptr = 0; 3030 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3031 b->keepnonzeropattern = PETSC_FALSE; 3032 3033 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3044 PetscFunctionReturn(0); 3045 } 3046 3047 #undef __FUNCT__ 3048 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3049 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3050 { 3051 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3052 PetscErrorCode ierr; 3053 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3054 3055 PetscFunctionBegin; 3056 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3057 3058 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3059 c->imax = a->imax; 3060 c->ilen = a->ilen; 3061 c->free_imax_ilen = PETSC_FALSE; 3062 } else { 3063 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3064 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3065 for (i=0; i<mbs; i++) { 3066 c->imax[i] = a->imax[i]; 3067 c->ilen[i] = a->ilen[i]; 3068 } 3069 c->free_imax_ilen = PETSC_TRUE; 3070 } 3071 3072 /* allocate the matrix space */ 3073 if (mallocmatspace) { 3074 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3075 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3076 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3077 3078 c->i = a->i; 3079 c->j = a->j; 3080 c->singlemalloc = PETSC_FALSE; 3081 c->free_a = PETSC_TRUE; 3082 c->free_ij = PETSC_FALSE; 3083 c->parent = A; 3084 C->preallocated = PETSC_TRUE; 3085 C->assembled = PETSC_TRUE; 3086 3087 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3088 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3089 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3090 } else { 3091 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3092 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3093 3094 c->singlemalloc = PETSC_TRUE; 3095 c->free_a = PETSC_TRUE; 3096 c->free_ij = PETSC_TRUE; 3097 3098 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3099 if (mbs > 0) { 3100 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3101 if (cpvalues == MAT_COPY_VALUES) { 3102 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3103 } else { 3104 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3105 } 3106 } 3107 C->preallocated = PETSC_TRUE; 3108 C->assembled = PETSC_TRUE; 3109 } 3110 } 3111 3112 c->roworiented = a->roworiented; 3113 c->nonew = a->nonew; 3114 3115 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3116 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3117 3118 c->bs2 = a->bs2; 3119 c->mbs = a->mbs; 3120 c->nbs = a->nbs; 3121 3122 if (a->diag) { 3123 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3124 c->diag = a->diag; 3125 c->free_diag = PETSC_FALSE; 3126 } else { 3127 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3128 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3129 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3130 c->free_diag = PETSC_TRUE; 3131 } 3132 } else c->diag = 0; 3133 3134 c->nz = a->nz; 3135 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3136 c->solve_work = NULL; 3137 c->mult_work = NULL; 3138 c->sor_workt = NULL; 3139 c->sor_work = NULL; 3140 3141 c->compressedrow.use = a->compressedrow.use; 3142 c->compressedrow.nrows = a->compressedrow.nrows; 3143 if (a->compressedrow.use) { 3144 i = a->compressedrow.nrows; 3145 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3146 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3147 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3148 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3149 } else { 3150 c->compressedrow.use = PETSC_FALSE; 3151 c->compressedrow.i = NULL; 3152 c->compressedrow.rindex = NULL; 3153 } 3154 C->nonzerostate = A->nonzerostate; 3155 3156 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3157 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3158 PetscFunctionReturn(0); 3159 } 3160 3161 #undef __FUNCT__ 3162 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3163 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3164 { 3165 PetscErrorCode ierr; 3166 3167 PetscFunctionBegin; 3168 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3169 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3170 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3171 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatLoad_SeqBAIJ" 3177 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3178 { 3179 Mat_SeqBAIJ *a; 3180 PetscErrorCode ierr; 3181 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3182 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3183 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3184 PetscInt *masked,nmask,tmp,bs2,ishift; 3185 PetscMPIInt size; 3186 int fd; 3187 PetscScalar *aa; 3188 MPI_Comm comm; 3189 3190 PetscFunctionBegin; 3191 /* force binary viewer to load .info file if it has not yet done so */ 3192 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3193 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3194 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3195 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3196 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3197 if (bs < 0) bs = 1; 3198 bs2 = bs*bs; 3199 3200 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3201 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3202 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3203 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3204 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3205 M = header[1]; N = header[2]; nz = header[3]; 3206 3207 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3208 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3209 3210 /* 3211 This code adds extra rows to make sure the number of rows is 3212 divisible by the blocksize 3213 */ 3214 mbs = M/bs; 3215 extra_rows = bs - M + bs*(mbs); 3216 if (extra_rows == bs) extra_rows = 0; 3217 else mbs++; 3218 if (extra_rows) { 3219 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3220 } 3221 3222 /* Set global sizes if not already set */ 3223 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3224 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3225 } else { /* Check if the matrix global sizes are correct */ 3226 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3227 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3228 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3229 } 3230 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3231 } 3232 3233 /* read in row lengths */ 3234 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3235 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3236 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3237 3238 /* read in column indices */ 3239 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3240 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3241 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3242 3243 /* loop over row lengths determining block row lengths */ 3244 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3245 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3246 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3247 rowcount = 0; 3248 nzcount = 0; 3249 for (i=0; i<mbs; i++) { 3250 nmask = 0; 3251 for (j=0; j<bs; j++) { 3252 kmax = rowlengths[rowcount]; 3253 for (k=0; k<kmax; k++) { 3254 tmp = jj[nzcount++]/bs; 3255 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3256 } 3257 rowcount++; 3258 } 3259 browlengths[i] += nmask; 3260 /* zero out the mask elements we set */ 3261 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3262 } 3263 3264 /* Do preallocation */ 3265 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3266 a = (Mat_SeqBAIJ*)newmat->data; 3267 3268 /* set matrix "i" values */ 3269 a->i[0] = 0; 3270 for (i=1; i<= mbs; i++) { 3271 a->i[i] = a->i[i-1] + browlengths[i-1]; 3272 a->ilen[i-1] = browlengths[i-1]; 3273 } 3274 a->nz = 0; 3275 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3276 3277 /* read in nonzero values */ 3278 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3279 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3280 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3281 3282 /* set "a" and "j" values into matrix */ 3283 nzcount = 0; jcount = 0; 3284 for (i=0; i<mbs; i++) { 3285 nzcountb = nzcount; 3286 nmask = 0; 3287 for (j=0; j<bs; j++) { 3288 kmax = rowlengths[i*bs+j]; 3289 for (k=0; k<kmax; k++) { 3290 tmp = jj[nzcount++]/bs; 3291 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3292 } 3293 } 3294 /* sort the masked values */ 3295 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3296 3297 /* set "j" values into matrix */ 3298 maskcount = 1; 3299 for (j=0; j<nmask; j++) { 3300 a->j[jcount++] = masked[j]; 3301 mask[masked[j]] = maskcount++; 3302 } 3303 /* set "a" values into matrix */ 3304 ishift = bs2*a->i[i]; 3305 for (j=0; j<bs; j++) { 3306 kmax = rowlengths[i*bs+j]; 3307 for (k=0; k<kmax; k++) { 3308 tmp = jj[nzcountb]/bs; 3309 block = mask[tmp] - 1; 3310 point = jj[nzcountb] - bs*tmp; 3311 idx = ishift + bs2*block + j + bs*point; 3312 a->a[idx] = (MatScalar)aa[nzcountb++]; 3313 } 3314 } 3315 /* zero out the mask elements we set */ 3316 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3317 } 3318 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3319 3320 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3321 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3322 ierr = PetscFree(aa);CHKERRQ(ierr); 3323 ierr = PetscFree(jj);CHKERRQ(ierr); 3324 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3325 3326 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 3331 #undef __FUNCT__ 3332 #define __FUNCT__ "MatCreateSeqBAIJ" 3333 /*@C 3334 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3335 compressed row) format. For good matrix assembly performance the 3336 user should preallocate the matrix storage by setting the parameter nz 3337 (or the array nnz). By setting these parameters accurately, performance 3338 during matrix assembly can be increased by more than a factor of 50. 3339 3340 Collective on MPI_Comm 3341 3342 Input Parameters: 3343 + comm - MPI communicator, set to PETSC_COMM_SELF 3344 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3345 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3346 . m - number of rows 3347 . n - number of columns 3348 . nz - number of nonzero blocks per block row (same for all rows) 3349 - nnz - array containing the number of nonzero blocks in the various block rows 3350 (possibly different for each block row) or NULL 3351 3352 Output Parameter: 3353 . A - the matrix 3354 3355 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3356 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3357 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3358 3359 Options Database Keys: 3360 . -mat_no_unroll - uses code that does not unroll the loops in the 3361 block calculations (much slower) 3362 . -mat_block_size - size of the blocks to use 3363 3364 Level: intermediate 3365 3366 Notes: 3367 The number of rows and columns must be divisible by blocksize. 3368 3369 If the nnz parameter is given then the nz parameter is ignored 3370 3371 A nonzero block is any block that as 1 or more nonzeros in it 3372 3373 The block AIJ format is fully compatible with standard Fortran 77 3374 storage. That is, the stored row and column indices can begin at 3375 either one (as in Fortran) or zero. See the users' manual for details. 3376 3377 Specify the preallocated storage with either nz or nnz (not both). 3378 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3379 allocation. See Users-Manual: ch_mat for details. 3380 matrices. 3381 3382 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3383 @*/ 3384 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3385 { 3386 PetscErrorCode ierr; 3387 3388 PetscFunctionBegin; 3389 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3390 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3391 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3392 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3393 PetscFunctionReturn(0); 3394 } 3395 3396 #undef __FUNCT__ 3397 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3398 /*@C 3399 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3400 per row in the matrix. For good matrix assembly performance the 3401 user should preallocate the matrix storage by setting the parameter nz 3402 (or the array nnz). By setting these parameters accurately, performance 3403 during matrix assembly can be increased by more than a factor of 50. 3404 3405 Collective on MPI_Comm 3406 3407 Input Parameters: 3408 + B - the matrix 3409 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3410 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3411 . nz - number of block nonzeros per block row (same for all rows) 3412 - nnz - array containing the number of block nonzeros in the various block rows 3413 (possibly different for each block row) or NULL 3414 3415 Options Database Keys: 3416 . -mat_no_unroll - uses code that does not unroll the loops in the 3417 block calculations (much slower) 3418 . -mat_block_size - size of the blocks to use 3419 3420 Level: intermediate 3421 3422 Notes: 3423 If the nnz parameter is given then the nz parameter is ignored 3424 3425 You can call MatGetInfo() to get information on how effective the preallocation was; 3426 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3427 You can also run with the option -info and look for messages with the string 3428 malloc in them to see if additional memory allocation was needed. 3429 3430 The block AIJ format is fully compatible with standard Fortran 77 3431 storage. That is, the stored row and column indices can begin at 3432 either one (as in Fortran) or zero. See the users' manual for details. 3433 3434 Specify the preallocated storage with either nz or nnz (not both). 3435 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3436 allocation. See Users-Manual: ch_mat for details. 3437 3438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3439 @*/ 3440 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3441 { 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3446 PetscValidType(B,1); 3447 PetscValidLogicalCollectiveInt(B,bs,2); 3448 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3449 PetscFunctionReturn(0); 3450 } 3451 3452 #undef __FUNCT__ 3453 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3454 /*@C 3455 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3456 (the default sequential PETSc format). 3457 3458 Collective on MPI_Comm 3459 3460 Input Parameters: 3461 + B - the matrix 3462 . i - the indices into j for the start of each local row (starts with zero) 3463 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3464 - v - optional values in the matrix 3465 3466 Level: developer 3467 3468 Notes: 3469 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3470 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3471 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3472 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3473 block column and the second index is over columns within a block. 3474 3475 .keywords: matrix, aij, compressed row, sparse 3476 3477 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3478 @*/ 3479 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3480 { 3481 PetscErrorCode ierr; 3482 3483 PetscFunctionBegin; 3484 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3485 PetscValidType(B,1); 3486 PetscValidLogicalCollectiveInt(B,bs,2); 3487 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3488 PetscFunctionReturn(0); 3489 } 3490 3491 3492 #undef __FUNCT__ 3493 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3494 /*@ 3495 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3496 3497 Collective on MPI_Comm 3498 3499 Input Parameters: 3500 + comm - must be an MPI communicator of size 1 3501 . bs - size of block 3502 . m - number of rows 3503 . n - number of columns 3504 . i - row indices 3505 . j - column indices 3506 - a - matrix values 3507 3508 Output Parameter: 3509 . mat - the matrix 3510 3511 Level: advanced 3512 3513 Notes: 3514 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3515 once the matrix is destroyed 3516 3517 You cannot set new nonzero locations into this matrix, that will generate an error. 3518 3519 The i and j indices are 0 based 3520 3521 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3522 3523 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3524 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3525 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3526 with column-major ordering within blocks. 3527 3528 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3529 3530 @*/ 3531 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3532 { 3533 PetscErrorCode ierr; 3534 PetscInt ii; 3535 Mat_SeqBAIJ *baij; 3536 3537 PetscFunctionBegin; 3538 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3539 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3540 3541 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3542 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3543 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3544 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3545 baij = (Mat_SeqBAIJ*)(*mat)->data; 3546 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3547 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3548 3549 baij->i = i; 3550 baij->j = j; 3551 baij->a = a; 3552 3553 baij->singlemalloc = PETSC_FALSE; 3554 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3555 baij->free_a = PETSC_FALSE; 3556 baij->free_ij = PETSC_FALSE; 3557 3558 for (ii=0; ii<m; ii++) { 3559 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3560 #if defined(PETSC_USE_DEBUG) 3561 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3562 #endif 3563 } 3564 #if defined(PETSC_USE_DEBUG) 3565 for (ii=0; ii<baij->i[m]; ii++) { 3566 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3567 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3568 } 3569 #endif 3570 3571 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3572 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3573 PetscFunctionReturn(0); 3574 } 3575 3576 #undef __FUNCT__ 3577 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3578 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3579 { 3580 PetscErrorCode ierr; 3581 3582 PetscFunctionBegin; 3583 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3584 PetscFunctionReturn(0); 3585 } 3586