1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 913 PetscFunctionBegin; 914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 915 stepval = (n-1)*4; 916 for (k=0; k<m; k++) { /* loop over added rows */ 917 row = im[k]; 918 rp = aj + ai[row]; 919 ap = aa + 16*ai[row]; 920 nrow = ailen[row]; 921 low = 0; 922 high = nrow; 923 for (l=0; l<n; l++) { /* loop over added columns */ 924 col = in[l]; 925 if (col <= lastcol) low = 0; 926 else high = nrow; 927 lastcol = col; 928 value = v + k*(stepval+4 + l)*4; 929 while (high-low > 7) { 930 t = (low+high)/2; 931 if (rp[t] > col) high = t; 932 else low = t; 933 } 934 for (i=low; i<high; i++) { 935 if (rp[i] > col) break; 936 if (rp[i] == col) { 937 bap = ap + 16*i; 938 for (ii=0; ii<4; ii++,value+=stepval) { 939 for (jj=ii; jj<16; jj+=4) { 940 bap[jj] += *value++; 941 } 942 } 943 goto noinsert2; 944 } 945 } 946 N = nrow++ - 1; 947 high++; /* added new column index thus must search to one higher than before */ 948 /* shift up all the later entries in this row */ 949 for (ii=N; ii>=i; ii--) { 950 rp[ii+1] = rp[ii]; 951 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 952 } 953 if (N >= i) { 954 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 955 } 956 rp[i] = col; 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] = *value++; 961 } 962 } 963 noinsert2:; 964 low = i; 965 } 966 ailen[row] = nrow; 967 } 968 PetscFunctionReturnVoid(); 969 } 970 971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 972 #define matsetvalues4_ MATSETVALUES4 973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 974 #define matsetvalues4_ matsetvalues4 975 #endif 976 977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 978 { 979 Mat A = *AA; 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 981 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 982 PetscInt *ai=a->i,*ailen=a->ilen; 983 PetscInt *aj=a->j,brow,bcol; 984 PetscInt ridx,cidx,lastcol = -1; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 987 PetscFunctionBegin; 988 for (k=0; k<m; k++) { /* loop over added rows */ 989 row = im[k]; brow = row/4; 990 rp = aj + ai[brow]; 991 ap = aa + 16*ai[brow]; 992 nrow = ailen[brow]; 993 low = 0; 994 high = nrow; 995 for (l=0; l<n; l++) { /* loop over added columns */ 996 col = in[l]; bcol = col/4; 997 ridx = row % 4; cidx = col % 4; 998 value = v[l + k*n]; 999 if (col <= lastcol) low = 0; 1000 else high = nrow; 1001 lastcol = col; 1002 while (high-low > 7) { 1003 t = (low+high)/2; 1004 if (rp[t] > bcol) high = t; 1005 else low = t; 1006 } 1007 for (i=low; i<high; i++) { 1008 if (rp[i] > bcol) break; 1009 if (rp[i] == bcol) { 1010 bap = ap + 16*i + 4*cidx + ridx; 1011 *bap += value; 1012 goto noinsert1; 1013 } 1014 } 1015 N = nrow++ - 1; 1016 high++; /* added new column thus must search to one higher than before */ 1017 /* shift up all the later entries in this row */ 1018 for (ii=N; ii>=i; ii--) { 1019 rp[ii+1] = rp[ii]; 1020 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1021 } 1022 if (N>=i) { 1023 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1024 } 1025 rp[i] = bcol; 1026 ap[16*i + 4*cidx + ridx] = value; 1027 noinsert1:; 1028 low = i; 1029 } 1030 ailen[brow] = nrow; 1031 } 1032 PetscFunctionReturnVoid(); 1033 } 1034 1035 /* 1036 Checks for missing diagonals 1037 */ 1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1039 { 1040 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1041 PetscErrorCode ierr; 1042 PetscInt *diag,*ii = a->i,i; 1043 1044 PetscFunctionBegin; 1045 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1046 *missing = PETSC_FALSE; 1047 if (A->rmap->n > 0 && !ii) { 1048 *missing = PETSC_TRUE; 1049 if (d) *d = 0; 1050 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1051 } else { 1052 diag = a->diag; 1053 for (i=0; i<a->mbs; i++) { 1054 if (diag[i] >= ii[i+1]) { 1055 *missing = PETSC_TRUE; 1056 if (d) *d = i; 1057 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1058 break; 1059 } 1060 } 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1066 { 1067 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 PetscInt i,j,m = a->mbs; 1070 1071 PetscFunctionBegin; 1072 if (!a->diag) { 1073 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1074 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1075 a->free_diag = PETSC_TRUE; 1076 } 1077 for (i=0; i<m; i++) { 1078 a->diag[i] = a->i[i+1]; 1079 for (j=a->i[i]; j<a->i[i+1]; j++) { 1080 if (a->j[j] == i) { 1081 a->diag[i] = j; 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = oshift; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1174 { 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (!ia) PetscFunctionReturn(0); 1179 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1180 ierr = PetscFree(*ia);CHKERRQ(ierr); 1181 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1187 { 1188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 #if defined(PETSC_USE_LOG) 1193 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1194 #endif 1195 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1197 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1198 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1200 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1201 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1204 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1206 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1207 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1208 1209 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1210 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1211 ierr = PetscFree(A->data);CHKERRQ(ierr); 1212 1213 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1224 #if defined(PETSC_HAVE_HYPRE) 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1226 #endif 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1233 { 1234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 switch (op) { 1239 case MAT_ROW_ORIENTED: 1240 a->roworiented = flg; 1241 break; 1242 case MAT_KEEP_NONZERO_PATTERN: 1243 a->keepnonzeropattern = flg; 1244 break; 1245 case MAT_NEW_NONZERO_LOCATIONS: 1246 a->nonew = (flg ? 0 : 1); 1247 break; 1248 case MAT_NEW_NONZERO_LOCATION_ERR: 1249 a->nonew = (flg ? -1 : 0); 1250 break; 1251 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1252 a->nonew = (flg ? -2 : 0); 1253 break; 1254 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1255 a->nounused = (flg ? -1 : 0); 1256 break; 1257 case MAT_NEW_DIAGONALS: 1258 case MAT_IGNORE_OFF_PROC_ENTRIES: 1259 case MAT_USE_HASH_TABLE: 1260 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1261 break; 1262 case MAT_SPD: 1263 case MAT_SYMMETRIC: 1264 case MAT_STRUCTURALLY_SYMMETRIC: 1265 case MAT_HERMITIAN: 1266 case MAT_SYMMETRY_ETERNAL: 1267 case MAT_SUBMAT_SINGLEIS: 1268 case MAT_STRUCTURE_ONLY: 1269 /* These options are handled directly by MatSetOption() */ 1270 break; 1271 default: 1272 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1278 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1279 { 1280 PetscErrorCode ierr; 1281 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1282 MatScalar *aa_i; 1283 PetscScalar *v_i; 1284 1285 PetscFunctionBegin; 1286 bs = A->rmap->bs; 1287 bs2 = bs*bs; 1288 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1289 1290 bn = row/bs; /* Block number */ 1291 bp = row % bs; /* Block Position */ 1292 M = ai[bn+1] - ai[bn]; 1293 *nz = bs*M; 1294 1295 if (v) { 1296 *v = 0; 1297 if (*nz) { 1298 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1299 for (i=0; i<M; i++) { /* for each block in the block row */ 1300 v_i = *v + i*bs; 1301 aa_i = aa + bs2*(ai[bn] + i); 1302 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1303 } 1304 } 1305 } 1306 1307 if (idx) { 1308 *idx = 0; 1309 if (*nz) { 1310 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1311 for (i=0; i<M; i++) { /* for each block in the block row */ 1312 idx_i = *idx + i*bs; 1313 itmp = bs*aj[ai[bn] + i]; 1314 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1315 } 1316 } 1317 } 1318 PetscFunctionReturn(0); 1319 } 1320 1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1322 { 1323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1324 PetscErrorCode ierr; 1325 1326 PetscFunctionBegin; 1327 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1332 { 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1337 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1338 PetscFunctionReturn(0); 1339 } 1340 1341 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1342 { 1343 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1344 Mat C; 1345 PetscErrorCode ierr; 1346 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1347 PetscInt *rows,*cols,bs2=a->bs2; 1348 MatScalar *array; 1349 1350 PetscFunctionBegin; 1351 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1352 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1353 1354 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1355 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1356 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1357 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1358 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1359 ierr = PetscFree(col);CHKERRQ(ierr); 1360 } else { 1361 C = *B; 1362 } 1363 1364 array = a->a; 1365 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1366 for (i=0; i<mbs; i++) { 1367 cols[0] = i*bs; 1368 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1369 len = ai[i+1] - ai[i]; 1370 for (j=0; j<len; j++) { 1371 rows[0] = (*aj++)*bs; 1372 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1373 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1374 array += bs2; 1375 } 1376 } 1377 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1378 1379 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1380 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1381 1382 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1383 *B = C; 1384 } else { 1385 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1391 { 1392 PetscErrorCode ierr; 1393 Mat Btrans; 1394 1395 PetscFunctionBegin; 1396 *f = PETSC_FALSE; 1397 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1398 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1399 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1404 { 1405 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1406 PetscErrorCode ierr; 1407 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1408 int fd; 1409 PetscScalar *aa; 1410 FILE *file; 1411 1412 PetscFunctionBegin; 1413 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1414 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1415 col_lens[0] = MAT_FILE_CLASSID; 1416 1417 col_lens[1] = A->rmap->N; 1418 col_lens[2] = A->cmap->n; 1419 col_lens[3] = a->nz*bs2; 1420 1421 /* store lengths of each row and write (including header) to file */ 1422 count = 0; 1423 for (i=0; i<a->mbs; i++) { 1424 for (j=0; j<bs; j++) { 1425 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1426 } 1427 } 1428 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1429 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1430 1431 /* store column indices (zero start index) */ 1432 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1433 count = 0; 1434 for (i=0; i<a->mbs; i++) { 1435 for (j=0; j<bs; j++) { 1436 for (k=a->i[i]; k<a->i[i+1]; k++) { 1437 for (l=0; l<bs; l++) { 1438 jj[count++] = bs*a->j[k] + l; 1439 } 1440 } 1441 } 1442 } 1443 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1444 ierr = PetscFree(jj);CHKERRQ(ierr); 1445 1446 /* store nonzero values */ 1447 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1448 count = 0; 1449 for (i=0; i<a->mbs; i++) { 1450 for (j=0; j<bs; j++) { 1451 for (k=a->i[i]; k<a->i[i+1]; k++) { 1452 for (l=0; l<bs; l++) { 1453 aa[count++] = a->a[bs2*k + l*bs + j]; 1454 } 1455 } 1456 } 1457 } 1458 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1459 ierr = PetscFree(aa);CHKERRQ(ierr); 1460 1461 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1462 if (file) { 1463 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1469 { 1470 PetscErrorCode ierr; 1471 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1472 PetscInt i,bs = A->rmap->bs,k; 1473 1474 PetscFunctionBegin; 1475 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1476 for (i=0; i<a->mbs; i++) { 1477 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1478 for (k=a->i[i]; k<a->i[i+1]; k++) { 1479 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1480 } 1481 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1482 } 1483 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1488 { 1489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1490 PetscErrorCode ierr; 1491 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1492 PetscViewerFormat format; 1493 1494 PetscFunctionBegin; 1495 if (A->structure_only) { 1496 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1501 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1502 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1503 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1504 const char *matname; 1505 Mat aij; 1506 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1507 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1508 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1509 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1510 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1511 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1512 PetscFunctionReturn(0); 1513 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1514 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1515 for (i=0; i<a->mbs; i++) { 1516 for (j=0; j<bs; j++) { 1517 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1518 for (k=a->i[i]; k<a->i[i+1]; k++) { 1519 for (l=0; l<bs; l++) { 1520 #if defined(PETSC_USE_COMPLEX) 1521 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1522 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1523 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1524 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1525 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1526 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1527 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1528 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1529 } 1530 #else 1531 if (a->a[bs2*k + l*bs + j] != 0.0) { 1532 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1533 } 1534 #endif 1535 } 1536 } 1537 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1538 } 1539 } 1540 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1541 } else { 1542 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1543 for (i=0; i<a->mbs; i++) { 1544 for (j=0; j<bs; j++) { 1545 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1546 for (k=a->i[i]; k<a->i[i+1]; k++) { 1547 for (l=0; l<bs; l++) { 1548 #if defined(PETSC_USE_COMPLEX) 1549 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1550 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1551 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1552 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1553 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1554 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1555 } else { 1556 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1557 } 1558 #else 1559 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1560 #endif 1561 } 1562 } 1563 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1564 } 1565 } 1566 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1567 } 1568 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #include <petscdraw.h> 1573 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1574 { 1575 Mat A = (Mat) Aa; 1576 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1577 PetscErrorCode ierr; 1578 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1579 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1580 MatScalar *aa; 1581 PetscViewer viewer; 1582 PetscViewerFormat format; 1583 1584 PetscFunctionBegin; 1585 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1586 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1587 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1588 1589 /* loop over matrix elements drawing boxes */ 1590 1591 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1592 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1593 /* Blue for negative, Cyan for zero and Red for positive */ 1594 color = PETSC_DRAW_BLUE; 1595 for (i=0,row=0; i<mbs; i++,row+=bs) { 1596 for (j=a->i[i]; j<a->i[i+1]; j++) { 1597 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1598 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1599 aa = a->a + j*bs2; 1600 for (k=0; k<bs; k++) { 1601 for (l=0; l<bs; l++) { 1602 if (PetscRealPart(*aa++) >= 0.) continue; 1603 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1604 } 1605 } 1606 } 1607 } 1608 color = PETSC_DRAW_CYAN; 1609 for (i=0,row=0; i<mbs; i++,row+=bs) { 1610 for (j=a->i[i]; j<a->i[i+1]; j++) { 1611 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1612 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1613 aa = a->a + j*bs2; 1614 for (k=0; k<bs; k++) { 1615 for (l=0; l<bs; l++) { 1616 if (PetscRealPart(*aa++) != 0.) continue; 1617 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1618 } 1619 } 1620 } 1621 } 1622 color = PETSC_DRAW_RED; 1623 for (i=0,row=0; i<mbs; i++,row+=bs) { 1624 for (j=a->i[i]; j<a->i[i+1]; j++) { 1625 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1626 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1627 aa = a->a + j*bs2; 1628 for (k=0; k<bs; k++) { 1629 for (l=0; l<bs; l++) { 1630 if (PetscRealPart(*aa++) <= 0.) continue; 1631 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1632 } 1633 } 1634 } 1635 } 1636 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1637 } else { 1638 /* use contour shading to indicate magnitude of values */ 1639 /* first determine max of all nonzero values */ 1640 PetscReal minv = 0.0, maxv = 0.0; 1641 PetscDraw popup; 1642 1643 for (i=0; i<a->nz*a->bs2; i++) { 1644 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1645 } 1646 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1647 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1648 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1649 1650 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1651 for (i=0,row=0; i<mbs; i++,row+=bs) { 1652 for (j=a->i[i]; j<a->i[i+1]; j++) { 1653 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1654 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1655 aa = a->a + j*bs2; 1656 for (k=0; k<bs; k++) { 1657 for (l=0; l<bs; l++) { 1658 MatScalar v = *aa++; 1659 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1660 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1661 } 1662 } 1663 } 1664 } 1665 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1666 } 1667 PetscFunctionReturn(0); 1668 } 1669 1670 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1671 { 1672 PetscErrorCode ierr; 1673 PetscReal xl,yl,xr,yr,w,h; 1674 PetscDraw draw; 1675 PetscBool isnull; 1676 1677 PetscFunctionBegin; 1678 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1679 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1680 if (isnull) PetscFunctionReturn(0); 1681 1682 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1683 xr += w; yr += h; xl = -w; yl = -h; 1684 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1685 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1686 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1687 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1688 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1689 PetscFunctionReturn(0); 1690 } 1691 1692 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1693 { 1694 PetscErrorCode ierr; 1695 PetscBool iascii,isbinary,isdraw; 1696 1697 PetscFunctionBegin; 1698 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1699 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1700 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1701 if (iascii) { 1702 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1703 } else if (isbinary) { 1704 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1705 } else if (isdraw) { 1706 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1707 } else { 1708 Mat B; 1709 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1710 ierr = MatView(B,viewer);CHKERRQ(ierr); 1711 ierr = MatDestroy(&B);CHKERRQ(ierr); 1712 } 1713 PetscFunctionReturn(0); 1714 } 1715 1716 1717 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1718 { 1719 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1720 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1721 PetscInt *ai = a->i,*ailen = a->ilen; 1722 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1723 MatScalar *ap,*aa = a->a; 1724 1725 PetscFunctionBegin; 1726 for (k=0; k<m; k++) { /* loop over rows */ 1727 row = im[k]; brow = row/bs; 1728 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1729 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1730 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1731 nrow = ailen[brow]; 1732 for (l=0; l<n; l++) { /* loop over columns */ 1733 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1734 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1735 col = in[l]; 1736 bcol = col/bs; 1737 cidx = col%bs; 1738 ridx = row%bs; 1739 high = nrow; 1740 low = 0; /* assume unsorted */ 1741 while (high-low > 5) { 1742 t = (low+high)/2; 1743 if (rp[t] > bcol) high = t; 1744 else low = t; 1745 } 1746 for (i=low; i<high; i++) { 1747 if (rp[i] > bcol) break; 1748 if (rp[i] == bcol) { 1749 *v++ = ap[bs2*i+bs*cidx+ridx]; 1750 goto finished; 1751 } 1752 } 1753 *v++ = 0.0; 1754 finished:; 1755 } 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1761 { 1762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1763 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1764 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1765 PetscErrorCode ierr; 1766 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1767 PetscBool roworiented=a->roworiented; 1768 const PetscScalar *value = v; 1769 MatScalar *ap=NULL,*aa = a->a,*bap; 1770 1771 PetscFunctionBegin; 1772 if (roworiented) { 1773 stepval = (n-1)*bs; 1774 } else { 1775 stepval = (m-1)*bs; 1776 } 1777 for (k=0; k<m; k++) { /* loop over added rows */ 1778 row = im[k]; 1779 if (row < 0) continue; 1780 #if defined(PETSC_USE_DEBUG) 1781 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1782 #endif 1783 rp = aj + ai[row]; 1784 if (!A->structure_only) ap = aa + bs2*ai[row]; 1785 rmax = imax[row]; 1786 nrow = ailen[row]; 1787 low = 0; 1788 high = nrow; 1789 for (l=0; l<n; l++) { /* loop over added columns */ 1790 if (in[l] < 0) continue; 1791 #if defined(PETSC_USE_DEBUG) 1792 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1793 #endif 1794 col = in[l]; 1795 if (!A->structure_only) { 1796 if (roworiented) { 1797 value = v + (k*(stepval+bs) + l)*bs; 1798 } else { 1799 value = v + (l*(stepval+bs) + k)*bs; 1800 } 1801 } 1802 if (col <= lastcol) low = 0; 1803 else high = nrow; 1804 lastcol = col; 1805 while (high-low > 7) { 1806 t = (low+high)/2; 1807 if (rp[t] > col) high = t; 1808 else low = t; 1809 } 1810 for (i=low; i<high; i++) { 1811 if (rp[i] > col) break; 1812 if (rp[i] == col) { 1813 if (A->structure_only) goto noinsert2; 1814 bap = ap + bs2*i; 1815 if (roworiented) { 1816 if (is == ADD_VALUES) { 1817 for (ii=0; ii<bs; ii++,value+=stepval) { 1818 for (jj=ii; jj<bs2; jj+=bs) { 1819 bap[jj] += *value++; 1820 } 1821 } 1822 } else { 1823 for (ii=0; ii<bs; ii++,value+=stepval) { 1824 for (jj=ii; jj<bs2; jj+=bs) { 1825 bap[jj] = *value++; 1826 } 1827 } 1828 } 1829 } else { 1830 if (is == ADD_VALUES) { 1831 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1832 for (jj=0; jj<bs; jj++) { 1833 bap[jj] += value[jj]; 1834 } 1835 bap += bs; 1836 } 1837 } else { 1838 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1839 for (jj=0; jj<bs; jj++) { 1840 bap[jj] = value[jj]; 1841 } 1842 bap += bs; 1843 } 1844 } 1845 } 1846 goto noinsert2; 1847 } 1848 } 1849 if (nonew == 1) goto noinsert2; 1850 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1851 if (A->structure_only) { 1852 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1853 } else { 1854 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1855 } 1856 N = nrow++ - 1; high++; 1857 /* shift up all the later entries in this row */ 1858 for (ii=N; ii>=i; ii--) { 1859 rp[ii+1] = rp[ii]; 1860 if (!A->structure_only) { 1861 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1862 } 1863 } 1864 if (N >= i && !A->structure_only) { 1865 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1866 } 1867 1868 rp[i] = col; 1869 if (!A->structure_only) { 1870 bap = ap + bs2*i; 1871 if (roworiented) { 1872 for (ii=0; ii<bs; ii++,value+=stepval) { 1873 for (jj=ii; jj<bs2; jj+=bs) { 1874 bap[jj] = *value++; 1875 } 1876 } 1877 } else { 1878 for (ii=0; ii<bs; ii++,value+=stepval) { 1879 for (jj=0; jj<bs; jj++) { 1880 *bap++ = *value++; 1881 } 1882 } 1883 } 1884 } 1885 noinsert2:; 1886 low = i; 1887 } 1888 ailen[row] = nrow; 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1894 { 1895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1896 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1897 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1898 PetscErrorCode ierr; 1899 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1900 MatScalar *aa = a->a,*ap; 1901 PetscReal ratio=0.6; 1902 1903 PetscFunctionBegin; 1904 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1905 1906 if (m) rmax = ailen[0]; 1907 for (i=1; i<mbs; i++) { 1908 /* move each row back by the amount of empty slots (fshift) before it*/ 1909 fshift += imax[i-1] - ailen[i-1]; 1910 rmax = PetscMax(rmax,ailen[i]); 1911 if (fshift) { 1912 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1913 N = ailen[i]; 1914 for (j=0; j<N; j++) { 1915 ip[j-fshift] = ip[j]; 1916 if (!A->structure_only) { 1917 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1918 } 1919 } 1920 } 1921 ai[i] = ai[i-1] + ailen[i-1]; 1922 } 1923 if (mbs) { 1924 fshift += imax[mbs-1] - ailen[mbs-1]; 1925 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1926 } 1927 1928 /* reset ilen and imax for each row */ 1929 a->nonzerorowcnt = 0; 1930 if (A->structure_only) { 1931 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1932 } else { /* !A->structure_only */ 1933 for (i=0; i<mbs; i++) { 1934 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1935 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1936 } 1937 } 1938 a->nz = ai[mbs]; 1939 1940 /* diagonals may have moved, so kill the diagonal pointers */ 1941 a->idiagvalid = PETSC_FALSE; 1942 if (fshift && a->diag) { 1943 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945 a->diag = 0; 1946 } 1947 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1948 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1951 1952 A->info.mallocs += a->reallocs; 1953 a->reallocs = 0; 1954 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1955 a->rmax = rmax; 1956 1957 if (!A->structure_only) { 1958 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1959 } 1960 PetscFunctionReturn(0); 1961 } 1962 1963 /* 1964 This function returns an array of flags which indicate the locations of contiguous 1965 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1966 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1967 Assume: sizes should be long enough to hold all the values. 1968 */ 1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1970 { 1971 PetscInt i,j,k,row; 1972 PetscBool flg; 1973 1974 PetscFunctionBegin; 1975 for (i=0,j=0; i<n; j++) { 1976 row = idx[i]; 1977 if (row%bs!=0) { /* Not the begining of a block */ 1978 sizes[j] = 1; 1979 i++; 1980 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1981 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1982 i++; 1983 } else { /* Begining of the block, so check if the complete block exists */ 1984 flg = PETSC_TRUE; 1985 for (k=1; k<bs; k++) { 1986 if (row+k != idx[i+k]) { /* break in the block */ 1987 flg = PETSC_FALSE; 1988 break; 1989 } 1990 } 1991 if (flg) { /* No break in the bs */ 1992 sizes[j] = bs; 1993 i += bs; 1994 } else { 1995 sizes[j] = 1; 1996 i++; 1997 } 1998 } 1999 } 2000 *bs_max = j; 2001 PetscFunctionReturn(0); 2002 } 2003 2004 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2005 { 2006 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2007 PetscErrorCode ierr; 2008 PetscInt i,j,k,count,*rows; 2009 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2010 PetscScalar zero = 0.0; 2011 MatScalar *aa; 2012 const PetscScalar *xx; 2013 PetscScalar *bb; 2014 2015 PetscFunctionBegin; 2016 /* fix right hand side if needed */ 2017 if (x && b) { 2018 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2019 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2020 for (i=0; i<is_n; i++) { 2021 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2022 } 2023 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2024 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2025 } 2026 2027 /* Make a copy of the IS and sort it */ 2028 /* allocate memory for rows,sizes */ 2029 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2030 2031 /* copy IS values to rows, and sort them */ 2032 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2033 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2034 2035 if (baij->keepnonzeropattern) { 2036 for (i=0; i<is_n; i++) sizes[i] = 1; 2037 bs_max = is_n; 2038 } else { 2039 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2040 A->nonzerostate++; 2041 } 2042 2043 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2044 row = rows[j]; 2045 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2046 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2047 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2048 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2049 if (diag != (PetscScalar)0.0) { 2050 if (baij->ilen[row/bs] > 0) { 2051 baij->ilen[row/bs] = 1; 2052 baij->j[baij->i[row/bs]] = row/bs; 2053 2054 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2055 } 2056 /* Now insert all the diagonal values for this bs */ 2057 for (k=0; k<bs; k++) { 2058 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 } else { /* (diag == 0.0) */ 2061 baij->ilen[row/bs] = 0; 2062 } /* end (diag == 0.0) */ 2063 } else { /* (sizes[i] != bs) */ 2064 #if defined(PETSC_USE_DEBUG) 2065 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2066 #endif 2067 for (k=0; k<count; k++) { 2068 aa[0] = zero; 2069 aa += bs; 2070 } 2071 if (diag != (PetscScalar)0.0) { 2072 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2073 } 2074 } 2075 } 2076 2077 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2083 { 2084 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2085 PetscErrorCode ierr; 2086 PetscInt i,j,k,count; 2087 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2088 PetscScalar zero = 0.0; 2089 MatScalar *aa; 2090 const PetscScalar *xx; 2091 PetscScalar *bb; 2092 PetscBool *zeroed,vecs = PETSC_FALSE; 2093 2094 PetscFunctionBegin; 2095 /* fix right hand side if needed */ 2096 if (x && b) { 2097 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2098 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2099 vecs = PETSC_TRUE; 2100 } 2101 2102 /* zero the columns */ 2103 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2104 for (i=0; i<is_n; i++) { 2105 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2106 zeroed[is_idx[i]] = PETSC_TRUE; 2107 } 2108 for (i=0; i<A->rmap->N; i++) { 2109 if (!zeroed[i]) { 2110 row = i/bs; 2111 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2112 for (k=0; k<bs; k++) { 2113 col = bs*baij->j[j] + k; 2114 if (zeroed[col]) { 2115 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2116 if (vecs) bb[i] -= aa[0]*xx[col]; 2117 aa[0] = 0.0; 2118 } 2119 } 2120 } 2121 } else if (vecs) bb[i] = diag*xx[i]; 2122 } 2123 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2124 if (vecs) { 2125 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2126 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2127 } 2128 2129 /* zero the rows */ 2130 for (i=0; i<is_n; i++) { 2131 row = is_idx[i]; 2132 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2133 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2134 for (k=0; k<count; k++) { 2135 aa[0] = zero; 2136 aa += bs; 2137 } 2138 if (diag != (PetscScalar)0.0) { 2139 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2140 } 2141 } 2142 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2147 { 2148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2149 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2150 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2151 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2152 PetscErrorCode ierr; 2153 PetscInt ridx,cidx,bs2=a->bs2; 2154 PetscBool roworiented=a->roworiented; 2155 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2156 2157 PetscFunctionBegin; 2158 for (k=0; k<m; k++) { /* loop over added rows */ 2159 row = im[k]; 2160 brow = row/bs; 2161 if (row < 0) continue; 2162 #if defined(PETSC_USE_DEBUG) 2163 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2164 #endif 2165 rp = aj + ai[brow]; 2166 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2167 rmax = imax[brow]; 2168 nrow = ailen[brow]; 2169 low = 0; 2170 high = nrow; 2171 for (l=0; l<n; l++) { /* loop over added columns */ 2172 if (in[l] < 0) continue; 2173 #if defined(PETSC_USE_DEBUG) 2174 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2175 #endif 2176 col = in[l]; bcol = col/bs; 2177 ridx = row % bs; cidx = col % bs; 2178 if (!A->structure_only) { 2179 if (roworiented) { 2180 value = v[l + k*n]; 2181 } else { 2182 value = v[k + l*m]; 2183 } 2184 } 2185 if (col <= lastcol) low = 0; else high = nrow; 2186 lastcol = col; 2187 while (high-low > 7) { 2188 t = (low+high)/2; 2189 if (rp[t] > bcol) high = t; 2190 else low = t; 2191 } 2192 for (i=low; i<high; i++) { 2193 if (rp[i] > bcol) break; 2194 if (rp[i] == bcol) { 2195 bap = ap + bs2*i + bs*cidx + ridx; 2196 if (!A->structure_only) { 2197 if (is == ADD_VALUES) *bap += value; 2198 else *bap = value; 2199 } 2200 goto noinsert1; 2201 } 2202 } 2203 if (nonew == 1) goto noinsert1; 2204 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2205 if (A->structure_only) { 2206 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2207 } else { 2208 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2209 } 2210 N = nrow++ - 1; high++; 2211 /* shift up all the later entries in this row */ 2212 for (ii=N; ii>=i; ii--) { 2213 rp[ii+1] = rp[ii]; 2214 if (!A->structure_only) { 2215 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2216 } 2217 } 2218 if (N>=i && !A->structure_only) { 2219 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2220 } 2221 rp[i] = bcol; 2222 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2223 a->nz++; 2224 A->nonzerostate++; 2225 noinsert1:; 2226 low = i; 2227 } 2228 ailen[brow] = nrow; 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2234 { 2235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2236 Mat outA; 2237 PetscErrorCode ierr; 2238 PetscBool row_identity,col_identity; 2239 2240 PetscFunctionBegin; 2241 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2242 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2243 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2244 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2245 2246 outA = inA; 2247 inA->factortype = MAT_FACTOR_LU; 2248 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2249 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2250 2251 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2252 2253 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2254 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2255 a->row = row; 2256 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2257 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2258 a->col = col; 2259 2260 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2261 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2262 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2263 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2264 2265 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2266 if (!a->solve_work) { 2267 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2268 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2269 } 2270 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2275 { 2276 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2277 PetscInt i,nz,mbs; 2278 2279 PetscFunctionBegin; 2280 nz = baij->maxnz; 2281 mbs = baij->mbs; 2282 for (i=0; i<nz; i++) { 2283 baij->j[i] = indices[i]; 2284 } 2285 baij->nz = nz; 2286 for (i=0; i<mbs; i++) { 2287 baij->ilen[i] = baij->imax[i]; 2288 } 2289 PetscFunctionReturn(0); 2290 } 2291 2292 /*@ 2293 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2294 in the matrix. 2295 2296 Input Parameters: 2297 + mat - the SeqBAIJ matrix 2298 - indices - the column indices 2299 2300 Level: advanced 2301 2302 Notes: 2303 This can be called if you have precomputed the nonzero structure of the 2304 matrix and want to provide it to the matrix object to improve the performance 2305 of the MatSetValues() operation. 2306 2307 You MUST have set the correct numbers of nonzeros per row in the call to 2308 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2309 2310 MUST be called before any calls to MatSetValues(); 2311 2312 @*/ 2313 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2314 { 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2319 PetscValidPointer(indices,2); 2320 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2321 PetscFunctionReturn(0); 2322 } 2323 2324 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2325 { 2326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2327 PetscErrorCode ierr; 2328 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2329 PetscReal atmp; 2330 PetscScalar *x,zero = 0.0; 2331 MatScalar *aa; 2332 PetscInt ncols,brow,krow,kcol; 2333 2334 PetscFunctionBegin; 2335 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2336 bs = A->rmap->bs; 2337 aa = a->a; 2338 ai = a->i; 2339 aj = a->j; 2340 mbs = a->mbs; 2341 2342 ierr = VecSet(v,zero);CHKERRQ(ierr); 2343 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2344 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2345 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2346 for (i=0; i<mbs; i++) { 2347 ncols = ai[1] - ai[0]; ai++; 2348 brow = bs*i; 2349 for (j=0; j<ncols; j++) { 2350 for (kcol=0; kcol<bs; kcol++) { 2351 for (krow=0; krow<bs; krow++) { 2352 atmp = PetscAbsScalar(*aa);aa++; 2353 row = brow + krow; /* row index */ 2354 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2355 } 2356 } 2357 aj++; 2358 } 2359 } 2360 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2365 { 2366 PetscErrorCode ierr; 2367 2368 PetscFunctionBegin; 2369 /* If the two matrices have the same copy implementation, use fast copy. */ 2370 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2372 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2373 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2374 2375 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2376 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2377 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2378 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2379 } else { 2380 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2386 { 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 2394 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2395 { 2396 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2397 2398 PetscFunctionBegin; 2399 *array = a->a; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2404 { 2405 PetscFunctionBegin; 2406 PetscFunctionReturn(0); 2407 } 2408 2409 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2410 { 2411 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2412 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2413 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 /* Set the number of nonzeros in the new matrix */ 2418 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2423 { 2424 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2425 PetscErrorCode ierr; 2426 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2427 PetscBLASInt one=1; 2428 2429 PetscFunctionBegin; 2430 if (str == SAME_NONZERO_PATTERN) { 2431 PetscScalar alpha = a; 2432 PetscBLASInt bnz; 2433 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2434 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2435 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2436 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2437 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2438 } else { 2439 Mat B; 2440 PetscInt *nnz; 2441 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2442 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2443 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2444 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2445 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2446 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2447 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2448 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2449 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2450 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2451 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2452 ierr = PetscFree(nnz);CHKERRQ(ierr); 2453 } 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2458 { 2459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2460 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2461 MatScalar *aa = a->a; 2462 2463 PetscFunctionBegin; 2464 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2469 { 2470 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2471 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2472 MatScalar *aa = a->a; 2473 2474 PetscFunctionBegin; 2475 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 /* 2480 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2481 */ 2482 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2483 { 2484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2485 PetscErrorCode ierr; 2486 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2487 PetscInt nz = a->i[m],row,*jj,mr,col; 2488 2489 PetscFunctionBegin; 2490 *nn = n; 2491 if (!ia) PetscFunctionReturn(0); 2492 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2493 else { 2494 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2495 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2496 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2497 jj = a->j; 2498 for (i=0; i<nz; i++) { 2499 collengths[jj[i]]++; 2500 } 2501 cia[0] = oshift; 2502 for (i=0; i<n; i++) { 2503 cia[i+1] = cia[i] + collengths[i]; 2504 } 2505 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2506 jj = a->j; 2507 for (row=0; row<m; row++) { 2508 mr = a->i[row+1] - a->i[row]; 2509 for (i=0; i<mr; i++) { 2510 col = *jj++; 2511 2512 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2513 } 2514 } 2515 ierr = PetscFree(collengths);CHKERRQ(ierr); 2516 *ia = cia; *ja = cja; 2517 } 2518 PetscFunctionReturn(0); 2519 } 2520 2521 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2522 { 2523 PetscErrorCode ierr; 2524 2525 PetscFunctionBegin; 2526 if (!ia) PetscFunctionReturn(0); 2527 ierr = PetscFree(*ia);CHKERRQ(ierr); 2528 ierr = PetscFree(*ja);CHKERRQ(ierr); 2529 PetscFunctionReturn(0); 2530 } 2531 2532 /* 2533 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2534 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2535 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2536 */ 2537 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2538 { 2539 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2540 PetscErrorCode ierr; 2541 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2542 PetscInt nz = a->i[m],row,*jj,mr,col; 2543 PetscInt *cspidx; 2544 2545 PetscFunctionBegin; 2546 *nn = n; 2547 if (!ia) PetscFunctionReturn(0); 2548 2549 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2550 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2551 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2552 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2553 jj = a->j; 2554 for (i=0; i<nz; i++) { 2555 collengths[jj[i]]++; 2556 } 2557 cia[0] = oshift; 2558 for (i=0; i<n; i++) { 2559 cia[i+1] = cia[i] + collengths[i]; 2560 } 2561 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2562 jj = a->j; 2563 for (row=0; row<m; row++) { 2564 mr = a->i[row+1] - a->i[row]; 2565 for (i=0; i<mr; i++) { 2566 col = *jj++; 2567 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2568 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2569 } 2570 } 2571 ierr = PetscFree(collengths);CHKERRQ(ierr); 2572 *ia = cia; *ja = cja; 2573 *spidx = cspidx; 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2578 { 2579 PetscErrorCode ierr; 2580 2581 PetscFunctionBegin; 2582 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2583 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2588 { 2589 PetscErrorCode ierr; 2590 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2591 2592 PetscFunctionBegin; 2593 if (!Y->preallocated || !aij->nz) { 2594 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2595 } 2596 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 /* -------------------------------------------------------------------*/ 2601 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2602 MatGetRow_SeqBAIJ, 2603 MatRestoreRow_SeqBAIJ, 2604 MatMult_SeqBAIJ_N, 2605 /* 4*/ MatMultAdd_SeqBAIJ_N, 2606 MatMultTranspose_SeqBAIJ, 2607 MatMultTransposeAdd_SeqBAIJ, 2608 0, 2609 0, 2610 0, 2611 /* 10*/ 0, 2612 MatLUFactor_SeqBAIJ, 2613 0, 2614 0, 2615 MatTranspose_SeqBAIJ, 2616 /* 15*/ MatGetInfo_SeqBAIJ, 2617 MatEqual_SeqBAIJ, 2618 MatGetDiagonal_SeqBAIJ, 2619 MatDiagonalScale_SeqBAIJ, 2620 MatNorm_SeqBAIJ, 2621 /* 20*/ 0, 2622 MatAssemblyEnd_SeqBAIJ, 2623 MatSetOption_SeqBAIJ, 2624 MatZeroEntries_SeqBAIJ, 2625 /* 24*/ MatZeroRows_SeqBAIJ, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /* 29*/ MatSetUp_SeqBAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /* 34*/ MatDuplicate_SeqBAIJ, 2636 0, 2637 0, 2638 MatILUFactor_SeqBAIJ, 2639 0, 2640 /* 39*/ MatAXPY_SeqBAIJ, 2641 MatCreateSubMatrices_SeqBAIJ, 2642 MatIncreaseOverlap_SeqBAIJ, 2643 MatGetValues_SeqBAIJ, 2644 MatCopy_SeqBAIJ, 2645 /* 44*/ 0, 2646 MatScale_SeqBAIJ, 2647 MatShift_SeqBAIJ, 2648 0, 2649 MatZeroRowsColumns_SeqBAIJ, 2650 /* 49*/ 0, 2651 MatGetRowIJ_SeqBAIJ, 2652 MatRestoreRowIJ_SeqBAIJ, 2653 MatGetColumnIJ_SeqBAIJ, 2654 MatRestoreColumnIJ_SeqBAIJ, 2655 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2656 0, 2657 0, 2658 0, 2659 MatSetValuesBlocked_SeqBAIJ, 2660 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2661 MatDestroy_SeqBAIJ, 2662 MatView_SeqBAIJ, 2663 0, 2664 0, 2665 /* 64*/ 0, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2671 0, 2672 MatConvert_Basic, 2673 0, 2674 0, 2675 /* 74*/ 0, 2676 MatFDColoringApply_BAIJ, 2677 0, 2678 0, 2679 0, 2680 /* 79*/ 0, 2681 0, 2682 0, 2683 0, 2684 MatLoad_SeqBAIJ, 2685 /* 84*/ 0, 2686 0, 2687 0, 2688 0, 2689 0, 2690 /* 89*/ 0, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /* 94*/ 0, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /* 99*/ 0, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /*104*/ 0, 2706 MatRealPart_SeqBAIJ, 2707 MatImaginaryPart_SeqBAIJ, 2708 0, 2709 0, 2710 /*109*/ 0, 2711 0, 2712 0, 2713 0, 2714 MatMissingDiagonal_SeqBAIJ, 2715 /*114*/ 0, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /*119*/ 0, 2721 0, 2722 MatMultHermitianTranspose_SeqBAIJ, 2723 MatMultHermitianTransposeAdd_SeqBAIJ, 2724 0, 2725 /*124*/ 0, 2726 0, 2727 MatInvertBlockDiagonal_SeqBAIJ, 2728 0, 2729 0, 2730 /*129*/ 0, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /*134*/ 0, 2736 0, 2737 0, 2738 0, 2739 0, 2740 /*139*/ MatSetBlockSizes_Default, 2741 0, 2742 0, 2743 MatFDColoringSetUp_SeqXAIJ, 2744 0, 2745 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2746 MatDestroySubMatrices_SeqBAIJ 2747 }; 2748 2749 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2750 { 2751 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2752 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2757 2758 /* allocate space for values if not already there */ 2759 if (!aij->saved_values) { 2760 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2761 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2762 } 2763 2764 /* copy values over */ 2765 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2770 { 2771 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2772 PetscErrorCode ierr; 2773 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2774 2775 PetscFunctionBegin; 2776 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2777 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2778 2779 /* copy values over */ 2780 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2781 PetscFunctionReturn(0); 2782 } 2783 2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2785 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2786 2787 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2788 { 2789 Mat_SeqBAIJ *b; 2790 PetscErrorCode ierr; 2791 PetscInt i,mbs,nbs,bs2; 2792 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2793 2794 PetscFunctionBegin; 2795 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2796 if (nz == MAT_SKIP_ALLOCATION) { 2797 skipallocation = PETSC_TRUE; 2798 nz = 0; 2799 } 2800 2801 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2802 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2803 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2804 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2805 2806 B->preallocated = PETSC_TRUE; 2807 2808 mbs = B->rmap->n/bs; 2809 nbs = B->cmap->n/bs; 2810 bs2 = bs*bs; 2811 2812 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2813 2814 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2815 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2816 if (nnz) { 2817 for (i=0; i<mbs; i++) { 2818 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2819 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2820 } 2821 } 2822 2823 b = (Mat_SeqBAIJ*)B->data; 2824 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2825 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2826 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2827 2828 if (!flg) { 2829 switch (bs) { 2830 case 1: 2831 B->ops->mult = MatMult_SeqBAIJ_1; 2832 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2833 break; 2834 case 2: 2835 B->ops->mult = MatMult_SeqBAIJ_2; 2836 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2837 break; 2838 case 3: 2839 B->ops->mult = MatMult_SeqBAIJ_3; 2840 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2841 break; 2842 case 4: 2843 B->ops->mult = MatMult_SeqBAIJ_4; 2844 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2845 break; 2846 case 5: 2847 B->ops->mult = MatMult_SeqBAIJ_5; 2848 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2849 break; 2850 case 6: 2851 B->ops->mult = MatMult_SeqBAIJ_6; 2852 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2853 break; 2854 case 7: 2855 B->ops->mult = MatMult_SeqBAIJ_7; 2856 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2857 break; 2858 case 9: 2859 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2860 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2862 #else 2863 B->ops->mult = MatMult_SeqBAIJ_N; 2864 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2865 #endif 2866 break; 2867 case 11: 2868 B->ops->mult = MatMult_SeqBAIJ_11; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2870 break; 2871 case 15: 2872 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2874 break; 2875 default: 2876 B->ops->mult = MatMult_SeqBAIJ_N; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2878 break; 2879 } 2880 } 2881 B->ops->sor = MatSOR_SeqBAIJ; 2882 b->mbs = mbs; 2883 b->nbs = nbs; 2884 if (!skipallocation) { 2885 if (!b->imax) { 2886 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2887 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2888 2889 b->free_imax_ilen = PETSC_TRUE; 2890 } 2891 /* b->ilen will count nonzeros in each block row so far. */ 2892 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2893 if (!nnz) { 2894 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2895 else if (nz < 0) nz = 1; 2896 for (i=0; i<mbs; i++) b->imax[i] = nz; 2897 nz = nz*mbs; 2898 } else { 2899 nz = 0; 2900 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2901 } 2902 2903 /* allocate the matrix space */ 2904 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2905 if (B->structure_only) { 2906 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2907 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2908 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2909 } else { 2910 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2911 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2912 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2913 } 2914 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2915 2916 if (B->structure_only) { 2917 b->singlemalloc = PETSC_FALSE; 2918 b->free_a = PETSC_FALSE; 2919 } else { 2920 b->singlemalloc = PETSC_TRUE; 2921 b->free_a = PETSC_TRUE; 2922 } 2923 b->free_ij = PETSC_TRUE; 2924 2925 b->i[0] = 0; 2926 for (i=1; i<mbs+1; i++) { 2927 b->i[i] = b->i[i-1] + b->imax[i-1]; 2928 } 2929 2930 } else { 2931 b->free_a = PETSC_FALSE; 2932 b->free_ij = PETSC_FALSE; 2933 } 2934 2935 b->bs2 = bs2; 2936 b->mbs = mbs; 2937 b->nz = 0; 2938 b->maxnz = nz; 2939 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2940 B->was_assembled = PETSC_FALSE; 2941 B->assembled = PETSC_FALSE; 2942 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2943 PetscFunctionReturn(0); 2944 } 2945 2946 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2947 { 2948 PetscInt i,m,nz,nz_max=0,*nnz; 2949 PetscScalar *values=0; 2950 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2951 PetscErrorCode ierr; 2952 2953 PetscFunctionBegin; 2954 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2955 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2956 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2958 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2959 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2960 m = B->rmap->n/bs; 2961 2962 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2963 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2964 for (i=0; i<m; i++) { 2965 nz = ii[i+1]- ii[i]; 2966 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2967 nz_max = PetscMax(nz_max, nz); 2968 nnz[i] = nz; 2969 } 2970 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2971 ierr = PetscFree(nnz);CHKERRQ(ierr); 2972 2973 values = (PetscScalar*)V; 2974 if (!values) { 2975 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2976 } 2977 for (i=0; i<m; i++) { 2978 PetscInt ncols = ii[i+1] - ii[i]; 2979 const PetscInt *icols = jj + ii[i]; 2980 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2981 if (!roworiented) { 2982 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2983 } else { 2984 PetscInt j; 2985 for (j=0; j<ncols; j++) { 2986 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2987 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2988 } 2989 } 2990 } 2991 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2992 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2993 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2995 PetscFunctionReturn(0); 2996 } 2997 2998 /*MC 2999 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3000 block sparse compressed row format. 3001 3002 Options Database Keys: 3003 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3004 3005 Level: beginner 3006 3007 .seealso: MatCreateSeqBAIJ() 3008 M*/ 3009 3010 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3011 3012 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3013 { 3014 PetscErrorCode ierr; 3015 PetscMPIInt size; 3016 Mat_SeqBAIJ *b; 3017 3018 PetscFunctionBegin; 3019 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3020 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3021 3022 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3023 B->data = (void*)b; 3024 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3025 3026 b->row = 0; 3027 b->col = 0; 3028 b->icol = 0; 3029 b->reallocs = 0; 3030 b->saved_values = 0; 3031 3032 b->roworiented = PETSC_TRUE; 3033 b->nonew = 0; 3034 b->diag = 0; 3035 B->spptr = 0; 3036 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3037 b->keepnonzeropattern = PETSC_FALSE; 3038 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3048 #if defined(PETSC_HAVE_HYPRE) 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3050 #endif 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3058 { 3059 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3060 PetscErrorCode ierr; 3061 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3062 3063 PetscFunctionBegin; 3064 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3065 3066 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3067 c->imax = a->imax; 3068 c->ilen = a->ilen; 3069 c->free_imax_ilen = PETSC_FALSE; 3070 } else { 3071 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3072 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3073 for (i=0; i<mbs; i++) { 3074 c->imax[i] = a->imax[i]; 3075 c->ilen[i] = a->ilen[i]; 3076 } 3077 c->free_imax_ilen = PETSC_TRUE; 3078 } 3079 3080 /* allocate the matrix space */ 3081 if (mallocmatspace) { 3082 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3083 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3084 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3085 3086 c->i = a->i; 3087 c->j = a->j; 3088 c->singlemalloc = PETSC_FALSE; 3089 c->free_a = PETSC_TRUE; 3090 c->free_ij = PETSC_FALSE; 3091 c->parent = A; 3092 C->preallocated = PETSC_TRUE; 3093 C->assembled = PETSC_TRUE; 3094 3095 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3096 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3097 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3098 } else { 3099 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3100 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3101 3102 c->singlemalloc = PETSC_TRUE; 3103 c->free_a = PETSC_TRUE; 3104 c->free_ij = PETSC_TRUE; 3105 3106 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3107 if (mbs > 0) { 3108 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3109 if (cpvalues == MAT_COPY_VALUES) { 3110 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3111 } else { 3112 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3113 } 3114 } 3115 C->preallocated = PETSC_TRUE; 3116 C->assembled = PETSC_TRUE; 3117 } 3118 } 3119 3120 c->roworiented = a->roworiented; 3121 c->nonew = a->nonew; 3122 3123 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3124 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3125 3126 c->bs2 = a->bs2; 3127 c->mbs = a->mbs; 3128 c->nbs = a->nbs; 3129 3130 if (a->diag) { 3131 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3132 c->diag = a->diag; 3133 c->free_diag = PETSC_FALSE; 3134 } else { 3135 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3136 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3137 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3138 c->free_diag = PETSC_TRUE; 3139 } 3140 } else c->diag = 0; 3141 3142 c->nz = a->nz; 3143 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3144 c->solve_work = NULL; 3145 c->mult_work = NULL; 3146 c->sor_workt = NULL; 3147 c->sor_work = NULL; 3148 3149 c->compressedrow.use = a->compressedrow.use; 3150 c->compressedrow.nrows = a->compressedrow.nrows; 3151 if (a->compressedrow.use) { 3152 i = a->compressedrow.nrows; 3153 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3154 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3155 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3156 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3157 } else { 3158 c->compressedrow.use = PETSC_FALSE; 3159 c->compressedrow.i = NULL; 3160 c->compressedrow.rindex = NULL; 3161 } 3162 C->nonzerostate = A->nonzerostate; 3163 3164 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3165 PetscFunctionReturn(0); 3166 } 3167 3168 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3169 { 3170 PetscErrorCode ierr; 3171 3172 PetscFunctionBegin; 3173 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3174 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3175 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3176 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3177 PetscFunctionReturn(0); 3178 } 3179 3180 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3181 { 3182 Mat_SeqBAIJ *a; 3183 PetscErrorCode ierr; 3184 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3185 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3186 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3187 PetscInt *masked,nmask,tmp,bs2,ishift; 3188 PetscMPIInt size; 3189 int fd; 3190 PetscScalar *aa; 3191 MPI_Comm comm; 3192 3193 PetscFunctionBegin; 3194 /* force binary viewer to load .info file if it has not yet done so */ 3195 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3196 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3197 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3198 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3199 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3200 if (bs < 0) bs = 1; 3201 bs2 = bs*bs; 3202 3203 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3204 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3205 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3206 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3207 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3208 M = header[1]; N = header[2]; nz = header[3]; 3209 3210 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3211 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3212 3213 /* 3214 This code adds extra rows to make sure the number of rows is 3215 divisible by the blocksize 3216 */ 3217 mbs = M/bs; 3218 extra_rows = bs - M + bs*(mbs); 3219 if (extra_rows == bs) extra_rows = 0; 3220 else mbs++; 3221 if (extra_rows) { 3222 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3223 } 3224 3225 /* Set global sizes if not already set */ 3226 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3227 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3228 } else { /* Check if the matrix global sizes are correct */ 3229 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3230 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3231 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3232 } 3233 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3234 } 3235 3236 /* read in row lengths */ 3237 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3238 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3239 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3240 3241 /* read in column indices */ 3242 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3243 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3244 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3245 3246 /* loop over row lengths determining block row lengths */ 3247 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3248 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3249 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3250 rowcount = 0; 3251 nzcount = 0; 3252 for (i=0; i<mbs; i++) { 3253 nmask = 0; 3254 for (j=0; j<bs; j++) { 3255 kmax = rowlengths[rowcount]; 3256 for (k=0; k<kmax; k++) { 3257 tmp = jj[nzcount++]/bs; 3258 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3259 } 3260 rowcount++; 3261 } 3262 browlengths[i] += nmask; 3263 /* zero out the mask elements we set */ 3264 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3265 } 3266 3267 /* Do preallocation */ 3268 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3269 a = (Mat_SeqBAIJ*)newmat->data; 3270 3271 /* set matrix "i" values */ 3272 a->i[0] = 0; 3273 for (i=1; i<= mbs; i++) { 3274 a->i[i] = a->i[i-1] + browlengths[i-1]; 3275 a->ilen[i-1] = browlengths[i-1]; 3276 } 3277 a->nz = 0; 3278 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3279 3280 /* read in nonzero values */ 3281 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3282 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3283 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3284 3285 /* set "a" and "j" values into matrix */ 3286 nzcount = 0; jcount = 0; 3287 for (i=0; i<mbs; i++) { 3288 nzcountb = nzcount; 3289 nmask = 0; 3290 for (j=0; j<bs; j++) { 3291 kmax = rowlengths[i*bs+j]; 3292 for (k=0; k<kmax; k++) { 3293 tmp = jj[nzcount++]/bs; 3294 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3295 } 3296 } 3297 /* sort the masked values */ 3298 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3299 3300 /* set "j" values into matrix */ 3301 maskcount = 1; 3302 for (j=0; j<nmask; j++) { 3303 a->j[jcount++] = masked[j]; 3304 mask[masked[j]] = maskcount++; 3305 } 3306 /* set "a" values into matrix */ 3307 ishift = bs2*a->i[i]; 3308 for (j=0; j<bs; j++) { 3309 kmax = rowlengths[i*bs+j]; 3310 for (k=0; k<kmax; k++) { 3311 tmp = jj[nzcountb]/bs; 3312 block = mask[tmp] - 1; 3313 point = jj[nzcountb] - bs*tmp; 3314 idx = ishift + bs2*block + j + bs*point; 3315 a->a[idx] = (MatScalar)aa[nzcountb++]; 3316 } 3317 } 3318 /* zero out the mask elements we set */ 3319 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3320 } 3321 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3322 3323 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3324 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3325 ierr = PetscFree(aa);CHKERRQ(ierr); 3326 ierr = PetscFree(jj);CHKERRQ(ierr); 3327 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3328 3329 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3330 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3331 PetscFunctionReturn(0); 3332 } 3333 3334 /*@C 3335 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3336 compressed row) format. For good matrix assembly performance the 3337 user should preallocate the matrix storage by setting the parameter nz 3338 (or the array nnz). By setting these parameters accurately, performance 3339 during matrix assembly can be increased by more than a factor of 50. 3340 3341 Collective on MPI_Comm 3342 3343 Input Parameters: 3344 + comm - MPI communicator, set to PETSC_COMM_SELF 3345 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3346 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3347 . m - number of rows 3348 . n - number of columns 3349 . nz - number of nonzero blocks per block row (same for all rows) 3350 - nnz - array containing the number of nonzero blocks in the various block rows 3351 (possibly different for each block row) or NULL 3352 3353 Output Parameter: 3354 . A - the matrix 3355 3356 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3357 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3358 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3359 3360 Options Database Keys: 3361 . -mat_no_unroll - uses code that does not unroll the loops in the 3362 block calculations (much slower) 3363 . -mat_block_size - size of the blocks to use 3364 3365 Level: intermediate 3366 3367 Notes: 3368 The number of rows and columns must be divisible by blocksize. 3369 3370 If the nnz parameter is given then the nz parameter is ignored 3371 3372 A nonzero block is any block that as 1 or more nonzeros in it 3373 3374 The block AIJ format is fully compatible with standard Fortran 77 3375 storage. That is, the stored row and column indices can begin at 3376 either one (as in Fortran) or zero. See the users' manual for details. 3377 3378 Specify the preallocated storage with either nz or nnz (not both). 3379 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3380 allocation. See Users-Manual: ch_mat for details. 3381 matrices. 3382 3383 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3384 @*/ 3385 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3386 { 3387 PetscErrorCode ierr; 3388 3389 PetscFunctionBegin; 3390 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3391 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3392 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3393 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3394 PetscFunctionReturn(0); 3395 } 3396 3397 /*@C 3398 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3399 per row in the matrix. For good matrix assembly performance the 3400 user should preallocate the matrix storage by setting the parameter nz 3401 (or the array nnz). By setting these parameters accurately, performance 3402 during matrix assembly can be increased by more than a factor of 50. 3403 3404 Collective on MPI_Comm 3405 3406 Input Parameters: 3407 + B - the matrix 3408 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3409 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3410 . nz - number of block nonzeros per block row (same for all rows) 3411 - nnz - array containing the number of block nonzeros in the various block rows 3412 (possibly different for each block row) or NULL 3413 3414 Options Database Keys: 3415 . -mat_no_unroll - uses code that does not unroll the loops in the 3416 block calculations (much slower) 3417 . -mat_block_size - size of the blocks to use 3418 3419 Level: intermediate 3420 3421 Notes: 3422 If the nnz parameter is given then the nz parameter is ignored 3423 3424 You can call MatGetInfo() to get information on how effective the preallocation was; 3425 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3426 You can also run with the option -info and look for messages with the string 3427 malloc in them to see if additional memory allocation was needed. 3428 3429 The block AIJ format is fully compatible with standard Fortran 77 3430 storage. That is, the stored row and column indices can begin at 3431 either one (as in Fortran) or zero. See the users' manual for details. 3432 3433 Specify the preallocated storage with either nz or nnz (not both). 3434 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3435 allocation. See Users-Manual: ch_mat for details. 3436 3437 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3438 @*/ 3439 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3440 { 3441 PetscErrorCode ierr; 3442 3443 PetscFunctionBegin; 3444 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3445 PetscValidType(B,1); 3446 PetscValidLogicalCollectiveInt(B,bs,2); 3447 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3448 PetscFunctionReturn(0); 3449 } 3450 3451 /*@C 3452 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3453 (the default sequential PETSc format). 3454 3455 Collective on MPI_Comm 3456 3457 Input Parameters: 3458 + B - the matrix 3459 . i - the indices into j for the start of each local row (starts with zero) 3460 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3461 - v - optional values in the matrix 3462 3463 Level: developer 3464 3465 Notes: 3466 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3467 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3468 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3469 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3470 block column and the second index is over columns within a block. 3471 3472 .keywords: matrix, aij, compressed row, sparse 3473 3474 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3475 @*/ 3476 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3477 { 3478 PetscErrorCode ierr; 3479 3480 PetscFunctionBegin; 3481 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3482 PetscValidType(B,1); 3483 PetscValidLogicalCollectiveInt(B,bs,2); 3484 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3485 PetscFunctionReturn(0); 3486 } 3487 3488 3489 /*@ 3490 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3491 3492 Collective on MPI_Comm 3493 3494 Input Parameters: 3495 + comm - must be an MPI communicator of size 1 3496 . bs - size of block 3497 . m - number of rows 3498 . n - number of columns 3499 . i - row indices 3500 . j - column indices 3501 - a - matrix values 3502 3503 Output Parameter: 3504 . mat - the matrix 3505 3506 Level: advanced 3507 3508 Notes: 3509 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3510 once the matrix is destroyed 3511 3512 You cannot set new nonzero locations into this matrix, that will generate an error. 3513 3514 The i and j indices are 0 based 3515 3516 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3517 3518 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3519 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3520 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3521 with column-major ordering within blocks. 3522 3523 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3524 3525 @*/ 3526 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3527 { 3528 PetscErrorCode ierr; 3529 PetscInt ii; 3530 Mat_SeqBAIJ *baij; 3531 3532 PetscFunctionBegin; 3533 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3534 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3535 3536 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3537 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3538 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3539 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3540 baij = (Mat_SeqBAIJ*)(*mat)->data; 3541 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3542 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3543 3544 baij->i = i; 3545 baij->j = j; 3546 baij->a = a; 3547 3548 baij->singlemalloc = PETSC_FALSE; 3549 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3550 baij->free_a = PETSC_FALSE; 3551 baij->free_ij = PETSC_FALSE; 3552 3553 for (ii=0; ii<m; ii++) { 3554 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3555 #if defined(PETSC_USE_DEBUG) 3556 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3557 #endif 3558 } 3559 #if defined(PETSC_USE_DEBUG) 3560 for (ii=0; ii<baij->i[m]; ii++) { 3561 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3562 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3563 } 3564 #endif 3565 3566 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3567 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3568 PetscFunctionReturn(0); 3569 } 3570 3571 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3572 { 3573 PetscErrorCode ierr; 3574 PetscMPIInt size; 3575 3576 PetscFunctionBegin; 3577 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3578 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3579 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3580 } else { 3581 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3582 } 3583 PetscFunctionReturn(0); 3584 } 3585