1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 916 stepval = (n-1)*4; 917 for (k=0; k<m; k++) { /* loop over added rows */ 918 row = im[k]; 919 rp = aj + ai[row]; 920 ap = aa + 16*ai[row]; 921 nrow = ailen[row]; 922 low = 0; 923 high = nrow; 924 for (l=0; l<n; l++) { /* loop over added columns */ 925 col = in[l]; 926 if (col <= lastcol) low = 0; 927 else high = nrow; 928 lastcol = col; 929 value = v + k*(stepval+4 + l)*4; 930 while (high-low > 7) { 931 t = (low+high)/2; 932 if (rp[t] > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (rp[i] > col) break; 937 if (rp[i] == col) { 938 bap = ap + 16*i; 939 for (ii=0; ii<4; ii++,value+=stepval) { 940 for (jj=ii; jj<16; jj+=4) { 941 bap[jj] += *value++; 942 } 943 } 944 goto noinsert2; 945 } 946 } 947 N = nrow++ - 1; 948 high++; /* added new column index thus must search to one higher than before */ 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 953 } 954 if (N >= i) { 955 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 956 } 957 rp[i] = col; 958 bap = ap + 16*i; 959 for (ii=0; ii<4; ii++,value+=stepval) { 960 for (jj=ii; jj<16; jj+=4) { 961 bap[jj] = *value++; 962 } 963 } 964 noinsert2:; 965 low = i; 966 } 967 ailen[row] = nrow; 968 } 969 PetscFunctionReturnVoid(); 970 } 971 972 #if defined(PETSC_HAVE_FORTRAN_CAPS) 973 #define matsetvalues4_ MATSETVALUES4 974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 975 #define matsetvalues4_ matsetvalues4 976 #endif 977 978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 979 { 980 Mat A = *AA; 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 982 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 983 PetscInt *ai=a->i,*ailen=a->ilen; 984 PetscInt *aj=a->j,brow,bcol; 985 PetscInt ridx,cidx,lastcol = -1; 986 MatScalar *ap,value,*aa=a->a,*bap; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 for (k=0; k<m; k++) { /* loop over added rows */ 991 row = im[k]; brow = row/4; 992 rp = aj + ai[brow]; 993 ap = aa + 16*ai[brow]; 994 nrow = ailen[brow]; 995 low = 0; 996 high = nrow; 997 for (l=0; l<n; l++) { /* loop over added columns */ 998 col = in[l]; bcol = col/4; 999 ridx = row % 4; cidx = col % 4; 1000 value = v[l + k*n]; 1001 if (col <= lastcol) low = 0; 1002 else high = nrow; 1003 lastcol = col; 1004 while (high-low > 7) { 1005 t = (low+high)/2; 1006 if (rp[t] > bcol) high = t; 1007 else low = t; 1008 } 1009 for (i=low; i<high; i++) { 1010 if (rp[i] > bcol) break; 1011 if (rp[i] == bcol) { 1012 bap = ap + 16*i + 4*cidx + ridx; 1013 *bap += value; 1014 goto noinsert1; 1015 } 1016 } 1017 N = nrow++ - 1; 1018 high++; /* added new column thus must search to one higher than before */ 1019 /* shift up all the later entries in this row */ 1020 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1021 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1022 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1023 rp[i] = bcol; 1024 ap[16*i + 4*cidx + ridx] = value; 1025 noinsert1:; 1026 low = i; 1027 } 1028 ailen[brow] = nrow; 1029 } 1030 PetscFunctionReturnVoid(); 1031 } 1032 1033 /* 1034 Checks for missing diagonals 1035 */ 1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1037 { 1038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1039 PetscErrorCode ierr; 1040 PetscInt *diag,*ii = a->i,i; 1041 1042 PetscFunctionBegin; 1043 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1044 *missing = PETSC_FALSE; 1045 if (A->rmap->n > 0 && !ii) { 1046 *missing = PETSC_TRUE; 1047 if (d) *d = 0; 1048 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1049 } else { 1050 diag = a->diag; 1051 for (i=0; i<a->mbs; i++) { 1052 if (diag[i] >= ii[i+1]) { 1053 *missing = PETSC_TRUE; 1054 if (d) *d = i; 1055 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1056 break; 1057 } 1058 } 1059 } 1060 PetscFunctionReturn(0); 1061 } 1062 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1089 { 1090 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1093 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1094 1095 PetscFunctionBegin; 1096 *nn = n; 1097 if (!ia) PetscFunctionReturn(0); 1098 if (symmetric) { 1099 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1100 nz = tia[n]; 1101 } else { 1102 tia = a->i; tja = a->j; 1103 } 1104 1105 if (!blockcompressed && bs > 1) { 1106 (*nn) *= bs; 1107 /* malloc & create the natural set of indices */ 1108 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1109 if (n) { 1110 (*ia)[0] = oshift; 1111 for (j=1; j<bs; j++) { 1112 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1113 } 1114 } 1115 1116 for (i=1; i<n; i++) { 1117 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1118 for (j=1; j<bs; j++) { 1119 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1120 } 1121 } 1122 if (n) { 1123 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1124 } 1125 1126 if (inja) { 1127 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1128 cnt = 0; 1129 for (i=0; i<n; i++) { 1130 for (j=0; j<bs; j++) { 1131 for (k=tia[i]; k<tia[i+1]; k++) { 1132 for (l=0; l<bs; l++) { 1133 (*ja)[cnt++] = bs*tja[k] + l; 1134 } 1135 } 1136 } 1137 } 1138 } 1139 1140 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1141 ierr = PetscFree(tia);CHKERRQ(ierr); 1142 ierr = PetscFree(tja);CHKERRQ(ierr); 1143 } 1144 } else if (oshift == 1) { 1145 if (symmetric) { 1146 nz = tia[A->rmap->n/bs]; 1147 /* add 1 to i and j indices */ 1148 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1149 *ia = tia; 1150 if (ja) { 1151 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1152 *ja = tja; 1153 } 1154 } else { 1155 nz = a->i[A->rmap->n/bs]; 1156 /* malloc space and add 1 to i and j indices */ 1157 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1158 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1159 if (ja) { 1160 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1161 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1162 } 1163 } 1164 } else { 1165 *ia = tia; 1166 if (ja) *ja = tja; 1167 } 1168 PetscFunctionReturn(0); 1169 } 1170 1171 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1172 { 1173 PetscErrorCode ierr; 1174 1175 PetscFunctionBegin; 1176 if (!ia) PetscFunctionReturn(0); 1177 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1178 ierr = PetscFree(*ia);CHKERRQ(ierr); 1179 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1180 } 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1185 { 1186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 #if defined(PETSC_USE_LOG) 1191 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1192 #endif 1193 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1194 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1196 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1197 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1198 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1200 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1203 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1204 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1205 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1206 1207 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1209 ierr = PetscFree(A->data);CHKERRQ(ierr); 1210 1211 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1222 #if defined(PETSC_HAVE_HYPRE) 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1224 #endif 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1231 { 1232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 switch (op) { 1237 case MAT_ROW_ORIENTED: 1238 a->roworiented = flg; 1239 break; 1240 case MAT_KEEP_NONZERO_PATTERN: 1241 a->keepnonzeropattern = flg; 1242 break; 1243 case MAT_NEW_NONZERO_LOCATIONS: 1244 a->nonew = (flg ? 0 : 1); 1245 break; 1246 case MAT_NEW_NONZERO_LOCATION_ERR: 1247 a->nonew = (flg ? -1 : 0); 1248 break; 1249 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1250 a->nonew = (flg ? -2 : 0); 1251 break; 1252 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1253 a->nounused = (flg ? -1 : 0); 1254 break; 1255 case MAT_NEW_DIAGONALS: 1256 case MAT_IGNORE_OFF_PROC_ENTRIES: 1257 case MAT_USE_HASH_TABLE: 1258 case MAT_SORTED_FULL: 1259 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1260 break; 1261 case MAT_SPD: 1262 case MAT_SYMMETRIC: 1263 case MAT_STRUCTURALLY_SYMMETRIC: 1264 case MAT_HERMITIAN: 1265 case MAT_SYMMETRY_ETERNAL: 1266 case MAT_SUBMAT_SINGLEIS: 1267 case MAT_STRUCTURE_ONLY: 1268 /* These options are handled directly by MatSetOption() */ 1269 break; 1270 default: 1271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1277 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1278 { 1279 PetscErrorCode ierr; 1280 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1281 MatScalar *aa_i; 1282 PetscScalar *v_i; 1283 1284 PetscFunctionBegin; 1285 bs = A->rmap->bs; 1286 bs2 = bs*bs; 1287 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1288 1289 bn = row/bs; /* Block number */ 1290 bp = row % bs; /* Block Position */ 1291 M = ai[bn+1] - ai[bn]; 1292 *nz = bs*M; 1293 1294 if (v) { 1295 *v = 0; 1296 if (*nz) { 1297 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1298 for (i=0; i<M; i++) { /* for each block in the block row */ 1299 v_i = *v + i*bs; 1300 aa_i = aa + bs2*(ai[bn] + i); 1301 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1302 } 1303 } 1304 } 1305 1306 if (idx) { 1307 *idx = 0; 1308 if (*nz) { 1309 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1310 for (i=0; i<M; i++) { /* for each block in the block row */ 1311 idx_i = *idx + i*bs; 1312 itmp = bs*aj[ai[bn] + i]; 1313 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1314 } 1315 } 1316 } 1317 PetscFunctionReturn(0); 1318 } 1319 1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1321 { 1322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1331 { 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1336 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1337 PetscFunctionReturn(0); 1338 } 1339 1340 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1341 { 1342 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1343 Mat C; 1344 PetscErrorCode ierr; 1345 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1346 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1347 MatScalar *ata,*aa=a->a; 1348 1349 PetscFunctionBegin; 1350 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1351 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1352 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1353 1354 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1355 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1356 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1357 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1358 1359 at = (Mat_SeqBAIJ*)C->data; 1360 ati = at->i; 1361 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1362 } else { 1363 C = *B; 1364 at = (Mat_SeqBAIJ*)C->data; 1365 ati = at->i; 1366 } 1367 1368 atj = at->j; 1369 ata = at->a; 1370 1371 /* Copy ati into atfill so we have locations of the next free space in atj */ 1372 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1373 1374 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1375 for (i=0; i<mbs; i++) { 1376 anzj = ai[i+1] - ai[i]; 1377 for (j=0; j<anzj; j++) { 1378 atj[atfill[*aj]] = i; 1379 for (kr=0; kr<bs; kr++) { 1380 for (k=0; k<bs; k++) { 1381 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1382 } 1383 } 1384 atfill[*aj++] += 1; 1385 } 1386 } 1387 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1388 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 1390 /* Clean up temporary space and complete requests. */ 1391 ierr = PetscFree(atfill);CHKERRQ(ierr); 1392 1393 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1394 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1395 *B = C; 1396 } else { 1397 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1398 } 1399 PetscFunctionReturn(0); 1400 } 1401 1402 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1403 { 1404 PetscErrorCode ierr; 1405 Mat Btrans; 1406 1407 PetscFunctionBegin; 1408 *f = PETSC_FALSE; 1409 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1410 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1411 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1416 { 1417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1418 PetscErrorCode ierr; 1419 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1420 int fd; 1421 PetscScalar *aa; 1422 FILE *file; 1423 1424 PetscFunctionBegin; 1425 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1426 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1427 col_lens[0] = MAT_FILE_CLASSID; 1428 1429 col_lens[1] = A->rmap->N; 1430 col_lens[2] = A->cmap->n; 1431 col_lens[3] = a->nz*bs2; 1432 1433 /* store lengths of each row and write (including header) to file */ 1434 count = 0; 1435 for (i=0; i<a->mbs; i++) { 1436 for (j=0; j<bs; j++) { 1437 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1438 } 1439 } 1440 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1441 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1442 1443 /* store column indices (zero start index) */ 1444 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1445 count = 0; 1446 for (i=0; i<a->mbs; i++) { 1447 for (j=0; j<bs; j++) { 1448 for (k=a->i[i]; k<a->i[i+1]; k++) { 1449 for (l=0; l<bs; l++) { 1450 jj[count++] = bs*a->j[k] + l; 1451 } 1452 } 1453 } 1454 } 1455 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1456 ierr = PetscFree(jj);CHKERRQ(ierr); 1457 1458 /* store nonzero values */ 1459 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1460 count = 0; 1461 for (i=0; i<a->mbs; i++) { 1462 for (j=0; j<bs; j++) { 1463 for (k=a->i[i]; k<a->i[i+1]; k++) { 1464 for (l=0; l<bs; l++) { 1465 aa[count++] = a->a[bs2*k + l*bs + j]; 1466 } 1467 } 1468 } 1469 } 1470 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1471 ierr = PetscFree(aa);CHKERRQ(ierr); 1472 1473 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1474 if (file) { 1475 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1481 { 1482 PetscErrorCode ierr; 1483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1484 PetscInt i,bs = A->rmap->bs,k; 1485 1486 PetscFunctionBegin; 1487 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1488 for (i=0; i<a->mbs; i++) { 1489 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1490 for (k=a->i[i]; k<a->i[i+1]; k++) { 1491 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1492 } 1493 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1494 } 1495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1500 { 1501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1502 PetscErrorCode ierr; 1503 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1504 PetscViewerFormat format; 1505 1506 PetscFunctionBegin; 1507 if (A->structure_only) { 1508 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1513 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1514 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1515 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1516 const char *matname; 1517 Mat aij; 1518 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1519 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1520 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1521 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1522 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1523 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1524 PetscFunctionReturn(0); 1525 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1526 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1527 for (i=0; i<a->mbs; i++) { 1528 for (j=0; j<bs; j++) { 1529 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1530 for (k=a->i[i]; k<a->i[i+1]; k++) { 1531 for (l=0; l<bs; l++) { 1532 #if defined(PETSC_USE_COMPLEX) 1533 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1535 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1538 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1539 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } 1542 #else 1543 if (a->a[bs2*k + l*bs + j] != 0.0) { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1545 } 1546 #endif 1547 } 1548 } 1549 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1550 } 1551 } 1552 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1553 } else { 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1555 for (i=0; i<a->mbs; i++) { 1556 for (j=0; j<bs; j++) { 1557 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1558 for (k=a->i[i]; k<a->i[i+1]; k++) { 1559 for (l=0; l<bs; l++) { 1560 #if defined(PETSC_USE_COMPLEX) 1561 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1562 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1563 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1564 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1565 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1566 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1567 } else { 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } 1570 #else 1571 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1572 #endif 1573 } 1574 } 1575 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1576 } 1577 } 1578 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1579 } 1580 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #include <petscdraw.h> 1585 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1586 { 1587 Mat A = (Mat) Aa; 1588 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1589 PetscErrorCode ierr; 1590 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1591 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1592 MatScalar *aa; 1593 PetscViewer viewer; 1594 PetscViewerFormat format; 1595 1596 PetscFunctionBegin; 1597 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1598 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1599 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1600 1601 /* loop over matrix elements drawing boxes */ 1602 1603 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1604 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1605 /* Blue for negative, Cyan for zero and Red for positive */ 1606 color = PETSC_DRAW_BLUE; 1607 for (i=0,row=0; i<mbs; i++,row+=bs) { 1608 for (j=a->i[i]; j<a->i[i+1]; j++) { 1609 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1610 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1611 aa = a->a + j*bs2; 1612 for (k=0; k<bs; k++) { 1613 for (l=0; l<bs; l++) { 1614 if (PetscRealPart(*aa++) >= 0.) continue; 1615 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1616 } 1617 } 1618 } 1619 } 1620 color = PETSC_DRAW_CYAN; 1621 for (i=0,row=0; i<mbs; i++,row+=bs) { 1622 for (j=a->i[i]; j<a->i[i+1]; j++) { 1623 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1624 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1625 aa = a->a + j*bs2; 1626 for (k=0; k<bs; k++) { 1627 for (l=0; l<bs; l++) { 1628 if (PetscRealPart(*aa++) != 0.) continue; 1629 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1630 } 1631 } 1632 } 1633 } 1634 color = PETSC_DRAW_RED; 1635 for (i=0,row=0; i<mbs; i++,row+=bs) { 1636 for (j=a->i[i]; j<a->i[i+1]; j++) { 1637 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1638 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1639 aa = a->a + j*bs2; 1640 for (k=0; k<bs; k++) { 1641 for (l=0; l<bs; l++) { 1642 if (PetscRealPart(*aa++) <= 0.) continue; 1643 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1644 } 1645 } 1646 } 1647 } 1648 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1649 } else { 1650 /* use contour shading to indicate magnitude of values */ 1651 /* first determine max of all nonzero values */ 1652 PetscReal minv = 0.0, maxv = 0.0; 1653 PetscDraw popup; 1654 1655 for (i=0; i<a->nz*a->bs2; i++) { 1656 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1657 } 1658 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1659 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1660 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1661 1662 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1663 for (i=0,row=0; i<mbs; i++,row+=bs) { 1664 for (j=a->i[i]; j<a->i[i+1]; j++) { 1665 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1666 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1667 aa = a->a + j*bs2; 1668 for (k=0; k<bs; k++) { 1669 for (l=0; l<bs; l++) { 1670 MatScalar v = *aa++; 1671 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1672 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1673 } 1674 } 1675 } 1676 } 1677 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1683 { 1684 PetscErrorCode ierr; 1685 PetscReal xl,yl,xr,yr,w,h; 1686 PetscDraw draw; 1687 PetscBool isnull; 1688 1689 PetscFunctionBegin; 1690 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1691 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1692 if (isnull) PetscFunctionReturn(0); 1693 1694 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1695 xr += w; yr += h; xl = -w; yl = -h; 1696 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1697 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1698 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1699 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1700 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1705 { 1706 PetscErrorCode ierr; 1707 PetscBool iascii,isbinary,isdraw; 1708 1709 PetscFunctionBegin; 1710 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1711 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1713 if (iascii) { 1714 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1715 } else if (isbinary) { 1716 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1717 } else if (isdraw) { 1718 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1719 } else { 1720 Mat B; 1721 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1722 ierr = MatView(B,viewer);CHKERRQ(ierr); 1723 ierr = MatDestroy(&B);CHKERRQ(ierr); 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 1729 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1730 { 1731 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1732 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1733 PetscInt *ai = a->i,*ailen = a->ilen; 1734 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1735 MatScalar *ap,*aa = a->a; 1736 1737 PetscFunctionBegin; 1738 for (k=0; k<m; k++) { /* loop over rows */ 1739 row = im[k]; brow = row/bs; 1740 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1741 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1742 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1743 nrow = ailen[brow]; 1744 for (l=0; l<n; l++) { /* loop over columns */ 1745 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1746 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1747 col = in[l]; 1748 bcol = col/bs; 1749 cidx = col%bs; 1750 ridx = row%bs; 1751 high = nrow; 1752 low = 0; /* assume unsorted */ 1753 while (high-low > 5) { 1754 t = (low+high)/2; 1755 if (rp[t] > bcol) high = t; 1756 else low = t; 1757 } 1758 for (i=low; i<high; i++) { 1759 if (rp[i] > bcol) break; 1760 if (rp[i] == bcol) { 1761 *v++ = ap[bs2*i+bs*cidx+ridx]; 1762 goto finished; 1763 } 1764 } 1765 *v++ = 0.0; 1766 finished:; 1767 } 1768 } 1769 PetscFunctionReturn(0); 1770 } 1771 1772 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1773 { 1774 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1775 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1776 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1777 PetscErrorCode ierr; 1778 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1779 PetscBool roworiented=a->roworiented; 1780 const PetscScalar *value = v; 1781 MatScalar *ap=NULL,*aa = a->a,*bap; 1782 1783 PetscFunctionBegin; 1784 if (roworiented) { 1785 stepval = (n-1)*bs; 1786 } else { 1787 stepval = (m-1)*bs; 1788 } 1789 for (k=0; k<m; k++) { /* loop over added rows */ 1790 row = im[k]; 1791 if (row < 0) continue; 1792 #if defined(PETSC_USE_DEBUG) 1793 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1794 #endif 1795 rp = aj + ai[row]; 1796 if (!A->structure_only) ap = aa + bs2*ai[row]; 1797 rmax = imax[row]; 1798 nrow = ailen[row]; 1799 low = 0; 1800 high = nrow; 1801 for (l=0; l<n; l++) { /* loop over added columns */ 1802 if (in[l] < 0) continue; 1803 #if defined(PETSC_USE_DEBUG) 1804 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1805 #endif 1806 col = in[l]; 1807 if (!A->structure_only) { 1808 if (roworiented) { 1809 value = v + (k*(stepval+bs) + l)*bs; 1810 } else { 1811 value = v + (l*(stepval+bs) + k)*bs; 1812 } 1813 } 1814 if (col <= lastcol) low = 0; 1815 else high = nrow; 1816 lastcol = col; 1817 while (high-low > 7) { 1818 t = (low+high)/2; 1819 if (rp[t] > col) high = t; 1820 else low = t; 1821 } 1822 for (i=low; i<high; i++) { 1823 if (rp[i] > col) break; 1824 if (rp[i] == col) { 1825 if (A->structure_only) goto noinsert2; 1826 bap = ap + bs2*i; 1827 if (roworiented) { 1828 if (is == ADD_VALUES) { 1829 for (ii=0; ii<bs; ii++,value+=stepval) { 1830 for (jj=ii; jj<bs2; jj+=bs) { 1831 bap[jj] += *value++; 1832 } 1833 } 1834 } else { 1835 for (ii=0; ii<bs; ii++,value+=stepval) { 1836 for (jj=ii; jj<bs2; jj+=bs) { 1837 bap[jj] = *value++; 1838 } 1839 } 1840 } 1841 } else { 1842 if (is == ADD_VALUES) { 1843 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1844 for (jj=0; jj<bs; jj++) { 1845 bap[jj] += value[jj]; 1846 } 1847 bap += bs; 1848 } 1849 } else { 1850 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1851 for (jj=0; jj<bs; jj++) { 1852 bap[jj] = value[jj]; 1853 } 1854 bap += bs; 1855 } 1856 } 1857 } 1858 goto noinsert2; 1859 } 1860 } 1861 if (nonew == 1) goto noinsert2; 1862 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1863 if (A->structure_only) { 1864 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1865 } else { 1866 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1867 } 1868 N = nrow++ - 1; high++; 1869 /* shift up all the later entries in this row */ 1870 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1871 rp[i] = col; 1872 if (!A->structure_only) { 1873 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1874 bap = ap + bs2*i; 1875 if (roworiented) { 1876 for (ii=0; ii<bs; ii++,value+=stepval) { 1877 for (jj=ii; jj<bs2; jj+=bs) { 1878 bap[jj] = *value++; 1879 } 1880 } 1881 } else { 1882 for (ii=0; ii<bs; ii++,value+=stepval) { 1883 for (jj=0; jj<bs; jj++) { 1884 *bap++ = *value++; 1885 } 1886 } 1887 } 1888 } 1889 noinsert2:; 1890 low = i; 1891 } 1892 ailen[row] = nrow; 1893 } 1894 PetscFunctionReturn(0); 1895 } 1896 1897 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1898 { 1899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1900 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1901 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1902 PetscErrorCode ierr; 1903 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1904 MatScalar *aa = a->a,*ap; 1905 PetscReal ratio=0.6; 1906 1907 PetscFunctionBegin; 1908 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1909 1910 if (m) rmax = ailen[0]; 1911 for (i=1; i<mbs; i++) { 1912 /* move each row back by the amount of empty slots (fshift) before it*/ 1913 fshift += imax[i-1] - ailen[i-1]; 1914 rmax = PetscMax(rmax,ailen[i]); 1915 if (fshift) { 1916 ip = aj + ai[i]; 1917 ap = aa + bs2*ai[i]; 1918 N = ailen[i]; 1919 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1920 if (!A->structure_only) { 1921 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1922 } 1923 } 1924 ai[i] = ai[i-1] + ailen[i-1]; 1925 } 1926 if (mbs) { 1927 fshift += imax[mbs-1] - ailen[mbs-1]; 1928 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1929 } 1930 1931 /* reset ilen and imax for each row */ 1932 a->nonzerorowcnt = 0; 1933 if (A->structure_only) { 1934 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1935 } else { /* !A->structure_only */ 1936 for (i=0; i<mbs; i++) { 1937 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1938 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1939 } 1940 } 1941 a->nz = ai[mbs]; 1942 1943 /* diagonals may have moved, so kill the diagonal pointers */ 1944 a->idiagvalid = PETSC_FALSE; 1945 if (fshift && a->diag) { 1946 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1947 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1948 a->diag = 0; 1949 } 1950 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1951 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1953 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1954 1955 A->info.mallocs += a->reallocs; 1956 a->reallocs = 0; 1957 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1958 a->rmax = rmax; 1959 1960 if (!A->structure_only) { 1961 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /* 1967 This function returns an array of flags which indicate the locations of contiguous 1968 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1969 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1970 Assume: sizes should be long enough to hold all the values. 1971 */ 1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1973 { 1974 PetscInt i,j,k,row; 1975 PetscBool flg; 1976 1977 PetscFunctionBegin; 1978 for (i=0,j=0; i<n; j++) { 1979 row = idx[i]; 1980 if (row%bs!=0) { /* Not the begining of a block */ 1981 sizes[j] = 1; 1982 i++; 1983 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1984 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1985 i++; 1986 } else { /* Begining of the block, so check if the complete block exists */ 1987 flg = PETSC_TRUE; 1988 for (k=1; k<bs; k++) { 1989 if (row+k != idx[i+k]) { /* break in the block */ 1990 flg = PETSC_FALSE; 1991 break; 1992 } 1993 } 1994 if (flg) { /* No break in the bs */ 1995 sizes[j] = bs; 1996 i += bs; 1997 } else { 1998 sizes[j] = 1; 1999 i++; 2000 } 2001 } 2002 } 2003 *bs_max = j; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2008 { 2009 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 PetscInt i,j,k,count,*rows; 2012 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2013 PetscScalar zero = 0.0; 2014 MatScalar *aa; 2015 const PetscScalar *xx; 2016 PetscScalar *bb; 2017 2018 PetscFunctionBegin; 2019 /* fix right hand side if needed */ 2020 if (x && b) { 2021 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2022 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2023 for (i=0; i<is_n; i++) { 2024 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2025 } 2026 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2027 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2028 } 2029 2030 /* Make a copy of the IS and sort it */ 2031 /* allocate memory for rows,sizes */ 2032 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2033 2034 /* copy IS values to rows, and sort them */ 2035 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2036 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2037 2038 if (baij->keepnonzeropattern) { 2039 for (i=0; i<is_n; i++) sizes[i] = 1; 2040 bs_max = is_n; 2041 } else { 2042 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2043 A->nonzerostate++; 2044 } 2045 2046 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2047 row = rows[j]; 2048 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2049 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2050 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2051 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2052 if (diag != (PetscScalar)0.0) { 2053 if (baij->ilen[row/bs] > 0) { 2054 baij->ilen[row/bs] = 1; 2055 baij->j[baij->i[row/bs]] = row/bs; 2056 2057 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2058 } 2059 /* Now insert all the diagonal values for this bs */ 2060 for (k=0; k<bs; k++) { 2061 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2062 } 2063 } else { /* (diag == 0.0) */ 2064 baij->ilen[row/bs] = 0; 2065 } /* end (diag == 0.0) */ 2066 } else { /* (sizes[i] != bs) */ 2067 #if defined(PETSC_USE_DEBUG) 2068 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2069 #endif 2070 for (k=0; k<count; k++) { 2071 aa[0] = zero; 2072 aa += bs; 2073 } 2074 if (diag != (PetscScalar)0.0) { 2075 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2076 } 2077 } 2078 } 2079 2080 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2081 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 PetscFunctionReturn(0); 2083 } 2084 2085 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2086 { 2087 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2088 PetscErrorCode ierr; 2089 PetscInt i,j,k,count; 2090 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2091 PetscScalar zero = 0.0; 2092 MatScalar *aa; 2093 const PetscScalar *xx; 2094 PetscScalar *bb; 2095 PetscBool *zeroed,vecs = PETSC_FALSE; 2096 2097 PetscFunctionBegin; 2098 /* fix right hand side if needed */ 2099 if (x && b) { 2100 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2101 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2102 vecs = PETSC_TRUE; 2103 } 2104 2105 /* zero the columns */ 2106 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2107 for (i=0; i<is_n; i++) { 2108 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2109 zeroed[is_idx[i]] = PETSC_TRUE; 2110 } 2111 for (i=0; i<A->rmap->N; i++) { 2112 if (!zeroed[i]) { 2113 row = i/bs; 2114 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2115 for (k=0; k<bs; k++) { 2116 col = bs*baij->j[j] + k; 2117 if (zeroed[col]) { 2118 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2119 if (vecs) bb[i] -= aa[0]*xx[col]; 2120 aa[0] = 0.0; 2121 } 2122 } 2123 } 2124 } else if (vecs) bb[i] = diag*xx[i]; 2125 } 2126 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2127 if (vecs) { 2128 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2129 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2130 } 2131 2132 /* zero the rows */ 2133 for (i=0; i<is_n; i++) { 2134 row = is_idx[i]; 2135 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2136 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2137 for (k=0; k<count; k++) { 2138 aa[0] = zero; 2139 aa += bs; 2140 } 2141 if (diag != (PetscScalar)0.0) { 2142 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2143 } 2144 } 2145 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2150 { 2151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2152 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2153 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2154 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2155 PetscErrorCode ierr; 2156 PetscInt ridx,cidx,bs2=a->bs2; 2157 PetscBool roworiented=a->roworiented; 2158 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2159 2160 PetscFunctionBegin; 2161 for (k=0; k<m; k++) { /* loop over added rows */ 2162 row = im[k]; 2163 brow = row/bs; 2164 if (row < 0) continue; 2165 #if defined(PETSC_USE_DEBUG) 2166 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2167 #endif 2168 rp = aj + ai[brow]; 2169 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2170 rmax = imax[brow]; 2171 nrow = ailen[brow]; 2172 low = 0; 2173 high = nrow; 2174 for (l=0; l<n; l++) { /* loop over added columns */ 2175 if (in[l] < 0) continue; 2176 #if defined(PETSC_USE_DEBUG) 2177 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2178 #endif 2179 col = in[l]; bcol = col/bs; 2180 ridx = row % bs; cidx = col % bs; 2181 if (!A->structure_only) { 2182 if (roworiented) { 2183 value = v[l + k*n]; 2184 } else { 2185 value = v[k + l*m]; 2186 } 2187 } 2188 if (col <= lastcol) low = 0; else high = nrow; 2189 lastcol = col; 2190 while (high-low > 7) { 2191 t = (low+high)/2; 2192 if (rp[t] > bcol) high = t; 2193 else low = t; 2194 } 2195 for (i=low; i<high; i++) { 2196 if (rp[i] > bcol) break; 2197 if (rp[i] == bcol) { 2198 bap = ap + bs2*i + bs*cidx + ridx; 2199 if (!A->structure_only) { 2200 if (is == ADD_VALUES) *bap += value; 2201 else *bap = value; 2202 } 2203 goto noinsert1; 2204 } 2205 } 2206 if (nonew == 1) goto noinsert1; 2207 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2208 if (A->structure_only) { 2209 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2210 } else { 2211 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2212 } 2213 N = nrow++ - 1; high++; 2214 /* shift up all the later entries in this row */ 2215 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2216 rp[i] = bcol; 2217 if (!A->structure_only) { 2218 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2219 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2220 ap[bs2*i + bs*cidx + ridx] = value; 2221 } 2222 a->nz++; 2223 A->nonzerostate++; 2224 noinsert1:; 2225 low = i; 2226 } 2227 ailen[brow] = nrow; 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2233 { 2234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2235 Mat outA; 2236 PetscErrorCode ierr; 2237 PetscBool row_identity,col_identity; 2238 2239 PetscFunctionBegin; 2240 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2241 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2242 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2243 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2244 2245 outA = inA; 2246 inA->factortype = MAT_FACTOR_LU; 2247 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2248 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2249 2250 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2251 2252 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2253 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2254 a->row = row; 2255 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2256 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2257 a->col = col; 2258 2259 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2260 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2261 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2262 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2263 2264 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2265 if (!a->solve_work) { 2266 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2267 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2268 } 2269 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2274 { 2275 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2276 PetscInt i,nz,mbs; 2277 2278 PetscFunctionBegin; 2279 nz = baij->maxnz; 2280 mbs = baij->mbs; 2281 for (i=0; i<nz; i++) { 2282 baij->j[i] = indices[i]; 2283 } 2284 baij->nz = nz; 2285 for (i=0; i<mbs; i++) { 2286 baij->ilen[i] = baij->imax[i]; 2287 } 2288 PetscFunctionReturn(0); 2289 } 2290 2291 /*@ 2292 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2293 in the matrix. 2294 2295 Input Parameters: 2296 + mat - the SeqBAIJ matrix 2297 - indices - the column indices 2298 2299 Level: advanced 2300 2301 Notes: 2302 This can be called if you have precomputed the nonzero structure of the 2303 matrix and want to provide it to the matrix object to improve the performance 2304 of the MatSetValues() operation. 2305 2306 You MUST have set the correct numbers of nonzeros per row in the call to 2307 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2308 2309 MUST be called before any calls to MatSetValues(); 2310 2311 @*/ 2312 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2313 { 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2318 PetscValidPointer(indices,2); 2319 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2324 { 2325 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2326 PetscErrorCode ierr; 2327 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2328 PetscReal atmp; 2329 PetscScalar *x,zero = 0.0; 2330 MatScalar *aa; 2331 PetscInt ncols,brow,krow,kcol; 2332 2333 PetscFunctionBegin; 2334 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2335 bs = A->rmap->bs; 2336 aa = a->a; 2337 ai = a->i; 2338 aj = a->j; 2339 mbs = a->mbs; 2340 2341 ierr = VecSet(v,zero);CHKERRQ(ierr); 2342 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2343 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2344 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2345 for (i=0; i<mbs; i++) { 2346 ncols = ai[1] - ai[0]; ai++; 2347 brow = bs*i; 2348 for (j=0; j<ncols; j++) { 2349 for (kcol=0; kcol<bs; kcol++) { 2350 for (krow=0; krow<bs; krow++) { 2351 atmp = PetscAbsScalar(*aa);aa++; 2352 row = brow + krow; /* row index */ 2353 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2354 } 2355 } 2356 aj++; 2357 } 2358 } 2359 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 2363 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2364 { 2365 PetscErrorCode ierr; 2366 2367 PetscFunctionBegin; 2368 /* If the two matrices have the same copy implementation, use fast copy. */ 2369 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2371 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2372 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2373 2374 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2375 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2376 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2377 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2378 } else { 2379 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2380 } 2381 PetscFunctionReturn(0); 2382 } 2383 2384 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2385 { 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2394 { 2395 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2396 2397 PetscFunctionBegin; 2398 *array = a->a; 2399 PetscFunctionReturn(0); 2400 } 2401 2402 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2403 { 2404 PetscFunctionBegin; 2405 PetscFunctionReturn(0); 2406 } 2407 2408 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2409 { 2410 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2411 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2412 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 /* Set the number of nonzeros in the new matrix */ 2417 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2422 { 2423 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2424 PetscErrorCode ierr; 2425 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2426 PetscBLASInt one=1; 2427 2428 PetscFunctionBegin; 2429 if (str == SAME_NONZERO_PATTERN) { 2430 PetscScalar alpha = a; 2431 PetscBLASInt bnz; 2432 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2433 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2434 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2435 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2436 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2437 } else { 2438 Mat B; 2439 PetscInt *nnz; 2440 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2441 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2442 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2443 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2444 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2446 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2447 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2448 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2449 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2450 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2451 ierr = PetscFree(nnz);CHKERRQ(ierr); 2452 } 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2457 { 2458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2459 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2460 MatScalar *aa = a->a; 2461 2462 PetscFunctionBegin; 2463 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2468 { 2469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2470 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2471 MatScalar *aa = a->a; 2472 2473 PetscFunctionBegin; 2474 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 /* 2479 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2480 */ 2481 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2482 { 2483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2484 PetscErrorCode ierr; 2485 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2486 PetscInt nz = a->i[m],row,*jj,mr,col; 2487 2488 PetscFunctionBegin; 2489 *nn = n; 2490 if (!ia) PetscFunctionReturn(0); 2491 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2492 else { 2493 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2495 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2496 jj = a->j; 2497 for (i=0; i<nz; i++) { 2498 collengths[jj[i]]++; 2499 } 2500 cia[0] = oshift; 2501 for (i=0; i<n; i++) { 2502 cia[i+1] = cia[i] + collengths[i]; 2503 } 2504 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2505 jj = a->j; 2506 for (row=0; row<m; row++) { 2507 mr = a->i[row+1] - a->i[row]; 2508 for (i=0; i<mr; i++) { 2509 col = *jj++; 2510 2511 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2512 } 2513 } 2514 ierr = PetscFree(collengths);CHKERRQ(ierr); 2515 *ia = cia; *ja = cja; 2516 } 2517 PetscFunctionReturn(0); 2518 } 2519 2520 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2521 { 2522 PetscErrorCode ierr; 2523 2524 PetscFunctionBegin; 2525 if (!ia) PetscFunctionReturn(0); 2526 ierr = PetscFree(*ia);CHKERRQ(ierr); 2527 ierr = PetscFree(*ja);CHKERRQ(ierr); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 /* 2532 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2533 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2534 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2535 */ 2536 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2537 { 2538 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2539 PetscErrorCode ierr; 2540 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2541 PetscInt nz = a->i[m],row,*jj,mr,col; 2542 PetscInt *cspidx; 2543 2544 PetscFunctionBegin; 2545 *nn = n; 2546 if (!ia) PetscFunctionReturn(0); 2547 2548 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2550 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2551 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2552 jj = a->j; 2553 for (i=0; i<nz; i++) { 2554 collengths[jj[i]]++; 2555 } 2556 cia[0] = oshift; 2557 for (i=0; i<n; i++) { 2558 cia[i+1] = cia[i] + collengths[i]; 2559 } 2560 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2561 jj = a->j; 2562 for (row=0; row<m; row++) { 2563 mr = a->i[row+1] - a->i[row]; 2564 for (i=0; i<mr; i++) { 2565 col = *jj++; 2566 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2567 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2568 } 2569 } 2570 ierr = PetscFree(collengths);CHKERRQ(ierr); 2571 *ia = cia; 2572 *ja = cja; 2573 *spidx = cspidx; 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2578 { 2579 PetscErrorCode ierr; 2580 2581 PetscFunctionBegin; 2582 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2583 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2588 { 2589 PetscErrorCode ierr; 2590 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2591 2592 PetscFunctionBegin; 2593 if (!Y->preallocated || !aij->nz) { 2594 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2595 } 2596 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 /* -------------------------------------------------------------------*/ 2601 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2602 MatGetRow_SeqBAIJ, 2603 MatRestoreRow_SeqBAIJ, 2604 MatMult_SeqBAIJ_N, 2605 /* 4*/ MatMultAdd_SeqBAIJ_N, 2606 MatMultTranspose_SeqBAIJ, 2607 MatMultTransposeAdd_SeqBAIJ, 2608 0, 2609 0, 2610 0, 2611 /* 10*/ 0, 2612 MatLUFactor_SeqBAIJ, 2613 0, 2614 0, 2615 MatTranspose_SeqBAIJ, 2616 /* 15*/ MatGetInfo_SeqBAIJ, 2617 MatEqual_SeqBAIJ, 2618 MatGetDiagonal_SeqBAIJ, 2619 MatDiagonalScale_SeqBAIJ, 2620 MatNorm_SeqBAIJ, 2621 /* 20*/ 0, 2622 MatAssemblyEnd_SeqBAIJ, 2623 MatSetOption_SeqBAIJ, 2624 MatZeroEntries_SeqBAIJ, 2625 /* 24*/ MatZeroRows_SeqBAIJ, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /* 29*/ MatSetUp_SeqBAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /* 34*/ MatDuplicate_SeqBAIJ, 2636 0, 2637 0, 2638 MatILUFactor_SeqBAIJ, 2639 0, 2640 /* 39*/ MatAXPY_SeqBAIJ, 2641 MatCreateSubMatrices_SeqBAIJ, 2642 MatIncreaseOverlap_SeqBAIJ, 2643 MatGetValues_SeqBAIJ, 2644 MatCopy_SeqBAIJ, 2645 /* 44*/ 0, 2646 MatScale_SeqBAIJ, 2647 MatShift_SeqBAIJ, 2648 0, 2649 MatZeroRowsColumns_SeqBAIJ, 2650 /* 49*/ 0, 2651 MatGetRowIJ_SeqBAIJ, 2652 MatRestoreRowIJ_SeqBAIJ, 2653 MatGetColumnIJ_SeqBAIJ, 2654 MatRestoreColumnIJ_SeqBAIJ, 2655 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2656 0, 2657 0, 2658 0, 2659 MatSetValuesBlocked_SeqBAIJ, 2660 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2661 MatDestroy_SeqBAIJ, 2662 MatView_SeqBAIJ, 2663 0, 2664 0, 2665 /* 64*/ 0, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2671 0, 2672 MatConvert_Basic, 2673 0, 2674 0, 2675 /* 74*/ 0, 2676 MatFDColoringApply_BAIJ, 2677 0, 2678 0, 2679 0, 2680 /* 79*/ 0, 2681 0, 2682 0, 2683 0, 2684 MatLoad_SeqBAIJ, 2685 /* 84*/ 0, 2686 0, 2687 0, 2688 0, 2689 0, 2690 /* 89*/ 0, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /* 94*/ 0, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /* 99*/ 0, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /*104*/ 0, 2706 MatRealPart_SeqBAIJ, 2707 MatImaginaryPart_SeqBAIJ, 2708 0, 2709 0, 2710 /*109*/ 0, 2711 0, 2712 0, 2713 0, 2714 MatMissingDiagonal_SeqBAIJ, 2715 /*114*/ 0, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /*119*/ 0, 2721 0, 2722 MatMultHermitianTranspose_SeqBAIJ, 2723 MatMultHermitianTransposeAdd_SeqBAIJ, 2724 0, 2725 /*124*/ 0, 2726 0, 2727 MatInvertBlockDiagonal_SeqBAIJ, 2728 0, 2729 0, 2730 /*129*/ 0, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /*134*/ 0, 2736 0, 2737 0, 2738 0, 2739 0, 2740 /*139*/ MatSetBlockSizes_Default, 2741 0, 2742 0, 2743 MatFDColoringSetUp_SeqXAIJ, 2744 0, 2745 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2746 MatDestroySubMatrices_SeqBAIJ 2747 }; 2748 2749 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2750 { 2751 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2752 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2757 2758 /* allocate space for values if not already there */ 2759 if (!aij->saved_values) { 2760 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2761 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2762 } 2763 2764 /* copy values over */ 2765 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2770 { 2771 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2772 PetscErrorCode ierr; 2773 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2774 2775 PetscFunctionBegin; 2776 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2777 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2778 2779 /* copy values over */ 2780 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2781 PetscFunctionReturn(0); 2782 } 2783 2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2785 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2786 2787 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2788 { 2789 Mat_SeqBAIJ *b; 2790 PetscErrorCode ierr; 2791 PetscInt i,mbs,nbs,bs2; 2792 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2793 2794 PetscFunctionBegin; 2795 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2796 if (nz == MAT_SKIP_ALLOCATION) { 2797 skipallocation = PETSC_TRUE; 2798 nz = 0; 2799 } 2800 2801 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2802 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2803 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2804 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2805 2806 B->preallocated = PETSC_TRUE; 2807 2808 mbs = B->rmap->n/bs; 2809 nbs = B->cmap->n/bs; 2810 bs2 = bs*bs; 2811 2812 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2813 2814 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2815 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2816 if (nnz) { 2817 for (i=0; i<mbs; i++) { 2818 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2819 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2820 } 2821 } 2822 2823 b = (Mat_SeqBAIJ*)B->data; 2824 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2825 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2826 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2827 2828 if (!flg) { 2829 switch (bs) { 2830 case 1: 2831 B->ops->mult = MatMult_SeqBAIJ_1; 2832 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2833 break; 2834 case 2: 2835 B->ops->mult = MatMult_SeqBAIJ_2; 2836 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2837 break; 2838 case 3: 2839 B->ops->mult = MatMult_SeqBAIJ_3; 2840 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2841 break; 2842 case 4: 2843 B->ops->mult = MatMult_SeqBAIJ_4; 2844 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2845 break; 2846 case 5: 2847 B->ops->mult = MatMult_SeqBAIJ_5; 2848 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2849 break; 2850 case 6: 2851 B->ops->mult = MatMult_SeqBAIJ_6; 2852 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2853 break; 2854 case 7: 2855 B->ops->mult = MatMult_SeqBAIJ_7; 2856 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2857 break; 2858 case 9: 2859 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2860 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2862 #else 2863 B->ops->mult = MatMult_SeqBAIJ_N; 2864 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2865 #endif 2866 break; 2867 case 11: 2868 B->ops->mult = MatMult_SeqBAIJ_11; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2870 break; 2871 case 15: 2872 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2874 break; 2875 default: 2876 B->ops->mult = MatMult_SeqBAIJ_N; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2878 break; 2879 } 2880 } 2881 B->ops->sor = MatSOR_SeqBAIJ; 2882 b->mbs = mbs; 2883 b->nbs = nbs; 2884 if (!skipallocation) { 2885 if (!b->imax) { 2886 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2887 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2888 2889 b->free_imax_ilen = PETSC_TRUE; 2890 } 2891 /* b->ilen will count nonzeros in each block row so far. */ 2892 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2893 if (!nnz) { 2894 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2895 else if (nz < 0) nz = 1; 2896 nz = PetscMin(nz,nbs); 2897 for (i=0; i<mbs; i++) b->imax[i] = nz; 2898 nz = nz*mbs; 2899 } else { 2900 nz = 0; 2901 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2902 } 2903 2904 /* allocate the matrix space */ 2905 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2906 if (B->structure_only) { 2907 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2908 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2909 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2910 } else { 2911 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2912 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2913 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2914 } 2915 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2916 2917 if (B->structure_only) { 2918 b->singlemalloc = PETSC_FALSE; 2919 b->free_a = PETSC_FALSE; 2920 } else { 2921 b->singlemalloc = PETSC_TRUE; 2922 b->free_a = PETSC_TRUE; 2923 } 2924 b->free_ij = PETSC_TRUE; 2925 2926 b->i[0] = 0; 2927 for (i=1; i<mbs+1; i++) { 2928 b->i[i] = b->i[i-1] + b->imax[i-1]; 2929 } 2930 2931 } else { 2932 b->free_a = PETSC_FALSE; 2933 b->free_ij = PETSC_FALSE; 2934 } 2935 2936 b->bs2 = bs2; 2937 b->mbs = mbs; 2938 b->nz = 0; 2939 b->maxnz = nz; 2940 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2941 B->was_assembled = PETSC_FALSE; 2942 B->assembled = PETSC_FALSE; 2943 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2944 PetscFunctionReturn(0); 2945 } 2946 2947 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2948 { 2949 PetscInt i,m,nz,nz_max=0,*nnz; 2950 PetscScalar *values=0; 2951 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2952 PetscErrorCode ierr; 2953 2954 PetscFunctionBegin; 2955 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2956 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2958 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2959 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2960 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2961 m = B->rmap->n/bs; 2962 2963 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2964 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2965 for (i=0; i<m; i++) { 2966 nz = ii[i+1]- ii[i]; 2967 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2968 nz_max = PetscMax(nz_max, nz); 2969 nnz[i] = nz; 2970 } 2971 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2972 ierr = PetscFree(nnz);CHKERRQ(ierr); 2973 2974 values = (PetscScalar*)V; 2975 if (!values) { 2976 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2977 } 2978 for (i=0; i<m; i++) { 2979 PetscInt ncols = ii[i+1] - ii[i]; 2980 const PetscInt *icols = jj + ii[i]; 2981 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2982 if (!roworiented) { 2983 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2984 } else { 2985 PetscInt j; 2986 for (j=0; j<ncols; j++) { 2987 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2988 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2989 } 2990 } 2991 } 2992 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2993 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 /*MC 3000 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3001 block sparse compressed row format. 3002 3003 Options Database Keys: 3004 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3005 3006 Level: beginner 3007 3008 .seealso: MatCreateSeqBAIJ() 3009 M*/ 3010 3011 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3012 3013 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3014 { 3015 PetscErrorCode ierr; 3016 PetscMPIInt size; 3017 Mat_SeqBAIJ *b; 3018 3019 PetscFunctionBegin; 3020 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3021 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3022 3023 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3024 B->data = (void*)b; 3025 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3026 3027 b->row = 0; 3028 b->col = 0; 3029 b->icol = 0; 3030 b->reallocs = 0; 3031 b->saved_values = 0; 3032 3033 b->roworiented = PETSC_TRUE; 3034 b->nonew = 0; 3035 b->diag = 0; 3036 B->spptr = 0; 3037 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3038 b->keepnonzeropattern = PETSC_FALSE; 3039 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3049 #if defined(PETSC_HAVE_HYPRE) 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3051 #endif 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3059 { 3060 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3061 PetscErrorCode ierr; 3062 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3063 3064 PetscFunctionBegin; 3065 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3066 3067 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3068 c->imax = a->imax; 3069 c->ilen = a->ilen; 3070 c->free_imax_ilen = PETSC_FALSE; 3071 } else { 3072 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3073 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3074 for (i=0; i<mbs; i++) { 3075 c->imax[i] = a->imax[i]; 3076 c->ilen[i] = a->ilen[i]; 3077 } 3078 c->free_imax_ilen = PETSC_TRUE; 3079 } 3080 3081 /* allocate the matrix space */ 3082 if (mallocmatspace) { 3083 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3084 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3085 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3086 3087 c->i = a->i; 3088 c->j = a->j; 3089 c->singlemalloc = PETSC_FALSE; 3090 c->free_a = PETSC_TRUE; 3091 c->free_ij = PETSC_FALSE; 3092 c->parent = A; 3093 C->preallocated = PETSC_TRUE; 3094 C->assembled = PETSC_TRUE; 3095 3096 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3097 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3098 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3099 } else { 3100 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3101 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3102 3103 c->singlemalloc = PETSC_TRUE; 3104 c->free_a = PETSC_TRUE; 3105 c->free_ij = PETSC_TRUE; 3106 3107 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3108 if (mbs > 0) { 3109 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3110 if (cpvalues == MAT_COPY_VALUES) { 3111 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3112 } else { 3113 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3114 } 3115 } 3116 C->preallocated = PETSC_TRUE; 3117 C->assembled = PETSC_TRUE; 3118 } 3119 } 3120 3121 c->roworiented = a->roworiented; 3122 c->nonew = a->nonew; 3123 3124 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3125 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3126 3127 c->bs2 = a->bs2; 3128 c->mbs = a->mbs; 3129 c->nbs = a->nbs; 3130 3131 if (a->diag) { 3132 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3133 c->diag = a->diag; 3134 c->free_diag = PETSC_FALSE; 3135 } else { 3136 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3137 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3138 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3139 c->free_diag = PETSC_TRUE; 3140 } 3141 } else c->diag = 0; 3142 3143 c->nz = a->nz; 3144 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3145 c->solve_work = NULL; 3146 c->mult_work = NULL; 3147 c->sor_workt = NULL; 3148 c->sor_work = NULL; 3149 3150 c->compressedrow.use = a->compressedrow.use; 3151 c->compressedrow.nrows = a->compressedrow.nrows; 3152 if (a->compressedrow.use) { 3153 i = a->compressedrow.nrows; 3154 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3155 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3156 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3157 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3158 } else { 3159 c->compressedrow.use = PETSC_FALSE; 3160 c->compressedrow.i = NULL; 3161 c->compressedrow.rindex = NULL; 3162 } 3163 C->nonzerostate = A->nonzerostate; 3164 3165 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3166 PetscFunctionReturn(0); 3167 } 3168 3169 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3170 { 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3175 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3176 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3177 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3178 PetscFunctionReturn(0); 3179 } 3180 3181 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3182 { 3183 Mat_SeqBAIJ *a; 3184 PetscErrorCode ierr; 3185 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3186 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3187 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3188 PetscInt *masked,nmask,tmp,bs2,ishift; 3189 PetscMPIInt size; 3190 int fd; 3191 PetscScalar *aa; 3192 MPI_Comm comm; 3193 PetscBool isbinary; 3194 3195 PetscFunctionBegin; 3196 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3197 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 3198 3199 /* force binary viewer to load .info file if it has not yet done so */ 3200 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3201 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3202 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3203 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3204 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3205 if (bs < 0) bs = 1; 3206 bs2 = bs*bs; 3207 3208 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3209 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3210 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3211 ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3212 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3213 M = header[1]; N = header[2]; nz = header[3]; 3214 3215 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3216 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3217 3218 /* 3219 This code adds extra rows to make sure the number of rows is 3220 divisible by the blocksize 3221 */ 3222 mbs = M/bs; 3223 extra_rows = bs - M + bs*(mbs); 3224 if (extra_rows == bs) extra_rows = 0; 3225 else mbs++; 3226 if (extra_rows) { 3227 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3228 } 3229 3230 /* Set global sizes if not already set */ 3231 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3232 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3233 } else { /* Check if the matrix global sizes are correct */ 3234 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3235 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3236 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3237 } 3238 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3239 } 3240 3241 /* read in row lengths */ 3242 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3243 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 3244 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3245 3246 /* read in column indices */ 3247 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3248 ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3249 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3250 3251 /* loop over row lengths determining block row lengths */ 3252 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3253 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3254 ierr = PetscArrayzero(mask,mbs);CHKERRQ(ierr); 3255 rowcount = 0; 3256 nzcount = 0; 3257 for (i=0; i<mbs; i++) { 3258 nmask = 0; 3259 for (j=0; j<bs; j++) { 3260 kmax = rowlengths[rowcount]; 3261 for (k=0; k<kmax; k++) { 3262 tmp = jj[nzcount++]/bs; 3263 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3264 } 3265 rowcount++; 3266 } 3267 browlengths[i] += nmask; 3268 /* zero out the mask elements we set */ 3269 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3270 } 3271 3272 /* Do preallocation */ 3273 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3274 a = (Mat_SeqBAIJ*)newmat->data; 3275 3276 /* set matrix "i" values */ 3277 a->i[0] = 0; 3278 for (i=1; i<= mbs; i++) { 3279 a->i[i] = a->i[i-1] + browlengths[i-1]; 3280 a->ilen[i-1] = browlengths[i-1]; 3281 } 3282 a->nz = 0; 3283 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3284 3285 /* read in nonzero values */ 3286 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3287 ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3288 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3289 3290 /* set "a" and "j" values into matrix */ 3291 nzcount = 0; jcount = 0; 3292 for (i=0; i<mbs; i++) { 3293 nzcountb = nzcount; 3294 nmask = 0; 3295 for (j=0; j<bs; j++) { 3296 kmax = rowlengths[i*bs+j]; 3297 for (k=0; k<kmax; k++) { 3298 tmp = jj[nzcount++]/bs; 3299 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3300 } 3301 } 3302 /* sort the masked values */ 3303 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3304 3305 /* set "j" values into matrix */ 3306 maskcount = 1; 3307 for (j=0; j<nmask; j++) { 3308 a->j[jcount++] = masked[j]; 3309 mask[masked[j]] = maskcount++; 3310 } 3311 /* set "a" values into matrix */ 3312 ishift = bs2*a->i[i]; 3313 for (j=0; j<bs; j++) { 3314 kmax = rowlengths[i*bs+j]; 3315 for (k=0; k<kmax; k++) { 3316 tmp = jj[nzcountb]/bs; 3317 block = mask[tmp] - 1; 3318 point = jj[nzcountb] - bs*tmp; 3319 idx = ishift + bs2*block + j + bs*point; 3320 a->a[idx] = (MatScalar)aa[nzcountb++]; 3321 } 3322 } 3323 /* zero out the mask elements we set */ 3324 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3325 } 3326 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3327 3328 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3329 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3330 ierr = PetscFree(aa);CHKERRQ(ierr); 3331 ierr = PetscFree(jj);CHKERRQ(ierr); 3332 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3333 3334 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3335 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3336 PetscFunctionReturn(0); 3337 } 3338 3339 /*@C 3340 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3341 compressed row) format. For good matrix assembly performance the 3342 user should preallocate the matrix storage by setting the parameter nz 3343 (or the array nnz). By setting these parameters accurately, performance 3344 during matrix assembly can be increased by more than a factor of 50. 3345 3346 Collective 3347 3348 Input Parameters: 3349 + comm - MPI communicator, set to PETSC_COMM_SELF 3350 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3351 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3352 . m - number of rows 3353 . n - number of columns 3354 . nz - number of nonzero blocks per block row (same for all rows) 3355 - nnz - array containing the number of nonzero blocks in the various block rows 3356 (possibly different for each block row) or NULL 3357 3358 Output Parameter: 3359 . A - the matrix 3360 3361 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3362 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3363 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3364 3365 Options Database Keys: 3366 . -mat_no_unroll - uses code that does not unroll the loops in the 3367 block calculations (much slower) 3368 . -mat_block_size - size of the blocks to use 3369 3370 Level: intermediate 3371 3372 Notes: 3373 The number of rows and columns must be divisible by blocksize. 3374 3375 If the nnz parameter is given then the nz parameter is ignored 3376 3377 A nonzero block is any block that as 1 or more nonzeros in it 3378 3379 The block AIJ format is fully compatible with standard Fortran 77 3380 storage. That is, the stored row and column indices can begin at 3381 either one (as in Fortran) or zero. See the users' manual for details. 3382 3383 Specify the preallocated storage with either nz or nnz (not both). 3384 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3385 allocation. See Users-Manual: ch_mat for details. 3386 matrices. 3387 3388 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3389 @*/ 3390 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3391 { 3392 PetscErrorCode ierr; 3393 3394 PetscFunctionBegin; 3395 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3396 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3397 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3398 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3399 PetscFunctionReturn(0); 3400 } 3401 3402 /*@C 3403 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3404 per row in the matrix. For good matrix assembly performance the 3405 user should preallocate the matrix storage by setting the parameter nz 3406 (or the array nnz). By setting these parameters accurately, performance 3407 during matrix assembly can be increased by more than a factor of 50. 3408 3409 Collective 3410 3411 Input Parameters: 3412 + B - the matrix 3413 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3414 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3415 . nz - number of block nonzeros per block row (same for all rows) 3416 - nnz - array containing the number of block nonzeros in the various block rows 3417 (possibly different for each block row) or NULL 3418 3419 Options Database Keys: 3420 . -mat_no_unroll - uses code that does not unroll the loops in the 3421 block calculations (much slower) 3422 . -mat_block_size - size of the blocks to use 3423 3424 Level: intermediate 3425 3426 Notes: 3427 If the nnz parameter is given then the nz parameter is ignored 3428 3429 You can call MatGetInfo() to get information on how effective the preallocation was; 3430 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3431 You can also run with the option -info and look for messages with the string 3432 malloc in them to see if additional memory allocation was needed. 3433 3434 The block AIJ format is fully compatible with standard Fortran 77 3435 storage. That is, the stored row and column indices can begin at 3436 either one (as in Fortran) or zero. See the users' manual for details. 3437 3438 Specify the preallocated storage with either nz or nnz (not both). 3439 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3440 allocation. See Users-Manual: ch_mat for details. 3441 3442 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3443 @*/ 3444 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3445 { 3446 PetscErrorCode ierr; 3447 3448 PetscFunctionBegin; 3449 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3450 PetscValidType(B,1); 3451 PetscValidLogicalCollectiveInt(B,bs,2); 3452 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3453 PetscFunctionReturn(0); 3454 } 3455 3456 /*@C 3457 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3458 (the default sequential PETSc format). 3459 3460 Collective 3461 3462 Input Parameters: 3463 + B - the matrix 3464 . i - the indices into j for the start of each local row (starts with zero) 3465 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3466 - v - optional values in the matrix 3467 3468 Level: developer 3469 3470 Notes: 3471 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3472 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3473 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3474 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3475 block column and the second index is over columns within a block. 3476 3477 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3478 @*/ 3479 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3480 { 3481 PetscErrorCode ierr; 3482 3483 PetscFunctionBegin; 3484 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3485 PetscValidType(B,1); 3486 PetscValidLogicalCollectiveInt(B,bs,2); 3487 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3488 PetscFunctionReturn(0); 3489 } 3490 3491 3492 /*@ 3493 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3494 3495 Collective 3496 3497 Input Parameters: 3498 + comm - must be an MPI communicator of size 1 3499 . bs - size of block 3500 . m - number of rows 3501 . n - number of columns 3502 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3503 . j - column indices 3504 - a - matrix values 3505 3506 Output Parameter: 3507 . mat - the matrix 3508 3509 Level: advanced 3510 3511 Notes: 3512 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3513 once the matrix is destroyed 3514 3515 You cannot set new nonzero locations into this matrix, that will generate an error. 3516 3517 The i and j indices are 0 based 3518 3519 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3520 3521 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3522 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3523 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3524 with column-major ordering within blocks. 3525 3526 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3527 3528 @*/ 3529 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3530 { 3531 PetscErrorCode ierr; 3532 PetscInt ii; 3533 Mat_SeqBAIJ *baij; 3534 3535 PetscFunctionBegin; 3536 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3537 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3538 3539 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3540 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3541 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3542 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3543 baij = (Mat_SeqBAIJ*)(*mat)->data; 3544 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3545 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3546 3547 baij->i = i; 3548 baij->j = j; 3549 baij->a = a; 3550 3551 baij->singlemalloc = PETSC_FALSE; 3552 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3553 baij->free_a = PETSC_FALSE; 3554 baij->free_ij = PETSC_FALSE; 3555 3556 for (ii=0; ii<m; ii++) { 3557 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3558 #if defined(PETSC_USE_DEBUG) 3559 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3560 #endif 3561 } 3562 #if defined(PETSC_USE_DEBUG) 3563 for (ii=0; ii<baij->i[m]; ii++) { 3564 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3565 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3566 } 3567 #endif 3568 3569 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3570 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3571 PetscFunctionReturn(0); 3572 } 3573 3574 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3575 { 3576 PetscErrorCode ierr; 3577 PetscMPIInt size; 3578 3579 PetscFunctionBegin; 3580 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3581 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3582 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3583 } else { 3584 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3585 } 3586 PetscFunctionReturn(0); 3587 } 3588