1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 916 stepval = (n-1)*4; 917 for (k=0; k<m; k++) { /* loop over added rows */ 918 row = im[k]; 919 rp = aj + ai[row]; 920 ap = aa + 16*ai[row]; 921 nrow = ailen[row]; 922 low = 0; 923 high = nrow; 924 for (l=0; l<n; l++) { /* loop over added columns */ 925 col = in[l]; 926 if (col <= lastcol) low = 0; 927 else high = nrow; 928 lastcol = col; 929 value = v + k*(stepval+4 + l)*4; 930 while (high-low > 7) { 931 t = (low+high)/2; 932 if (rp[t] > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (rp[i] > col) break; 937 if (rp[i] == col) { 938 bap = ap + 16*i; 939 for (ii=0; ii<4; ii++,value+=stepval) { 940 for (jj=ii; jj<16; jj+=4) { 941 bap[jj] += *value++; 942 } 943 } 944 goto noinsert2; 945 } 946 } 947 N = nrow++ - 1; 948 high++; /* added new column index thus must search to one higher than before */ 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 953 } 954 if (N >= i) { 955 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 956 } 957 rp[i] = col; 958 bap = ap + 16*i; 959 for (ii=0; ii<4; ii++,value+=stepval) { 960 for (jj=ii; jj<16; jj+=4) { 961 bap[jj] = *value++; 962 } 963 } 964 noinsert2:; 965 low = i; 966 } 967 ailen[row] = nrow; 968 } 969 PetscFunctionReturnVoid(); 970 } 971 972 #if defined(PETSC_HAVE_FORTRAN_CAPS) 973 #define matsetvalues4_ MATSETVALUES4 974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 975 #define matsetvalues4_ matsetvalues4 976 #endif 977 978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 979 { 980 Mat A = *AA; 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 982 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 983 PetscInt *ai=a->i,*ailen=a->ilen; 984 PetscInt *aj=a->j,brow,bcol; 985 PetscInt ridx,cidx,lastcol = -1; 986 MatScalar *ap,value,*aa=a->a,*bap; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 for (k=0; k<m; k++) { /* loop over added rows */ 991 row = im[k]; brow = row/4; 992 rp = aj + ai[brow]; 993 ap = aa + 16*ai[brow]; 994 nrow = ailen[brow]; 995 low = 0; 996 high = nrow; 997 for (l=0; l<n; l++) { /* loop over added columns */ 998 col = in[l]; bcol = col/4; 999 ridx = row % 4; cidx = col % 4; 1000 value = v[l + k*n]; 1001 if (col <= lastcol) low = 0; 1002 else high = nrow; 1003 lastcol = col; 1004 while (high-low > 7) { 1005 t = (low+high)/2; 1006 if (rp[t] > bcol) high = t; 1007 else low = t; 1008 } 1009 for (i=low; i<high; i++) { 1010 if (rp[i] > bcol) break; 1011 if (rp[i] == bcol) { 1012 bap = ap + 16*i + 4*cidx + ridx; 1013 *bap += value; 1014 goto noinsert1; 1015 } 1016 } 1017 N = nrow++ - 1; 1018 high++; /* added new column thus must search to one higher than before */ 1019 /* shift up all the later entries in this row */ 1020 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1021 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1022 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1023 rp[i] = bcol; 1024 ap[16*i + 4*cidx + ridx] = value; 1025 noinsert1:; 1026 low = i; 1027 } 1028 ailen[brow] = nrow; 1029 } 1030 PetscFunctionReturnVoid(); 1031 } 1032 1033 /* 1034 Checks for missing diagonals 1035 */ 1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1037 { 1038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1039 PetscErrorCode ierr; 1040 PetscInt *diag,*ii = a->i,i; 1041 1042 PetscFunctionBegin; 1043 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1044 *missing = PETSC_FALSE; 1045 if (A->rmap->n > 0 && !ii) { 1046 *missing = PETSC_TRUE; 1047 if (d) *d = 0; 1048 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1049 } else { 1050 PetscInt n; 1051 n = PetscMin(a->mbs, a->nbs); 1052 diag = a->diag; 1053 for (i=0; i<n; i++) { 1054 if (diag[i] >= ii[i+1]) { 1055 *missing = PETSC_TRUE; 1056 if (d) *d = i; 1057 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1058 break; 1059 } 1060 } 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1066 { 1067 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 PetscInt i,j,m = a->mbs; 1070 1071 PetscFunctionBegin; 1072 if (!a->diag) { 1073 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1074 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1075 a->free_diag = PETSC_TRUE; 1076 } 1077 for (i=0; i<m; i++) { 1078 a->diag[i] = a->i[i+1]; 1079 for (j=a->i[i]; j<a->i[i+1]; j++) { 1080 if (a->j[j] == i) { 1081 a->diag[i] = j; 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = oshift; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1174 { 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (!ia) PetscFunctionReturn(0); 1179 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1180 ierr = PetscFree(*ia);CHKERRQ(ierr); 1181 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1187 { 1188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 #if defined(PETSC_USE_LOG) 1193 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1194 #endif 1195 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1197 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1198 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1200 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1201 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1204 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1206 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1207 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1208 1209 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1210 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1211 ierr = PetscFree(A->data);CHKERRQ(ierr); 1212 1213 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_HYPRE) 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1235 { 1236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 switch (op) { 1241 case MAT_ROW_ORIENTED: 1242 a->roworiented = flg; 1243 break; 1244 case MAT_KEEP_NONZERO_PATTERN: 1245 a->keepnonzeropattern = flg; 1246 break; 1247 case MAT_NEW_NONZERO_LOCATIONS: 1248 a->nonew = (flg ? 0 : 1); 1249 break; 1250 case MAT_NEW_NONZERO_LOCATION_ERR: 1251 a->nonew = (flg ? -1 : 0); 1252 break; 1253 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1254 a->nonew = (flg ? -2 : 0); 1255 break; 1256 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1257 a->nounused = (flg ? -1 : 0); 1258 break; 1259 case MAT_NEW_DIAGONALS: 1260 case MAT_IGNORE_OFF_PROC_ENTRIES: 1261 case MAT_USE_HASH_TABLE: 1262 case MAT_SORTED_FULL: 1263 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1264 break; 1265 case MAT_SPD: 1266 case MAT_SYMMETRIC: 1267 case MAT_STRUCTURALLY_SYMMETRIC: 1268 case MAT_HERMITIAN: 1269 case MAT_SYMMETRY_ETERNAL: 1270 case MAT_SUBMAT_SINGLEIS: 1271 case MAT_STRUCTURE_ONLY: 1272 /* These options are handled directly by MatSetOption() */ 1273 break; 1274 default: 1275 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1281 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1282 { 1283 PetscErrorCode ierr; 1284 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1285 MatScalar *aa_i; 1286 PetscScalar *v_i; 1287 1288 PetscFunctionBegin; 1289 bs = A->rmap->bs; 1290 bs2 = bs*bs; 1291 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1292 1293 bn = row/bs; /* Block number */ 1294 bp = row % bs; /* Block Position */ 1295 M = ai[bn+1] - ai[bn]; 1296 *nz = bs*M; 1297 1298 if (v) { 1299 *v = 0; 1300 if (*nz) { 1301 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1302 for (i=0; i<M; i++) { /* for each block in the block row */ 1303 v_i = *v + i*bs; 1304 aa_i = aa + bs2*(ai[bn] + i); 1305 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1306 } 1307 } 1308 } 1309 1310 if (idx) { 1311 *idx = 0; 1312 if (*nz) { 1313 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1314 for (i=0; i<M; i++) { /* for each block in the block row */ 1315 idx_i = *idx + i*bs; 1316 itmp = bs*aj[ai[bn] + i]; 1317 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1318 } 1319 } 1320 } 1321 PetscFunctionReturn(0); 1322 } 1323 1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1325 { 1326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1335 { 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1340 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1341 PetscFunctionReturn(0); 1342 } 1343 1344 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1345 { 1346 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1347 Mat C; 1348 PetscErrorCode ierr; 1349 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1350 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1351 MatScalar *ata,*aa=a->a; 1352 1353 PetscFunctionBegin; 1354 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1355 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1356 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1357 1358 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1359 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1360 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1361 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1362 1363 at = (Mat_SeqBAIJ*)C->data; 1364 ati = at->i; 1365 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1366 } else { 1367 C = *B; 1368 at = (Mat_SeqBAIJ*)C->data; 1369 ati = at->i; 1370 } 1371 1372 atj = at->j; 1373 ata = at->a; 1374 1375 /* Copy ati into atfill so we have locations of the next free space in atj */ 1376 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1377 1378 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1379 for (i=0; i<mbs; i++) { 1380 anzj = ai[i+1] - ai[i]; 1381 for (j=0; j<anzj; j++) { 1382 atj[atfill[*aj]] = i; 1383 for (kr=0; kr<bs; kr++) { 1384 for (k=0; k<bs; k++) { 1385 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1386 } 1387 } 1388 atfill[*aj++] += 1; 1389 } 1390 } 1391 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 1394 /* Clean up temporary space and complete requests. */ 1395 ierr = PetscFree(atfill);CHKERRQ(ierr); 1396 1397 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1398 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1399 *B = C; 1400 } else { 1401 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1407 { 1408 PetscErrorCode ierr; 1409 Mat Btrans; 1410 1411 PetscFunctionBegin; 1412 *f = PETSC_FALSE; 1413 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1414 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1415 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1420 { 1421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1422 PetscErrorCode ierr; 1423 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1424 int fd; 1425 PetscScalar *aa; 1426 FILE *file; 1427 1428 PetscFunctionBegin; 1429 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1430 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1431 col_lens[0] = MAT_FILE_CLASSID; 1432 1433 col_lens[1] = A->rmap->N; 1434 col_lens[2] = A->cmap->n; 1435 col_lens[3] = a->nz*bs2; 1436 1437 /* store lengths of each row and write (including header) to file */ 1438 count = 0; 1439 for (i=0; i<a->mbs; i++) { 1440 for (j=0; j<bs; j++) { 1441 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1442 } 1443 } 1444 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1445 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1446 1447 /* store column indices (zero start index) */ 1448 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1449 count = 0; 1450 for (i=0; i<a->mbs; i++) { 1451 for (j=0; j<bs; j++) { 1452 for (k=a->i[i]; k<a->i[i+1]; k++) { 1453 for (l=0; l<bs; l++) { 1454 jj[count++] = bs*a->j[k] + l; 1455 } 1456 } 1457 } 1458 } 1459 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1460 ierr = PetscFree(jj);CHKERRQ(ierr); 1461 1462 /* store nonzero values */ 1463 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1464 count = 0; 1465 for (i=0; i<a->mbs; i++) { 1466 for (j=0; j<bs; j++) { 1467 for (k=a->i[i]; k<a->i[i+1]; k++) { 1468 for (l=0; l<bs; l++) { 1469 aa[count++] = a->a[bs2*k + l*bs + j]; 1470 } 1471 } 1472 } 1473 } 1474 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1475 ierr = PetscFree(aa);CHKERRQ(ierr); 1476 1477 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1478 if (file) { 1479 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1480 } 1481 PetscFunctionReturn(0); 1482 } 1483 1484 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1485 { 1486 PetscErrorCode ierr; 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1488 PetscInt i,bs = A->rmap->bs,k; 1489 1490 PetscFunctionBegin; 1491 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1492 for (i=0; i<a->mbs; i++) { 1493 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1494 for (k=a->i[i]; k<a->i[i+1]; k++) { 1495 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1496 } 1497 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1498 } 1499 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1500 PetscFunctionReturn(0); 1501 } 1502 1503 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1504 { 1505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1506 PetscErrorCode ierr; 1507 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1508 PetscViewerFormat format; 1509 1510 PetscFunctionBegin; 1511 if (A->structure_only) { 1512 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1517 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1518 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1520 const char *matname; 1521 Mat aij; 1522 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1523 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1524 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1525 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1526 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1527 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1528 PetscFunctionReturn(0); 1529 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1530 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1531 for (i=0; i<a->mbs; i++) { 1532 for (j=0; j<bs; j++) { 1533 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1534 for (k=a->i[i]; k<a->i[i+1]; k++) { 1535 for (l=0; l<bs; l++) { 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1542 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } 1546 #else 1547 if (a->a[bs2*k + l*bs + j] != 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1549 } 1550 #endif 1551 } 1552 } 1553 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1554 } 1555 } 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1559 for (i=0; i<a->mbs; i++) { 1560 for (j=0; j<bs; j++) { 1561 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1562 for (k=a->i[i]; k<a->i[i+1]; k++) { 1563 for (l=0; l<bs; l++) { 1564 #if defined(PETSC_USE_COMPLEX) 1565 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1567 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1570 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } else { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1573 } 1574 #else 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1576 #endif 1577 } 1578 } 1579 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1580 } 1581 } 1582 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1583 } 1584 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #include <petscdraw.h> 1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1590 { 1591 Mat A = (Mat) Aa; 1592 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1595 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1596 MatScalar *aa; 1597 PetscViewer viewer; 1598 PetscViewerFormat format; 1599 1600 PetscFunctionBegin; 1601 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1602 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1603 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1604 1605 /* loop over matrix elements drawing boxes */ 1606 1607 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1608 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1609 /* Blue for negative, Cyan for zero and Red for positive */ 1610 color = PETSC_DRAW_BLUE; 1611 for (i=0,row=0; i<mbs; i++,row+=bs) { 1612 for (j=a->i[i]; j<a->i[i+1]; j++) { 1613 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1614 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1615 aa = a->a + j*bs2; 1616 for (k=0; k<bs; k++) { 1617 for (l=0; l<bs; l++) { 1618 if (PetscRealPart(*aa++) >= 0.) continue; 1619 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1620 } 1621 } 1622 } 1623 } 1624 color = PETSC_DRAW_CYAN; 1625 for (i=0,row=0; i<mbs; i++,row+=bs) { 1626 for (j=a->i[i]; j<a->i[i+1]; j++) { 1627 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1628 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1629 aa = a->a + j*bs2; 1630 for (k=0; k<bs; k++) { 1631 for (l=0; l<bs; l++) { 1632 if (PetscRealPart(*aa++) != 0.) continue; 1633 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1634 } 1635 } 1636 } 1637 } 1638 color = PETSC_DRAW_RED; 1639 for (i=0,row=0; i<mbs; i++,row+=bs) { 1640 for (j=a->i[i]; j<a->i[i+1]; j++) { 1641 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1643 aa = a->a + j*bs2; 1644 for (k=0; k<bs; k++) { 1645 for (l=0; l<bs; l++) { 1646 if (PetscRealPart(*aa++) <= 0.) continue; 1647 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1648 } 1649 } 1650 } 1651 } 1652 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1653 } else { 1654 /* use contour shading to indicate magnitude of values */ 1655 /* first determine max of all nonzero values */ 1656 PetscReal minv = 0.0, maxv = 0.0; 1657 PetscDraw popup; 1658 1659 for (i=0; i<a->nz*a->bs2; i++) { 1660 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1661 } 1662 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1663 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1664 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1665 1666 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1667 for (i=0,row=0; i<mbs; i++,row+=bs) { 1668 for (j=a->i[i]; j<a->i[i+1]; j++) { 1669 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1670 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1671 aa = a->a + j*bs2; 1672 for (k=0; k<bs; k++) { 1673 for (l=0; l<bs; l++) { 1674 MatScalar v = *aa++; 1675 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1676 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1687 { 1688 PetscErrorCode ierr; 1689 PetscReal xl,yl,xr,yr,w,h; 1690 PetscDraw draw; 1691 PetscBool isnull; 1692 1693 PetscFunctionBegin; 1694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1696 if (isnull) PetscFunctionReturn(0); 1697 1698 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1699 xr += w; yr += h; xl = -w; yl = -h; 1700 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1701 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1702 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1703 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1704 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1709 { 1710 PetscErrorCode ierr; 1711 PetscBool iascii,isbinary,isdraw; 1712 1713 PetscFunctionBegin; 1714 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1715 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1716 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1717 if (iascii) { 1718 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1719 } else if (isbinary) { 1720 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1721 } else if (isdraw) { 1722 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1723 } else { 1724 Mat B; 1725 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1726 ierr = MatView(B,viewer);CHKERRQ(ierr); 1727 ierr = MatDestroy(&B);CHKERRQ(ierr); 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 1733 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1734 { 1735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1736 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1737 PetscInt *ai = a->i,*ailen = a->ilen; 1738 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1739 MatScalar *ap,*aa = a->a; 1740 1741 PetscFunctionBegin; 1742 for (k=0; k<m; k++) { /* loop over rows */ 1743 row = im[k]; brow = row/bs; 1744 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1745 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1746 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1747 nrow = ailen[brow]; 1748 for (l=0; l<n; l++) { /* loop over columns */ 1749 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1750 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1751 col = in[l]; 1752 bcol = col/bs; 1753 cidx = col%bs; 1754 ridx = row%bs; 1755 high = nrow; 1756 low = 0; /* assume unsorted */ 1757 while (high-low > 5) { 1758 t = (low+high)/2; 1759 if (rp[t] > bcol) high = t; 1760 else low = t; 1761 } 1762 for (i=low; i<high; i++) { 1763 if (rp[i] > bcol) break; 1764 if (rp[i] == bcol) { 1765 *v++ = ap[bs2*i+bs*cidx+ridx]; 1766 goto finished; 1767 } 1768 } 1769 *v++ = 0.0; 1770 finished:; 1771 } 1772 } 1773 PetscFunctionReturn(0); 1774 } 1775 1776 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1777 { 1778 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1779 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1780 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1781 PetscErrorCode ierr; 1782 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1783 PetscBool roworiented=a->roworiented; 1784 const PetscScalar *value = v; 1785 MatScalar *ap=NULL,*aa = a->a,*bap; 1786 1787 PetscFunctionBegin; 1788 if (roworiented) { 1789 stepval = (n-1)*bs; 1790 } else { 1791 stepval = (m-1)*bs; 1792 } 1793 for (k=0; k<m; k++) { /* loop over added rows */ 1794 row = im[k]; 1795 if (row < 0) continue; 1796 #if defined(PETSC_USE_DEBUG) 1797 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1798 #endif 1799 rp = aj + ai[row]; 1800 if (!A->structure_only) ap = aa + bs2*ai[row]; 1801 rmax = imax[row]; 1802 nrow = ailen[row]; 1803 low = 0; 1804 high = nrow; 1805 for (l=0; l<n; l++) { /* loop over added columns */ 1806 if (in[l] < 0) continue; 1807 #if defined(PETSC_USE_DEBUG) 1808 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1809 #endif 1810 col = in[l]; 1811 if (!A->structure_only) { 1812 if (roworiented) { 1813 value = v + (k*(stepval+bs) + l)*bs; 1814 } else { 1815 value = v + (l*(stepval+bs) + k)*bs; 1816 } 1817 } 1818 if (col <= lastcol) low = 0; 1819 else high = nrow; 1820 lastcol = col; 1821 while (high-low > 7) { 1822 t = (low+high)/2; 1823 if (rp[t] > col) high = t; 1824 else low = t; 1825 } 1826 for (i=low; i<high; i++) { 1827 if (rp[i] > col) break; 1828 if (rp[i] == col) { 1829 if (A->structure_only) goto noinsert2; 1830 bap = ap + bs2*i; 1831 if (roworiented) { 1832 if (is == ADD_VALUES) { 1833 for (ii=0; ii<bs; ii++,value+=stepval) { 1834 for (jj=ii; jj<bs2; jj+=bs) { 1835 bap[jj] += *value++; 1836 } 1837 } 1838 } else { 1839 for (ii=0; ii<bs; ii++,value+=stepval) { 1840 for (jj=ii; jj<bs2; jj+=bs) { 1841 bap[jj] = *value++; 1842 } 1843 } 1844 } 1845 } else { 1846 if (is == ADD_VALUES) { 1847 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1848 for (jj=0; jj<bs; jj++) { 1849 bap[jj] += value[jj]; 1850 } 1851 bap += bs; 1852 } 1853 } else { 1854 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 bap[jj] = value[jj]; 1857 } 1858 bap += bs; 1859 } 1860 } 1861 } 1862 goto noinsert2; 1863 } 1864 } 1865 if (nonew == 1) goto noinsert2; 1866 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1867 if (A->structure_only) { 1868 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1869 } else { 1870 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1871 } 1872 N = nrow++ - 1; high++; 1873 /* shift up all the later entries in this row */ 1874 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1875 rp[i] = col; 1876 if (!A->structure_only) { 1877 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1878 bap = ap + bs2*i; 1879 if (roworiented) { 1880 for (ii=0; ii<bs; ii++,value+=stepval) { 1881 for (jj=ii; jj<bs2; jj+=bs) { 1882 bap[jj] = *value++; 1883 } 1884 } 1885 } else { 1886 for (ii=0; ii<bs; ii++,value+=stepval) { 1887 for (jj=0; jj<bs; jj++) { 1888 *bap++ = *value++; 1889 } 1890 } 1891 } 1892 } 1893 noinsert2:; 1894 low = i; 1895 } 1896 ailen[row] = nrow; 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1905 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1906 PetscErrorCode ierr; 1907 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1908 MatScalar *aa = a->a,*ap; 1909 PetscReal ratio=0.6; 1910 1911 PetscFunctionBegin; 1912 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1913 1914 if (m) rmax = ailen[0]; 1915 for (i=1; i<mbs; i++) { 1916 /* move each row back by the amount of empty slots (fshift) before it*/ 1917 fshift += imax[i-1] - ailen[i-1]; 1918 rmax = PetscMax(rmax,ailen[i]); 1919 if (fshift) { 1920 ip = aj + ai[i]; 1921 ap = aa + bs2*ai[i]; 1922 N = ailen[i]; 1923 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1924 if (!A->structure_only) { 1925 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1926 } 1927 } 1928 ai[i] = ai[i-1] + ailen[i-1]; 1929 } 1930 if (mbs) { 1931 fshift += imax[mbs-1] - ailen[mbs-1]; 1932 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1933 } 1934 1935 /* reset ilen and imax for each row */ 1936 a->nonzerorowcnt = 0; 1937 if (A->structure_only) { 1938 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1939 } else { /* !A->structure_only */ 1940 for (i=0; i<mbs; i++) { 1941 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1942 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1943 } 1944 } 1945 a->nz = ai[mbs]; 1946 1947 /* diagonals may have moved, so kill the diagonal pointers */ 1948 a->idiagvalid = PETSC_FALSE; 1949 if (fshift && a->diag) { 1950 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1951 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1952 a->diag = 0; 1953 } 1954 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1955 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1956 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1957 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1958 1959 A->info.mallocs += a->reallocs; 1960 a->reallocs = 0; 1961 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1962 a->rmax = rmax; 1963 1964 if (!A->structure_only) { 1965 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1966 } 1967 PetscFunctionReturn(0); 1968 } 1969 1970 /* 1971 This function returns an array of flags which indicate the locations of contiguous 1972 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1973 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1974 Assume: sizes should be long enough to hold all the values. 1975 */ 1976 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1977 { 1978 PetscInt i,j,k,row; 1979 PetscBool flg; 1980 1981 PetscFunctionBegin; 1982 for (i=0,j=0; i<n; j++) { 1983 row = idx[i]; 1984 if (row%bs!=0) { /* Not the begining of a block */ 1985 sizes[j] = 1; 1986 i++; 1987 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1988 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1989 i++; 1990 } else { /* Begining of the block, so check if the complete block exists */ 1991 flg = PETSC_TRUE; 1992 for (k=1; k<bs; k++) { 1993 if (row+k != idx[i+k]) { /* break in the block */ 1994 flg = PETSC_FALSE; 1995 break; 1996 } 1997 } 1998 if (flg) { /* No break in the bs */ 1999 sizes[j] = bs; 2000 i += bs; 2001 } else { 2002 sizes[j] = 1; 2003 i++; 2004 } 2005 } 2006 } 2007 *bs_max = j; 2008 PetscFunctionReturn(0); 2009 } 2010 2011 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2012 { 2013 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 PetscInt i,j,k,count,*rows; 2016 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2017 PetscScalar zero = 0.0; 2018 MatScalar *aa; 2019 const PetscScalar *xx; 2020 PetscScalar *bb; 2021 2022 PetscFunctionBegin; 2023 /* fix right hand side if needed */ 2024 if (x && b) { 2025 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2026 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2027 for (i=0; i<is_n; i++) { 2028 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2029 } 2030 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2031 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2032 } 2033 2034 /* Make a copy of the IS and sort it */ 2035 /* allocate memory for rows,sizes */ 2036 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2037 2038 /* copy IS values to rows, and sort them */ 2039 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2040 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2041 2042 if (baij->keepnonzeropattern) { 2043 for (i=0; i<is_n; i++) sizes[i] = 1; 2044 bs_max = is_n; 2045 } else { 2046 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2047 A->nonzerostate++; 2048 } 2049 2050 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2051 row = rows[j]; 2052 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2053 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2054 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2055 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2056 if (diag != (PetscScalar)0.0) { 2057 if (baij->ilen[row/bs] > 0) { 2058 baij->ilen[row/bs] = 1; 2059 baij->j[baij->i[row/bs]] = row/bs; 2060 2061 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2062 } 2063 /* Now insert all the diagonal values for this bs */ 2064 for (k=0; k<bs; k++) { 2065 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2066 } 2067 } else { /* (diag == 0.0) */ 2068 baij->ilen[row/bs] = 0; 2069 } /* end (diag == 0.0) */ 2070 } else { /* (sizes[i] != bs) */ 2071 #if defined(PETSC_USE_DEBUG) 2072 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2073 #endif 2074 for (k=0; k<count; k++) { 2075 aa[0] = zero; 2076 aa += bs; 2077 } 2078 if (diag != (PetscScalar)0.0) { 2079 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2080 } 2081 } 2082 } 2083 2084 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2085 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086 PetscFunctionReturn(0); 2087 } 2088 2089 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2090 { 2091 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2092 PetscErrorCode ierr; 2093 PetscInt i,j,k,count; 2094 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2095 PetscScalar zero = 0.0; 2096 MatScalar *aa; 2097 const PetscScalar *xx; 2098 PetscScalar *bb; 2099 PetscBool *zeroed,vecs = PETSC_FALSE; 2100 2101 PetscFunctionBegin; 2102 /* fix right hand side if needed */ 2103 if (x && b) { 2104 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2105 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2106 vecs = PETSC_TRUE; 2107 } 2108 2109 /* zero the columns */ 2110 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2111 for (i=0; i<is_n; i++) { 2112 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2113 zeroed[is_idx[i]] = PETSC_TRUE; 2114 } 2115 for (i=0; i<A->rmap->N; i++) { 2116 if (!zeroed[i]) { 2117 row = i/bs; 2118 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2119 for (k=0; k<bs; k++) { 2120 col = bs*baij->j[j] + k; 2121 if (zeroed[col]) { 2122 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2123 if (vecs) bb[i] -= aa[0]*xx[col]; 2124 aa[0] = 0.0; 2125 } 2126 } 2127 } 2128 } else if (vecs) bb[i] = diag*xx[i]; 2129 } 2130 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2131 if (vecs) { 2132 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2133 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2134 } 2135 2136 /* zero the rows */ 2137 for (i=0; i<is_n; i++) { 2138 row = is_idx[i]; 2139 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2140 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2141 for (k=0; k<count; k++) { 2142 aa[0] = zero; 2143 aa += bs; 2144 } 2145 if (diag != (PetscScalar)0.0) { 2146 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2147 } 2148 } 2149 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2154 { 2155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2156 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2157 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2158 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2159 PetscErrorCode ierr; 2160 PetscInt ridx,cidx,bs2=a->bs2; 2161 PetscBool roworiented=a->roworiented; 2162 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2163 2164 PetscFunctionBegin; 2165 for (k=0; k<m; k++) { /* loop over added rows */ 2166 row = im[k]; 2167 brow = row/bs; 2168 if (row < 0) continue; 2169 #if defined(PETSC_USE_DEBUG) 2170 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2171 #endif 2172 rp = aj + ai[brow]; 2173 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2174 rmax = imax[brow]; 2175 nrow = ailen[brow]; 2176 low = 0; 2177 high = nrow; 2178 for (l=0; l<n; l++) { /* loop over added columns */ 2179 if (in[l] < 0) continue; 2180 #if defined(PETSC_USE_DEBUG) 2181 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2182 #endif 2183 col = in[l]; bcol = col/bs; 2184 ridx = row % bs; cidx = col % bs; 2185 if (!A->structure_only) { 2186 if (roworiented) { 2187 value = v[l + k*n]; 2188 } else { 2189 value = v[k + l*m]; 2190 } 2191 } 2192 if (col <= lastcol) low = 0; else high = nrow; 2193 lastcol = col; 2194 while (high-low > 7) { 2195 t = (low+high)/2; 2196 if (rp[t] > bcol) high = t; 2197 else low = t; 2198 } 2199 for (i=low; i<high; i++) { 2200 if (rp[i] > bcol) break; 2201 if (rp[i] == bcol) { 2202 bap = ap + bs2*i + bs*cidx + ridx; 2203 if (!A->structure_only) { 2204 if (is == ADD_VALUES) *bap += value; 2205 else *bap = value; 2206 } 2207 goto noinsert1; 2208 } 2209 } 2210 if (nonew == 1) goto noinsert1; 2211 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2212 if (A->structure_only) { 2213 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2214 } else { 2215 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2216 } 2217 N = nrow++ - 1; high++; 2218 /* shift up all the later entries in this row */ 2219 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2220 rp[i] = bcol; 2221 if (!A->structure_only) { 2222 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2223 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2224 ap[bs2*i + bs*cidx + ridx] = value; 2225 } 2226 a->nz++; 2227 A->nonzerostate++; 2228 noinsert1:; 2229 low = i; 2230 } 2231 ailen[brow] = nrow; 2232 } 2233 PetscFunctionReturn(0); 2234 } 2235 2236 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2237 { 2238 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2239 Mat outA; 2240 PetscErrorCode ierr; 2241 PetscBool row_identity,col_identity; 2242 2243 PetscFunctionBegin; 2244 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2245 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2246 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2247 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2248 2249 outA = inA; 2250 inA->factortype = MAT_FACTOR_LU; 2251 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2252 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2253 2254 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2255 2256 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2257 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2258 a->row = row; 2259 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2260 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2261 a->col = col; 2262 2263 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2264 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2265 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2266 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2267 2268 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2269 if (!a->solve_work) { 2270 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2271 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2272 } 2273 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2278 { 2279 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2280 PetscInt i,nz,mbs; 2281 2282 PetscFunctionBegin; 2283 nz = baij->maxnz; 2284 mbs = baij->mbs; 2285 for (i=0; i<nz; i++) { 2286 baij->j[i] = indices[i]; 2287 } 2288 baij->nz = nz; 2289 for (i=0; i<mbs; i++) { 2290 baij->ilen[i] = baij->imax[i]; 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 /*@ 2296 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2297 in the matrix. 2298 2299 Input Parameters: 2300 + mat - the SeqBAIJ matrix 2301 - indices - the column indices 2302 2303 Level: advanced 2304 2305 Notes: 2306 This can be called if you have precomputed the nonzero structure of the 2307 matrix and want to provide it to the matrix object to improve the performance 2308 of the MatSetValues() operation. 2309 2310 You MUST have set the correct numbers of nonzeros per row in the call to 2311 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2312 2313 MUST be called before any calls to MatSetValues(); 2314 2315 @*/ 2316 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2317 { 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2322 PetscValidPointer(indices,2); 2323 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2328 { 2329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2330 PetscErrorCode ierr; 2331 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2332 PetscReal atmp; 2333 PetscScalar *x,zero = 0.0; 2334 MatScalar *aa; 2335 PetscInt ncols,brow,krow,kcol; 2336 2337 PetscFunctionBegin; 2338 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2339 bs = A->rmap->bs; 2340 aa = a->a; 2341 ai = a->i; 2342 aj = a->j; 2343 mbs = a->mbs; 2344 2345 ierr = VecSet(v,zero);CHKERRQ(ierr); 2346 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2347 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2348 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2349 for (i=0; i<mbs; i++) { 2350 ncols = ai[1] - ai[0]; ai++; 2351 brow = bs*i; 2352 for (j=0; j<ncols; j++) { 2353 for (kcol=0; kcol<bs; kcol++) { 2354 for (krow=0; krow<bs; krow++) { 2355 atmp = PetscAbsScalar(*aa);aa++; 2356 row = brow + krow; /* row index */ 2357 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2358 } 2359 } 2360 aj++; 2361 } 2362 } 2363 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2368 { 2369 PetscErrorCode ierr; 2370 2371 PetscFunctionBegin; 2372 /* If the two matrices have the same copy implementation, use fast copy. */ 2373 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2375 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2376 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2377 2378 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2379 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2380 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2381 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2382 } else { 2383 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2384 } 2385 PetscFunctionReturn(0); 2386 } 2387 2388 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2389 { 2390 PetscErrorCode ierr; 2391 2392 PetscFunctionBegin; 2393 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2398 { 2399 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2400 2401 PetscFunctionBegin; 2402 *array = a->a; 2403 PetscFunctionReturn(0); 2404 } 2405 2406 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2407 { 2408 PetscFunctionBegin; 2409 *array = NULL; 2410 PetscFunctionReturn(0); 2411 } 2412 2413 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2414 { 2415 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2416 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2417 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2418 PetscErrorCode ierr; 2419 2420 PetscFunctionBegin; 2421 /* Set the number of nonzeros in the new matrix */ 2422 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2427 { 2428 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2429 PetscErrorCode ierr; 2430 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2431 PetscBLASInt one=1; 2432 2433 PetscFunctionBegin; 2434 if (str == SAME_NONZERO_PATTERN) { 2435 PetscScalar alpha = a; 2436 PetscBLASInt bnz; 2437 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2438 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2439 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2440 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2441 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2442 } else { 2443 Mat B; 2444 PetscInt *nnz; 2445 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2446 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2447 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2448 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2449 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2450 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2451 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2452 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2453 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2454 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2455 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2456 ierr = PetscFree(nnz);CHKERRQ(ierr); 2457 } 2458 PetscFunctionReturn(0); 2459 } 2460 2461 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2462 { 2463 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2464 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2465 MatScalar *aa = a->a; 2466 2467 PetscFunctionBegin; 2468 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2473 { 2474 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2475 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2476 MatScalar *aa = a->a; 2477 2478 PetscFunctionBegin; 2479 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 /* 2484 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2485 */ 2486 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2487 { 2488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2489 PetscErrorCode ierr; 2490 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2491 PetscInt nz = a->i[m],row,*jj,mr,col; 2492 2493 PetscFunctionBegin; 2494 *nn = n; 2495 if (!ia) PetscFunctionReturn(0); 2496 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2497 else { 2498 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2499 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2500 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2501 jj = a->j; 2502 for (i=0; i<nz; i++) { 2503 collengths[jj[i]]++; 2504 } 2505 cia[0] = oshift; 2506 for (i=0; i<n; i++) { 2507 cia[i+1] = cia[i] + collengths[i]; 2508 } 2509 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2510 jj = a->j; 2511 for (row=0; row<m; row++) { 2512 mr = a->i[row+1] - a->i[row]; 2513 for (i=0; i<mr; i++) { 2514 col = *jj++; 2515 2516 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2517 } 2518 } 2519 ierr = PetscFree(collengths);CHKERRQ(ierr); 2520 *ia = cia; *ja = cja; 2521 } 2522 PetscFunctionReturn(0); 2523 } 2524 2525 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2526 { 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 if (!ia) PetscFunctionReturn(0); 2531 ierr = PetscFree(*ia);CHKERRQ(ierr); 2532 ierr = PetscFree(*ja);CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 /* 2537 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2538 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2539 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2540 */ 2541 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2542 { 2543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2544 PetscErrorCode ierr; 2545 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2546 PetscInt nz = a->i[m],row,*jj,mr,col; 2547 PetscInt *cspidx; 2548 2549 PetscFunctionBegin; 2550 *nn = n; 2551 if (!ia) PetscFunctionReturn(0); 2552 2553 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2554 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2555 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2556 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2557 jj = a->j; 2558 for (i=0; i<nz; i++) { 2559 collengths[jj[i]]++; 2560 } 2561 cia[0] = oshift; 2562 for (i=0; i<n; i++) { 2563 cia[i+1] = cia[i] + collengths[i]; 2564 } 2565 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2566 jj = a->j; 2567 for (row=0; row<m; row++) { 2568 mr = a->i[row+1] - a->i[row]; 2569 for (i=0; i<mr; i++) { 2570 col = *jj++; 2571 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2572 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2573 } 2574 } 2575 ierr = PetscFree(collengths);CHKERRQ(ierr); 2576 *ia = cia; 2577 *ja = cja; 2578 *spidx = cspidx; 2579 PetscFunctionReturn(0); 2580 } 2581 2582 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2583 { 2584 PetscErrorCode ierr; 2585 2586 PetscFunctionBegin; 2587 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2588 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2593 { 2594 PetscErrorCode ierr; 2595 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2596 2597 PetscFunctionBegin; 2598 if (!Y->preallocated || !aij->nz) { 2599 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2600 } 2601 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 /* -------------------------------------------------------------------*/ 2606 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2607 MatGetRow_SeqBAIJ, 2608 MatRestoreRow_SeqBAIJ, 2609 MatMult_SeqBAIJ_N, 2610 /* 4*/ MatMultAdd_SeqBAIJ_N, 2611 MatMultTranspose_SeqBAIJ, 2612 MatMultTransposeAdd_SeqBAIJ, 2613 0, 2614 0, 2615 0, 2616 /* 10*/ 0, 2617 MatLUFactor_SeqBAIJ, 2618 0, 2619 0, 2620 MatTranspose_SeqBAIJ, 2621 /* 15*/ MatGetInfo_SeqBAIJ, 2622 MatEqual_SeqBAIJ, 2623 MatGetDiagonal_SeqBAIJ, 2624 MatDiagonalScale_SeqBAIJ, 2625 MatNorm_SeqBAIJ, 2626 /* 20*/ 0, 2627 MatAssemblyEnd_SeqBAIJ, 2628 MatSetOption_SeqBAIJ, 2629 MatZeroEntries_SeqBAIJ, 2630 /* 24*/ MatZeroRows_SeqBAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /* 29*/ MatSetUp_SeqBAIJ, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /* 34*/ MatDuplicate_SeqBAIJ, 2641 0, 2642 0, 2643 MatILUFactor_SeqBAIJ, 2644 0, 2645 /* 39*/ MatAXPY_SeqBAIJ, 2646 MatCreateSubMatrices_SeqBAIJ, 2647 MatIncreaseOverlap_SeqBAIJ, 2648 MatGetValues_SeqBAIJ, 2649 MatCopy_SeqBAIJ, 2650 /* 44*/ 0, 2651 MatScale_SeqBAIJ, 2652 MatShift_SeqBAIJ, 2653 0, 2654 MatZeroRowsColumns_SeqBAIJ, 2655 /* 49*/ 0, 2656 MatGetRowIJ_SeqBAIJ, 2657 MatRestoreRowIJ_SeqBAIJ, 2658 MatGetColumnIJ_SeqBAIJ, 2659 MatRestoreColumnIJ_SeqBAIJ, 2660 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2661 0, 2662 0, 2663 0, 2664 MatSetValuesBlocked_SeqBAIJ, 2665 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2666 MatDestroy_SeqBAIJ, 2667 MatView_SeqBAIJ, 2668 0, 2669 0, 2670 /* 64*/ 0, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2676 0, 2677 MatConvert_Basic, 2678 0, 2679 0, 2680 /* 74*/ 0, 2681 MatFDColoringApply_BAIJ, 2682 0, 2683 0, 2684 0, 2685 /* 79*/ 0, 2686 0, 2687 0, 2688 0, 2689 MatLoad_SeqBAIJ, 2690 /* 84*/ 0, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /* 89*/ 0, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /* 94*/ 0, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /* 99*/ 0, 2706 0, 2707 0, 2708 0, 2709 0, 2710 /*104*/ 0, 2711 MatRealPart_SeqBAIJ, 2712 MatImaginaryPart_SeqBAIJ, 2713 0, 2714 0, 2715 /*109*/ 0, 2716 0, 2717 0, 2718 0, 2719 MatMissingDiagonal_SeqBAIJ, 2720 /*114*/ 0, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /*119*/ 0, 2726 0, 2727 MatMultHermitianTranspose_SeqBAIJ, 2728 MatMultHermitianTransposeAdd_SeqBAIJ, 2729 0, 2730 /*124*/ 0, 2731 0, 2732 MatInvertBlockDiagonal_SeqBAIJ, 2733 0, 2734 0, 2735 /*129*/ 0, 2736 0, 2737 0, 2738 0, 2739 0, 2740 /*134*/ 0, 2741 0, 2742 0, 2743 0, 2744 0, 2745 /*139*/ MatSetBlockSizes_Default, 2746 0, 2747 0, 2748 MatFDColoringSetUp_SeqXAIJ, 2749 0, 2750 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2751 MatDestroySubMatrices_SeqBAIJ 2752 }; 2753 2754 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2755 { 2756 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2757 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2758 PetscErrorCode ierr; 2759 2760 PetscFunctionBegin; 2761 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2762 2763 /* allocate space for values if not already there */ 2764 if (!aij->saved_values) { 2765 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2766 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2767 } 2768 2769 /* copy values over */ 2770 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2771 PetscFunctionReturn(0); 2772 } 2773 2774 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2775 { 2776 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2777 PetscErrorCode ierr; 2778 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2779 2780 PetscFunctionBegin; 2781 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2782 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2783 2784 /* copy values over */ 2785 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2786 PetscFunctionReturn(0); 2787 } 2788 2789 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2790 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2791 2792 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2793 { 2794 Mat_SeqBAIJ *b; 2795 PetscErrorCode ierr; 2796 PetscInt i,mbs,nbs,bs2; 2797 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2798 2799 PetscFunctionBegin; 2800 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2801 if (nz == MAT_SKIP_ALLOCATION) { 2802 skipallocation = PETSC_TRUE; 2803 nz = 0; 2804 } 2805 2806 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2807 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2808 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2809 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2810 2811 B->preallocated = PETSC_TRUE; 2812 2813 mbs = B->rmap->n/bs; 2814 nbs = B->cmap->n/bs; 2815 bs2 = bs*bs; 2816 2817 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2818 2819 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2820 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2821 if (nnz) { 2822 for (i=0; i<mbs; i++) { 2823 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2824 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2825 } 2826 } 2827 2828 b = (Mat_SeqBAIJ*)B->data; 2829 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2830 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2831 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2832 2833 if (!flg) { 2834 switch (bs) { 2835 case 1: 2836 B->ops->mult = MatMult_SeqBAIJ_1; 2837 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2838 break; 2839 case 2: 2840 B->ops->mult = MatMult_SeqBAIJ_2; 2841 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2842 break; 2843 case 3: 2844 B->ops->mult = MatMult_SeqBAIJ_3; 2845 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2846 break; 2847 case 4: 2848 B->ops->mult = MatMult_SeqBAIJ_4; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2850 break; 2851 case 5: 2852 B->ops->mult = MatMult_SeqBAIJ_5; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2854 break; 2855 case 6: 2856 B->ops->mult = MatMult_SeqBAIJ_6; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2858 break; 2859 case 7: 2860 B->ops->mult = MatMult_SeqBAIJ_7; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2862 break; 2863 case 9: 2864 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2865 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2867 #else 2868 B->ops->mult = MatMult_SeqBAIJ_N; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2870 #endif 2871 break; 2872 case 11: 2873 B->ops->mult = MatMult_SeqBAIJ_11; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2875 break; 2876 case 15: 2877 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2878 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2879 break; 2880 default: 2881 B->ops->mult = MatMult_SeqBAIJ_N; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2883 break; 2884 } 2885 } 2886 B->ops->sor = MatSOR_SeqBAIJ; 2887 b->mbs = mbs; 2888 b->nbs = nbs; 2889 if (!skipallocation) { 2890 if (!b->imax) { 2891 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2892 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2893 2894 b->free_imax_ilen = PETSC_TRUE; 2895 } 2896 /* b->ilen will count nonzeros in each block row so far. */ 2897 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2898 if (!nnz) { 2899 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2900 else if (nz < 0) nz = 1; 2901 nz = PetscMin(nz,nbs); 2902 for (i=0; i<mbs; i++) b->imax[i] = nz; 2903 nz = nz*mbs; 2904 } else { 2905 PetscInt64 nz64 = 0; 2906 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2907 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 2908 } 2909 2910 /* allocate the matrix space */ 2911 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2912 if (B->structure_only) { 2913 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2914 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2915 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2916 } else { 2917 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2918 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2919 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2920 } 2921 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2922 2923 if (B->structure_only) { 2924 b->singlemalloc = PETSC_FALSE; 2925 b->free_a = PETSC_FALSE; 2926 } else { 2927 b->singlemalloc = PETSC_TRUE; 2928 b->free_a = PETSC_TRUE; 2929 } 2930 b->free_ij = PETSC_TRUE; 2931 2932 b->i[0] = 0; 2933 for (i=1; i<mbs+1; i++) { 2934 b->i[i] = b->i[i-1] + b->imax[i-1]; 2935 } 2936 2937 } else { 2938 b->free_a = PETSC_FALSE; 2939 b->free_ij = PETSC_FALSE; 2940 } 2941 2942 b->bs2 = bs2; 2943 b->mbs = mbs; 2944 b->nz = 0; 2945 b->maxnz = nz; 2946 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2947 B->was_assembled = PETSC_FALSE; 2948 B->assembled = PETSC_FALSE; 2949 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2950 PetscFunctionReturn(0); 2951 } 2952 2953 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2954 { 2955 PetscInt i,m,nz,nz_max=0,*nnz; 2956 PetscScalar *values=0; 2957 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2958 PetscErrorCode ierr; 2959 2960 PetscFunctionBegin; 2961 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2962 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2963 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2964 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2965 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2966 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2967 m = B->rmap->n/bs; 2968 2969 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2970 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2971 for (i=0; i<m; i++) { 2972 nz = ii[i+1]- ii[i]; 2973 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2974 nz_max = PetscMax(nz_max, nz); 2975 nnz[i] = nz; 2976 } 2977 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2978 ierr = PetscFree(nnz);CHKERRQ(ierr); 2979 2980 values = (PetscScalar*)V; 2981 if (!values) { 2982 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2983 } 2984 for (i=0; i<m; i++) { 2985 PetscInt ncols = ii[i+1] - ii[i]; 2986 const PetscInt *icols = jj + ii[i]; 2987 if (bs == 1 || !roworiented) { 2988 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2989 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2990 } else { 2991 PetscInt j; 2992 for (j=0; j<ncols; j++) { 2993 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2994 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2995 } 2996 } 2997 } 2998 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2999 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3002 PetscFunctionReturn(0); 3003 } 3004 3005 /*@C 3006 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3007 3008 Not Collective 3009 3010 Input Parameter: 3011 . mat - a MATSEQBAIJ matrix 3012 3013 Output Parameter: 3014 . array - pointer to the data 3015 3016 Level: intermediate 3017 3018 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3019 @*/ 3020 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3021 { 3022 PetscErrorCode ierr; 3023 3024 PetscFunctionBegin; 3025 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3026 PetscFunctionReturn(0); 3027 } 3028 3029 /*@C 3030 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3031 3032 Not Collective 3033 3034 Input Parameters: 3035 + mat - a MATSEQBAIJ matrix 3036 - array - pointer to the data 3037 3038 Level: intermediate 3039 3040 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3041 @*/ 3042 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3043 { 3044 PetscErrorCode ierr; 3045 3046 PetscFunctionBegin; 3047 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 /*MC 3052 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3053 block sparse compressed row format. 3054 3055 Options Database Keys: 3056 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3057 3058 Level: beginner 3059 3060 Notes: 3061 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3062 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3063 3064 .seealso: MatCreateSeqBAIJ() 3065 M*/ 3066 3067 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3068 3069 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3070 { 3071 PetscErrorCode ierr; 3072 PetscMPIInt size; 3073 Mat_SeqBAIJ *b; 3074 3075 PetscFunctionBegin; 3076 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3077 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3078 3079 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3080 B->data = (void*)b; 3081 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3082 3083 b->row = 0; 3084 b->col = 0; 3085 b->icol = 0; 3086 b->reallocs = 0; 3087 b->saved_values = 0; 3088 3089 b->roworiented = PETSC_TRUE; 3090 b->nonew = 0; 3091 b->diag = 0; 3092 B->spptr = 0; 3093 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3094 b->keepnonzeropattern = PETSC_FALSE; 3095 3096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3107 #if defined(PETSC_HAVE_HYPRE) 3108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3109 #endif 3110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3112 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3117 { 3118 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3119 PetscErrorCode ierr; 3120 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3121 3122 PetscFunctionBegin; 3123 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3124 3125 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3126 c->imax = a->imax; 3127 c->ilen = a->ilen; 3128 c->free_imax_ilen = PETSC_FALSE; 3129 } else { 3130 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3131 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3132 for (i=0; i<mbs; i++) { 3133 c->imax[i] = a->imax[i]; 3134 c->ilen[i] = a->ilen[i]; 3135 } 3136 c->free_imax_ilen = PETSC_TRUE; 3137 } 3138 3139 /* allocate the matrix space */ 3140 if (mallocmatspace) { 3141 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3142 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3143 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3144 3145 c->i = a->i; 3146 c->j = a->j; 3147 c->singlemalloc = PETSC_FALSE; 3148 c->free_a = PETSC_TRUE; 3149 c->free_ij = PETSC_FALSE; 3150 c->parent = A; 3151 C->preallocated = PETSC_TRUE; 3152 C->assembled = PETSC_TRUE; 3153 3154 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3155 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3156 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3157 } else { 3158 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3159 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3160 3161 c->singlemalloc = PETSC_TRUE; 3162 c->free_a = PETSC_TRUE; 3163 c->free_ij = PETSC_TRUE; 3164 3165 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3166 if (mbs > 0) { 3167 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3168 if (cpvalues == MAT_COPY_VALUES) { 3169 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3170 } else { 3171 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3172 } 3173 } 3174 C->preallocated = PETSC_TRUE; 3175 C->assembled = PETSC_TRUE; 3176 } 3177 } 3178 3179 c->roworiented = a->roworiented; 3180 c->nonew = a->nonew; 3181 3182 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3183 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3184 3185 c->bs2 = a->bs2; 3186 c->mbs = a->mbs; 3187 c->nbs = a->nbs; 3188 3189 if (a->diag) { 3190 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3191 c->diag = a->diag; 3192 c->free_diag = PETSC_FALSE; 3193 } else { 3194 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3195 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3196 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3197 c->free_diag = PETSC_TRUE; 3198 } 3199 } else c->diag = 0; 3200 3201 c->nz = a->nz; 3202 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3203 c->solve_work = NULL; 3204 c->mult_work = NULL; 3205 c->sor_workt = NULL; 3206 c->sor_work = NULL; 3207 3208 c->compressedrow.use = a->compressedrow.use; 3209 c->compressedrow.nrows = a->compressedrow.nrows; 3210 if (a->compressedrow.use) { 3211 i = a->compressedrow.nrows; 3212 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3213 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3214 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3215 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3216 } else { 3217 c->compressedrow.use = PETSC_FALSE; 3218 c->compressedrow.i = NULL; 3219 c->compressedrow.rindex = NULL; 3220 } 3221 C->nonzerostate = A->nonzerostate; 3222 3223 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3224 PetscFunctionReturn(0); 3225 } 3226 3227 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3228 { 3229 PetscErrorCode ierr; 3230 3231 PetscFunctionBegin; 3232 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3233 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3234 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3235 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3236 PetscFunctionReturn(0); 3237 } 3238 3239 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3240 { 3241 Mat_SeqBAIJ *a; 3242 PetscErrorCode ierr; 3243 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3244 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3245 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3246 PetscInt *masked,nmask,tmp,bs2,ishift; 3247 PetscMPIInt size; 3248 int fd; 3249 PetscScalar *aa; 3250 MPI_Comm comm; 3251 PetscBool isbinary; 3252 3253 PetscFunctionBegin; 3254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3255 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 3256 3257 /* force binary viewer to load .info file if it has not yet done so */ 3258 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3259 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3260 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3261 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3262 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3263 if (bs < 0) bs = 1; 3264 bs2 = bs*bs; 3265 3266 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3267 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3268 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3269 ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3270 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3271 M = header[1]; N = header[2]; nz = header[3]; 3272 3273 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3274 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3275 3276 /* 3277 This code adds extra rows to make sure the number of rows is 3278 divisible by the blocksize 3279 */ 3280 mbs = M/bs; 3281 extra_rows = bs - M + bs*(mbs); 3282 if (extra_rows == bs) extra_rows = 0; 3283 else mbs++; 3284 if (extra_rows) { 3285 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3286 } 3287 3288 /* Set global sizes if not already set */ 3289 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3290 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3291 } else { /* Check if the matrix global sizes are correct */ 3292 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3293 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3294 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3295 } 3296 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3297 } 3298 3299 /* read in row lengths */ 3300 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3301 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 3302 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3303 3304 /* read in column indices */ 3305 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3306 ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3307 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3308 3309 /* loop over row lengths determining block row lengths */ 3310 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3311 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3312 ierr = PetscArrayzero(mask,mbs);CHKERRQ(ierr); 3313 rowcount = 0; 3314 nzcount = 0; 3315 for (i=0; i<mbs; i++) { 3316 nmask = 0; 3317 for (j=0; j<bs; j++) { 3318 kmax = rowlengths[rowcount]; 3319 for (k=0; k<kmax; k++) { 3320 tmp = jj[nzcount++]/bs; 3321 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3322 } 3323 rowcount++; 3324 } 3325 browlengths[i] += nmask; 3326 /* zero out the mask elements we set */ 3327 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3328 } 3329 3330 /* Do preallocation */ 3331 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3332 a = (Mat_SeqBAIJ*)newmat->data; 3333 3334 /* set matrix "i" values */ 3335 a->i[0] = 0; 3336 for (i=1; i<= mbs; i++) { 3337 a->i[i] = a->i[i-1] + browlengths[i-1]; 3338 a->ilen[i-1] = browlengths[i-1]; 3339 } 3340 a->nz = 0; 3341 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3342 3343 /* read in nonzero values */ 3344 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3345 ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3346 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3347 3348 /* set "a" and "j" values into matrix */ 3349 nzcount = 0; jcount = 0; 3350 for (i=0; i<mbs; i++) { 3351 nzcountb = nzcount; 3352 nmask = 0; 3353 for (j=0; j<bs; j++) { 3354 kmax = rowlengths[i*bs+j]; 3355 for (k=0; k<kmax; k++) { 3356 tmp = jj[nzcount++]/bs; 3357 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3358 } 3359 } 3360 /* sort the masked values */ 3361 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3362 3363 /* set "j" values into matrix */ 3364 maskcount = 1; 3365 for (j=0; j<nmask; j++) { 3366 a->j[jcount++] = masked[j]; 3367 mask[masked[j]] = maskcount++; 3368 } 3369 /* set "a" values into matrix */ 3370 ishift = bs2*a->i[i]; 3371 for (j=0; j<bs; j++) { 3372 kmax = rowlengths[i*bs+j]; 3373 for (k=0; k<kmax; k++) { 3374 tmp = jj[nzcountb]/bs; 3375 block = mask[tmp] - 1; 3376 point = jj[nzcountb] - bs*tmp; 3377 idx = ishift + bs2*block + j + bs*point; 3378 a->a[idx] = (MatScalar)aa[nzcountb++]; 3379 } 3380 } 3381 /* zero out the mask elements we set */ 3382 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3383 } 3384 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3385 3386 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3387 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3388 ierr = PetscFree(aa);CHKERRQ(ierr); 3389 ierr = PetscFree(jj);CHKERRQ(ierr); 3390 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3391 3392 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3393 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3394 PetscFunctionReturn(0); 3395 } 3396 3397 /*@C 3398 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3399 compressed row) format. For good matrix assembly performance the 3400 user should preallocate the matrix storage by setting the parameter nz 3401 (or the array nnz). By setting these parameters accurately, performance 3402 during matrix assembly can be increased by more than a factor of 50. 3403 3404 Collective 3405 3406 Input Parameters: 3407 + comm - MPI communicator, set to PETSC_COMM_SELF 3408 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3409 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3410 . m - number of rows 3411 . n - number of columns 3412 . nz - number of nonzero blocks per block row (same for all rows) 3413 - nnz - array containing the number of nonzero blocks in the various block rows 3414 (possibly different for each block row) or NULL 3415 3416 Output Parameter: 3417 . A - the matrix 3418 3419 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3420 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3421 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3422 3423 Options Database Keys: 3424 + -mat_no_unroll - uses code that does not unroll the loops in the 3425 block calculations (much slower) 3426 - -mat_block_size - size of the blocks to use 3427 3428 Level: intermediate 3429 3430 Notes: 3431 The number of rows and columns must be divisible by blocksize. 3432 3433 If the nnz parameter is given then the nz parameter is ignored 3434 3435 A nonzero block is any block that as 1 or more nonzeros in it 3436 3437 The block AIJ format is fully compatible with standard Fortran 77 3438 storage. That is, the stored row and column indices can begin at 3439 either one (as in Fortran) or zero. See the users' manual for details. 3440 3441 Specify the preallocated storage with either nz or nnz (not both). 3442 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3443 allocation. See Users-Manual: ch_mat for details. 3444 matrices. 3445 3446 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3447 @*/ 3448 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3449 { 3450 PetscErrorCode ierr; 3451 3452 PetscFunctionBegin; 3453 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3454 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3455 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3456 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3457 PetscFunctionReturn(0); 3458 } 3459 3460 /*@C 3461 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3462 per row in the matrix. For good matrix assembly performance the 3463 user should preallocate the matrix storage by setting the parameter nz 3464 (or the array nnz). By setting these parameters accurately, performance 3465 during matrix assembly can be increased by more than a factor of 50. 3466 3467 Collective 3468 3469 Input Parameters: 3470 + B - the matrix 3471 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3472 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3473 . nz - number of block nonzeros per block row (same for all rows) 3474 - nnz - array containing the number of block nonzeros in the various block rows 3475 (possibly different for each block row) or NULL 3476 3477 Options Database Keys: 3478 + -mat_no_unroll - uses code that does not unroll the loops in the 3479 block calculations (much slower) 3480 - -mat_block_size - size of the blocks to use 3481 3482 Level: intermediate 3483 3484 Notes: 3485 If the nnz parameter is given then the nz parameter is ignored 3486 3487 You can call MatGetInfo() to get information on how effective the preallocation was; 3488 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3489 You can also run with the option -info and look for messages with the string 3490 malloc in them to see if additional memory allocation was needed. 3491 3492 The block AIJ format is fully compatible with standard Fortran 77 3493 storage. That is, the stored row and column indices can begin at 3494 either one (as in Fortran) or zero. See the users' manual for details. 3495 3496 Specify the preallocated storage with either nz or nnz (not both). 3497 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3498 allocation. See Users-Manual: ch_mat for details. 3499 3500 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3501 @*/ 3502 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3503 { 3504 PetscErrorCode ierr; 3505 3506 PetscFunctionBegin; 3507 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3508 PetscValidType(B,1); 3509 PetscValidLogicalCollectiveInt(B,bs,2); 3510 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3511 PetscFunctionReturn(0); 3512 } 3513 3514 /*@C 3515 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3516 3517 Collective 3518 3519 Input Parameters: 3520 + B - the matrix 3521 . i - the indices into j for the start of each local row (starts with zero) 3522 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3523 - v - optional values in the matrix 3524 3525 Level: advanced 3526 3527 Notes: 3528 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3529 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3530 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3531 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3532 block column and the second index is over columns within a block. 3533 3534 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3535 3536 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3537 @*/ 3538 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3539 { 3540 PetscErrorCode ierr; 3541 3542 PetscFunctionBegin; 3543 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3544 PetscValidType(B,1); 3545 PetscValidLogicalCollectiveInt(B,bs,2); 3546 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3547 PetscFunctionReturn(0); 3548 } 3549 3550 3551 /*@ 3552 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3553 3554 Collective 3555 3556 Input Parameters: 3557 + comm - must be an MPI communicator of size 1 3558 . bs - size of block 3559 . m - number of rows 3560 . n - number of columns 3561 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3562 . j - column indices 3563 - a - matrix values 3564 3565 Output Parameter: 3566 . mat - the matrix 3567 3568 Level: advanced 3569 3570 Notes: 3571 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3572 once the matrix is destroyed 3573 3574 You cannot set new nonzero locations into this matrix, that will generate an error. 3575 3576 The i and j indices are 0 based 3577 3578 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3579 3580 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3581 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3582 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3583 with column-major ordering within blocks. 3584 3585 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3586 3587 @*/ 3588 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3589 { 3590 PetscErrorCode ierr; 3591 PetscInt ii; 3592 Mat_SeqBAIJ *baij; 3593 3594 PetscFunctionBegin; 3595 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3596 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3597 3598 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3599 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3600 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3601 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3602 baij = (Mat_SeqBAIJ*)(*mat)->data; 3603 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3604 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3605 3606 baij->i = i; 3607 baij->j = j; 3608 baij->a = a; 3609 3610 baij->singlemalloc = PETSC_FALSE; 3611 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3612 baij->free_a = PETSC_FALSE; 3613 baij->free_ij = PETSC_FALSE; 3614 3615 for (ii=0; ii<m; ii++) { 3616 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3617 #if defined(PETSC_USE_DEBUG) 3618 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3619 #endif 3620 } 3621 #if defined(PETSC_USE_DEBUG) 3622 for (ii=0; ii<baij->i[m]; ii++) { 3623 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3624 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3625 } 3626 #endif 3627 3628 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3629 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3630 PetscFunctionReturn(0); 3631 } 3632 3633 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3634 { 3635 PetscErrorCode ierr; 3636 PetscMPIInt size; 3637 3638 PetscFunctionBegin; 3639 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3640 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3641 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3642 } else { 3643 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3644 } 3645 PetscFunctionReturn(0); 3646 } 3647