1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 21 { 22 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 23 PetscErrorCode ierr; 24 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 25 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 26 PetscReal shift = 0.0; 27 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 28 29 PetscFunctionBegin; 30 allowzeropivot = PetscNot(A->erroriffailure); 31 32 if (a->idiagvalid) { 33 if (values) *values = a->idiag; 34 PetscFunctionReturn(0); 35 } 36 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 37 diag_offset = a->diag; 38 if (!a->idiag) { 39 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 40 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 41 } 42 diag = a->idiag; 43 if (values) *values = a->idiag; 44 /* factor and invert each block */ 45 switch (bs) { 46 case 1: 47 for (i=0; i<mbs; i++) { 48 odiag = v + 1*diag_offset[i]; 49 diag[0] = odiag[0]; 50 51 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 52 if (allowzeropivot) { 53 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 54 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 55 A->factorerror_zeropivot_row = i; 56 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 57 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 58 } 59 60 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 61 diag += 1; 62 } 63 break; 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 69 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 70 diag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 80 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 81 diag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 88 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 89 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 90 diag += 16; 91 } 92 break; 93 case 5: 94 for (i=0; i<mbs; i++) { 95 odiag = v + 25*diag_offset[i]; 96 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 97 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 98 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 99 diag += 25; 100 } 101 break; 102 case 6: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 36*diag_offset[i]; 105 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 107 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108 diag += 36; 109 } 110 break; 111 case 7: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 49*diag_offset[i]; 114 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 49; 118 } 119 break; 120 default: 121 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 122 for (i=0; i<mbs; i++) { 123 odiag = v + bs2*diag_offset[i]; 124 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 125 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 126 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 diag += bs2; 128 } 129 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 PetscScalar *x,*work,*w,*workt,*t; 139 const MatScalar *v,*aa = a->a, *idiag; 140 const PetscScalar *b,*xb; 141 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 142 PetscErrorCode ierr; 143 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 144 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 145 146 PetscFunctionBegin; 147 its = its*lits; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 153 154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155 156 if (!m) PetscFunctionReturn(0); 157 diag = a->diag; 158 idiag = a->idiag; 159 k = PetscMax(A->rmap->n,A->cmap->n); 160 if (!a->mult_work) { 161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162 } 163 if (!a->sor_workt) { 164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165 } 166 if (!a->sor_work) { 167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168 } 169 work = a->mult_work; 170 t = a->sor_workt; 171 w = a->sor_work; 172 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178 switch (bs) { 179 case 1: 180 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181 t[0] = b[0]; 182 i2 = 1; 183 idiag += 1; 184 for (i=1; i<m; i++) { 185 v = aa + ai[i]; 186 vi = aj + ai[i]; 187 nz = diag[i] - ai[i]; 188 s[0] = b[i2]; 189 for (j=0; j<nz; j++) { 190 xw[0] = x[vi[j]]; 191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192 } 193 t[i2] = s[0]; 194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195 x[i2] = xw[0]; 196 idiag += 1; 197 i2 += 1; 198 } 199 break; 200 case 2: 201 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202 t[0] = b[0]; t[1] = b[1]; 203 i2 = 2; 204 idiag += 4; 205 for (i=1; i<m; i++) { 206 v = aa + 4*ai[i]; 207 vi = aj + ai[i]; 208 nz = diag[i] - ai[i]; 209 s[0] = b[i2]; s[1] = b[i2+1]; 210 for (j=0; j<nz; j++) { 211 idx = 2*vi[j]; 212 it = 4*j; 213 xw[0] = x[idx]; xw[1] = x[1+idx]; 214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215 } 216 t[i2] = s[0]; t[i2+1] = s[1]; 217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218 x[i2] = xw[0]; x[i2+1] = xw[1]; 219 idiag += 4; 220 i2 += 2; 221 } 222 break; 223 case 3: 224 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226 i2 = 3; 227 idiag += 9; 228 for (i=1; i<m; i++) { 229 v = aa + 9*ai[i]; 230 vi = aj + ai[i]; 231 nz = diag[i] - ai[i]; 232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233 while (nz--) { 234 idx = 3*(*vi++); 235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237 v += 9; 238 } 239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242 idiag += 9; 243 i2 += 3; 244 } 245 break; 246 case 4: 247 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249 i2 = 4; 250 idiag += 16; 251 for (i=1; i<m; i++) { 252 v = aa + 16*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256 while (nz--) { 257 idx = 4*(*vi++); 258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260 v += 16; 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265 idiag += 16; 266 i2 += 4; 267 } 268 break; 269 case 5: 270 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272 i2 = 5; 273 idiag += 25; 274 for (i=1; i<m; i++) { 275 v = aa + 25*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279 while (nz--) { 280 idx = 5*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283 v += 25; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288 idiag += 25; 289 i2 += 5; 290 } 291 break; 292 case 6: 293 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295 i2 = 6; 296 idiag += 36; 297 for (i=1; i<m; i++) { 298 v = aa + 36*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302 while (nz--) { 303 idx = 6*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307 v += 36; 308 } 309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313 idiag += 36; 314 i2 += 6; 315 } 316 break; 317 case 7: 318 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321 i2 = 7; 322 idiag += 49; 323 for (i=1; i<m; i++) { 324 v = aa + 49*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329 while (nz--) { 330 idx = 7*(*vi++); 331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334 v += 49; 335 } 336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341 idiag += 49; 342 i2 += 7; 343 } 344 break; 345 default: 346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 348 i2 = bs; 349 idiag += bs2; 350 for (i=1; i<m; i++) { 351 v = aa + bs2*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 355 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 356 /* copy all rows of x that are needed into contiguous space */ 357 workt = work; 358 for (j=0; j<nz; j++) { 359 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 360 workt += bs; 361 } 362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365 366 idiag += bs2; 367 i2 += bs; 368 } 369 break; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373 xb = t; 374 } 375 else xb = b; 376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377 idiag = a->idiag+bs2*(a->mbs-1); 378 i2 = bs * (m-1); 379 switch (bs) { 380 case 1: 381 s[0] = xb[i2]; 382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383 x[i2] = xw[0]; 384 i2 -= 1; 385 for (i=m-2; i>=0; i--) { 386 v = aa + (diag[i]+1); 387 vi = aj + diag[i] + 1; 388 nz = ai[i+1] - diag[i] - 1; 389 s[0] = xb[i2]; 390 for (j=0; j<nz; j++) { 391 xw[0] = x[vi[j]]; 392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393 } 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 idiag -= 1; 397 i2 -= 1; 398 } 399 break; 400 case 2: 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403 x[i2] = xw[0]; x[i2+1] = xw[1]; 404 i2 -= 2; 405 idiag -= 4; 406 for (i=m-2; i>=0; i--) { 407 v = aa + 4*(diag[i] + 1); 408 vi = aj + diag[i] + 1; 409 nz = ai[i+1] - diag[i] - 1; 410 s[0] = xb[i2]; s[1] = xb[i2+1]; 411 for (j=0; j<nz; j++) { 412 idx = 2*vi[j]; 413 it = 4*j; 414 xw[0] = x[idx]; xw[1] = x[1+idx]; 415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416 } 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 idiag -= 4; 420 i2 -= 2; 421 } 422 break; 423 case 3: 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427 i2 -= 3; 428 idiag -= 9; 429 for (i=m-2; i>=0; i--) { 430 v = aa + 9*(diag[i]+1); 431 vi = aj + diag[i] + 1; 432 nz = ai[i+1] - diag[i] - 1; 433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434 while (nz--) { 435 idx = 3*(*vi++); 436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438 v += 9; 439 } 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 break; 446 case 4: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450 i2 -= 4; 451 idiag -= 16; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 16*(diag[i]+1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457 while (nz--) { 458 idx = 4*(*vi++); 459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461 v += 16; 462 } 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 idiag -= 16; 466 i2 -= 4; 467 } 468 break; 469 case 5: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473 i2 -= 5; 474 idiag -= 25; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 25*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480 while (nz--) { 481 idx = 5*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484 v += 25; 485 } 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 idiag -= 25; 489 i2 -= 5; 490 } 491 break; 492 case 6: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496 i2 -= 6; 497 idiag -= 36; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 36*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503 while (nz--) { 504 idx = 6*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508 v += 36; 509 } 510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512 idiag -= 36; 513 i2 -= 6; 514 } 515 break; 516 case 7: 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522 i2 -= 7; 523 idiag -= 49; 524 for (i=m-2; i>=0; i--) { 525 v = aa + 49*(diag[i]+1); 526 vi = aj + diag[i] + 1; 527 nz = ai[i+1] - diag[i] - 1; 528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530 while (nz--) { 531 idx = 7*(*vi++); 532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535 v += 49; 536 } 537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540 idiag -= 49; 541 i2 -= 7; 542 } 543 break; 544 default: 545 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547 i2 -= bs; 548 idiag -= bs2; 549 for (i=m-2; i>=0; i--) { 550 v = aa + bs2*(diag[i]+1); 551 vi = aj + diag[i] + 1; 552 nz = ai[i+1] - diag[i] - 1; 553 554 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 555 /* copy all rows of x that are needed into contiguous space */ 556 workt = work; 557 for (j=0; j<nz; j++) { 558 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 559 workt += bs; 560 } 561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563 564 idiag -= bs2; 565 i2 -= bs; 566 } 567 break; 568 } 569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570 } 571 its--; 572 } 573 while (its--) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 idiag = a->idiag; 576 i2 = 0; 577 switch (bs) { 578 case 1: 579 for (i=0; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = ai[i+1] - ai[i]; 583 s[0] = b[i2]; 584 for (j=0; j<nz; j++) { 585 xw[0] = x[vi[j]]; 586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587 } 588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589 x[i2] += xw[0]; 590 idiag += 1; 591 i2 += 1; 592 } 593 break; 594 case 2: 595 for (i=0; i<m; i++) { 596 v = aa + 4*ai[i]; 597 vi = aj + ai[i]; 598 nz = ai[i+1] - ai[i]; 599 s[0] = b[i2]; s[1] = b[i2+1]; 600 for (j=0; j<nz; j++) { 601 idx = 2*vi[j]; 602 it = 4*j; 603 xw[0] = x[idx]; xw[1] = x[1+idx]; 604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605 } 606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607 x[i2] += xw[0]; x[i2+1] += xw[1]; 608 idiag += 4; 609 i2 += 2; 610 } 611 break; 612 case 3: 613 for (i=0; i<m; i++) { 614 v = aa + 9*ai[i]; 615 vi = aj + ai[i]; 616 nz = ai[i+1] - ai[i]; 617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618 while (nz--) { 619 idx = 3*(*vi++); 620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626 idiag += 9; 627 i2 += 3; 628 } 629 break; 630 case 4: 631 for (i=0; i<m; i++) { 632 v = aa + 16*ai[i]; 633 vi = aj + ai[i]; 634 nz = ai[i+1] - ai[i]; 635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636 while (nz--) { 637 idx = 4*(*vi++); 638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640 v += 16; 641 } 642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644 idiag += 16; 645 i2 += 4; 646 } 647 break; 648 case 5: 649 for (i=0; i<m; i++) { 650 v = aa + 25*ai[i]; 651 vi = aj + ai[i]; 652 nz = ai[i+1] - ai[i]; 653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658 v += 25; 659 } 660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662 idiag += 25; 663 i2 += 5; 664 } 665 break; 666 case 6: 667 for (i=0; i<m; i++) { 668 v = aa + 36*ai[i]; 669 vi = aj + ai[i]; 670 nz = ai[i+1] - ai[i]; 671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672 while (nz--) { 673 idx = 6*(*vi++); 674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677 v += 36; 678 } 679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682 idiag += 36; 683 i2 += 6; 684 } 685 break; 686 case 7: 687 for (i=0; i<m; i++) { 688 v = aa + 49*ai[i]; 689 vi = aj + ai[i]; 690 nz = ai[i+1] - ai[i]; 691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693 while (nz--) { 694 idx = 7*(*vi++); 695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698 v += 49; 699 } 700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703 idiag += 49; 704 i2 += 7; 705 } 706 break; 707 default: 708 for (i=0; i<m; i++) { 709 v = aa + bs2*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 713 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 714 /* copy all rows of x that are needed into contiguous space */ 715 workt = work; 716 for (j=0; j<nz; j++) { 717 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 718 workt += bs; 719 } 720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722 723 idiag += bs2; 724 i2 += bs; 725 } 726 break; 727 } 728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729 } 730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731 idiag = a->idiag+bs2*(a->mbs-1); 732 i2 = bs * (m-1); 733 switch (bs) { 734 case 1: 735 for (i=m-1; i>=0; i--) { 736 v = aa + ai[i]; 737 vi = aj + ai[i]; 738 nz = ai[i+1] - ai[i]; 739 s[0] = b[i2]; 740 for (j=0; j<nz; j++) { 741 xw[0] = x[vi[j]]; 742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743 } 744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745 x[i2] += xw[0]; 746 idiag -= 1; 747 i2 -= 1; 748 } 749 break; 750 case 2: 751 for (i=m-1; i>=0; i--) { 752 v = aa + 4*ai[i]; 753 vi = aj + ai[i]; 754 nz = ai[i+1] - ai[i]; 755 s[0] = b[i2]; s[1] = b[i2+1]; 756 for (j=0; j<nz; j++) { 757 idx = 2*vi[j]; 758 it = 4*j; 759 xw[0] = x[idx]; xw[1] = x[1+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761 } 762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763 x[i2] += xw[0]; x[i2+1] += xw[1]; 764 idiag -= 4; 765 i2 -= 2; 766 } 767 break; 768 case 3: 769 for (i=m-1; i>=0; i--) { 770 v = aa + 9*ai[i]; 771 vi = aj + ai[i]; 772 nz = ai[i+1] - ai[i]; 773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774 while (nz--) { 775 idx = 3*(*vi++); 776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778 v += 9; 779 } 780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782 idiag -= 9; 783 i2 -= 3; 784 } 785 break; 786 case 4: 787 for (i=m-1; i>=0; i--) { 788 v = aa + 16*ai[i]; 789 vi = aj + ai[i]; 790 nz = ai[i+1] - ai[i]; 791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792 while (nz--) { 793 idx = 4*(*vi++); 794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796 v += 16; 797 } 798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800 idiag -= 16; 801 i2 -= 4; 802 } 803 break; 804 case 5: 805 for (i=m-1; i>=0; i--) { 806 v = aa + 25*ai[i]; 807 vi = aj + ai[i]; 808 nz = ai[i+1] - ai[i]; 809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810 while (nz--) { 811 idx = 5*(*vi++); 812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814 v += 25; 815 } 816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818 idiag -= 25; 819 i2 -= 5; 820 } 821 break; 822 case 6: 823 for (i=m-1; i>=0; i--) { 824 v = aa + 36*ai[i]; 825 vi = aj + ai[i]; 826 nz = ai[i+1] - ai[i]; 827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828 while (nz--) { 829 idx = 6*(*vi++); 830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833 v += 36; 834 } 835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838 idiag -= 36; 839 i2 -= 6; 840 } 841 break; 842 case 7: 843 for (i=m-1; i>=0; i--) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = ai[i+1] - ai[i]; 847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854 v += 49; 855 } 856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859 idiag -= 49; 860 i2 -= 7; 861 } 862 break; 863 default: 864 for (i=m-1; i>=0; i--) { 865 v = aa + bs2*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 869 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 874 workt += bs; 875 } 876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878 879 idiag -= bs2; 880 i2 -= bs; 881 } 882 break; 883 } 884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885 } 886 } 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 /* 893 Special version for direct calls from Fortran (Used in PETSc-fun3d) 894 */ 895 #if defined(PETSC_HAVE_FORTRAN_CAPS) 896 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 897 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 898 #define matsetvaluesblocked4_ matsetvaluesblocked4 899 #endif 900 901 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 902 { 903 Mat A = *AA; 904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 905 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 906 PetscInt *ai =a->i,*ailen=a->ilen; 907 PetscInt *aj =a->j,stepval,lastcol = -1; 908 const PetscScalar *value = v; 909 MatScalar *ap,*aa = a->a,*bap; 910 PetscErrorCode ierr; 911 912 PetscFunctionBegin; 913 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 914 stepval = (n-1)*4; 915 for (k=0; k<m; k++) { /* loop over added rows */ 916 row = im[k]; 917 rp = aj + ai[row]; 918 ap = aa + 16*ai[row]; 919 nrow = ailen[row]; 920 low = 0; 921 high = nrow; 922 for (l=0; l<n; l++) { /* loop over added columns */ 923 col = in[l]; 924 if (col <= lastcol) low = 0; 925 else high = nrow; 926 lastcol = col; 927 value = v + k*(stepval+4 + l)*4; 928 while (high-low > 7) { 929 t = (low+high)/2; 930 if (rp[t] > col) high = t; 931 else low = t; 932 } 933 for (i=low; i<high; i++) { 934 if (rp[i] > col) break; 935 if (rp[i] == col) { 936 bap = ap + 16*i; 937 for (ii=0; ii<4; ii++,value+=stepval) { 938 for (jj=ii; jj<16; jj+=4) { 939 bap[jj] += *value++; 940 } 941 } 942 goto noinsert2; 943 } 944 } 945 N = nrow++ - 1; 946 high++; /* added new column index thus must search to one higher than before */ 947 /* shift up all the later entries in this row */ 948 for (ii=N; ii>=i; ii--) { 949 rp[ii+1] = rp[ii]; 950 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 951 } 952 if (N >= i) { 953 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 954 } 955 rp[i] = col; 956 bap = ap + 16*i; 957 for (ii=0; ii<4; ii++,value+=stepval) { 958 for (jj=ii; jj<16; jj+=4) { 959 bap[jj] = *value++; 960 } 961 } 962 noinsert2:; 963 low = i; 964 } 965 ailen[row] = nrow; 966 } 967 PetscFunctionReturnVoid(); 968 } 969 970 #if defined(PETSC_HAVE_FORTRAN_CAPS) 971 #define matsetvalues4_ MATSETVALUES4 972 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 973 #define matsetvalues4_ matsetvalues4 974 #endif 975 976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 977 { 978 Mat A = *AA; 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 981 PetscInt *ai=a->i,*ailen=a->ilen; 982 PetscInt *aj=a->j,brow,bcol; 983 PetscInt ridx,cidx,lastcol = -1; 984 MatScalar *ap,value,*aa=a->a,*bap; 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 for (k=0; k<m; k++) { /* loop over added rows */ 989 row = im[k]; brow = row/4; 990 rp = aj + ai[brow]; 991 ap = aa + 16*ai[brow]; 992 nrow = ailen[brow]; 993 low = 0; 994 high = nrow; 995 for (l=0; l<n; l++) { /* loop over added columns */ 996 col = in[l]; bcol = col/4; 997 ridx = row % 4; cidx = col % 4; 998 value = v[l + k*n]; 999 if (col <= lastcol) low = 0; 1000 else high = nrow; 1001 lastcol = col; 1002 while (high-low > 7) { 1003 t = (low+high)/2; 1004 if (rp[t] > bcol) high = t; 1005 else low = t; 1006 } 1007 for (i=low; i<high; i++) { 1008 if (rp[i] > bcol) break; 1009 if (rp[i] == bcol) { 1010 bap = ap + 16*i + 4*cidx + ridx; 1011 *bap += value; 1012 goto noinsert1; 1013 } 1014 } 1015 N = nrow++ - 1; 1016 high++; /* added new column thus must search to one higher than before */ 1017 /* shift up all the later entries in this row */ 1018 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1019 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1020 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1021 rp[i] = bcol; 1022 ap[16*i + 4*cidx + ridx] = value; 1023 noinsert1:; 1024 low = i; 1025 } 1026 ailen[brow] = nrow; 1027 } 1028 PetscFunctionReturnVoid(); 1029 } 1030 1031 /* 1032 Checks for missing diagonals 1033 */ 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*ii = a->i,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !ii) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1047 } else { 1048 PetscInt n; 1049 n = PetscMin(a->mbs, a->nbs); 1050 diag = a->diag; 1051 for (i=0; i<n; i++) { 1052 if (diag[i] >= ii[i+1]) { 1053 *missing = PETSC_TRUE; 1054 if (d) *d = i; 1055 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1056 break; 1057 } 1058 } 1059 } 1060 PetscFunctionReturn(0); 1061 } 1062 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1088 { 1089 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1092 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1093 1094 PetscFunctionBegin; 1095 *nn = n; 1096 if (!ia) PetscFunctionReturn(0); 1097 if (symmetric) { 1098 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1099 nz = tia[n]; 1100 } else { 1101 tia = a->i; tja = a->j; 1102 } 1103 1104 if (!blockcompressed && bs > 1) { 1105 (*nn) *= bs; 1106 /* malloc & create the natural set of indices */ 1107 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1108 if (n) { 1109 (*ia)[0] = oshift; 1110 for (j=1; j<bs; j++) { 1111 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1112 } 1113 } 1114 1115 for (i=1; i<n; i++) { 1116 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1117 for (j=1; j<bs; j++) { 1118 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1119 } 1120 } 1121 if (n) { 1122 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1123 } 1124 1125 if (inja) { 1126 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1127 cnt = 0; 1128 for (i=0; i<n; i++) { 1129 for (j=0; j<bs; j++) { 1130 for (k=tia[i]; k<tia[i+1]; k++) { 1131 for (l=0; l<bs; l++) { 1132 (*ja)[cnt++] = bs*tja[k] + l; 1133 } 1134 } 1135 } 1136 } 1137 } 1138 1139 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1140 ierr = PetscFree(tia);CHKERRQ(ierr); 1141 ierr = PetscFree(tja);CHKERRQ(ierr); 1142 } 1143 } else if (oshift == 1) { 1144 if (symmetric) { 1145 nz = tia[A->rmap->n/bs]; 1146 /* add 1 to i and j indices */ 1147 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1148 *ia = tia; 1149 if (ja) { 1150 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1151 *ja = tja; 1152 } 1153 } else { 1154 nz = a->i[A->rmap->n/bs]; 1155 /* malloc space and add 1 to i and j indices */ 1156 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1157 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1158 if (ja) { 1159 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1160 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1161 } 1162 } 1163 } else { 1164 *ia = tia; 1165 if (ja) *ja = tja; 1166 } 1167 PetscFunctionReturn(0); 1168 } 1169 1170 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1171 { 1172 PetscErrorCode ierr; 1173 1174 PetscFunctionBegin; 1175 if (!ia) PetscFunctionReturn(0); 1176 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1177 ierr = PetscFree(*ia);CHKERRQ(ierr); 1178 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1179 } 1180 PetscFunctionReturn(0); 1181 } 1182 1183 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1184 { 1185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1186 PetscErrorCode ierr; 1187 1188 PetscFunctionBegin; 1189 #if defined(PETSC_USE_LOG) 1190 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1191 #endif 1192 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1193 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1194 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1195 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1196 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1197 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1198 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1199 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1200 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1201 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1202 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1203 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1204 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1205 1206 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1207 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1208 ierr = PetscFree(A->data);CHKERRQ(ierr); 1209 1210 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1211 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1223 #if defined(PETSC_HAVE_HYPRE) 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1225 #endif 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1231 { 1232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 switch (op) { 1237 case MAT_ROW_ORIENTED: 1238 a->roworiented = flg; 1239 break; 1240 case MAT_KEEP_NONZERO_PATTERN: 1241 a->keepnonzeropattern = flg; 1242 break; 1243 case MAT_NEW_NONZERO_LOCATIONS: 1244 a->nonew = (flg ? 0 : 1); 1245 break; 1246 case MAT_NEW_NONZERO_LOCATION_ERR: 1247 a->nonew = (flg ? -1 : 0); 1248 break; 1249 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1250 a->nonew = (flg ? -2 : 0); 1251 break; 1252 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1253 a->nounused = (flg ? -1 : 0); 1254 break; 1255 case MAT_FORCE_DIAGONAL_ENTRIES: 1256 case MAT_IGNORE_OFF_PROC_ENTRIES: 1257 case MAT_USE_HASH_TABLE: 1258 case MAT_SORTED_FULL: 1259 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1260 break; 1261 case MAT_SPD: 1262 case MAT_SYMMETRIC: 1263 case MAT_STRUCTURALLY_SYMMETRIC: 1264 case MAT_HERMITIAN: 1265 case MAT_SYMMETRY_ETERNAL: 1266 case MAT_SUBMAT_SINGLEIS: 1267 case MAT_STRUCTURE_ONLY: 1268 /* These options are handled directly by MatSetOption() */ 1269 break; 1270 default: 1271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1277 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1278 { 1279 PetscErrorCode ierr; 1280 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1281 MatScalar *aa_i; 1282 PetscScalar *v_i; 1283 1284 PetscFunctionBegin; 1285 bs = A->rmap->bs; 1286 bs2 = bs*bs; 1287 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1288 1289 bn = row/bs; /* Block number */ 1290 bp = row % bs; /* Block Position */ 1291 M = ai[bn+1] - ai[bn]; 1292 *nz = bs*M; 1293 1294 if (v) { 1295 *v = NULL; 1296 if (*nz) { 1297 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1298 for (i=0; i<M; i++) { /* for each block in the block row */ 1299 v_i = *v + i*bs; 1300 aa_i = aa + bs2*(ai[bn] + i); 1301 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1302 } 1303 } 1304 } 1305 1306 if (idx) { 1307 *idx = NULL; 1308 if (*nz) { 1309 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1310 for (i=0; i<M; i++) { /* for each block in the block row */ 1311 idx_i = *idx + i*bs; 1312 itmp = bs*aj[ai[bn] + i]; 1313 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1314 } 1315 } 1316 } 1317 PetscFunctionReturn(0); 1318 } 1319 1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1321 { 1322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1331 { 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 if (nz) *nz = 0; 1336 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1337 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1338 PetscFunctionReturn(0); 1339 } 1340 1341 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1342 { 1343 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1344 Mat C; 1345 PetscErrorCode ierr; 1346 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1347 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1348 MatScalar *ata,*aa=a->a; 1349 1350 PetscFunctionBegin; 1351 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1352 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1353 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1354 1355 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1356 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1357 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1358 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1359 1360 at = (Mat_SeqBAIJ*)C->data; 1361 ati = at->i; 1362 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1363 } else { 1364 C = *B; 1365 at = (Mat_SeqBAIJ*)C->data; 1366 ati = at->i; 1367 } 1368 1369 atj = at->j; 1370 ata = at->a; 1371 1372 /* Copy ati into atfill so we have locations of the next free space in atj */ 1373 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1374 1375 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1376 for (i=0; i<mbs; i++) { 1377 anzj = ai[i+1] - ai[i]; 1378 for (j=0; j<anzj; j++) { 1379 atj[atfill[*aj]] = i; 1380 for (kr=0; kr<bs; kr++) { 1381 for (k=0; k<bs; k++) { 1382 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1383 } 1384 } 1385 atfill[*aj++] += 1; 1386 } 1387 } 1388 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 1391 /* Clean up temporary space and complete requests. */ 1392 ierr = PetscFree(atfill);CHKERRQ(ierr); 1393 1394 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1395 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1396 *B = C; 1397 } else { 1398 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1404 { 1405 PetscErrorCode ierr; 1406 Mat Btrans; 1407 1408 PetscFunctionBegin; 1409 *f = PETSC_FALSE; 1410 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1411 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1412 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1417 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1418 { 1419 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1420 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1421 PetscInt *rowlens,*colidxs; 1422 PetscScalar *matvals; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1427 1428 M = mat->rmap->N; 1429 N = mat->cmap->N; 1430 m = mat->rmap->n; 1431 bs = mat->rmap->bs; 1432 nz = bs*bs*A->nz; 1433 1434 /* write matrix header */ 1435 header[0] = MAT_FILE_CLASSID; 1436 header[1] = M; header[2] = N; header[3] = nz; 1437 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1438 1439 /* store row lengths */ 1440 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1441 for (cnt=0, i=0; i<A->mbs; i++) 1442 for (j=0; j<bs; j++) 1443 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1444 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1445 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1446 1447 /* store column indices */ 1448 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1449 for (cnt=0, i=0; i<A->mbs; i++) 1450 for (k=0; k<bs; k++) 1451 for (j=A->i[i]; j<A->i[i+1]; j++) 1452 for (l=0; l<bs; l++) 1453 colidxs[cnt++] = bs*A->j[j] + l; 1454 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1455 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1456 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1457 1458 /* store nonzero values */ 1459 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1460 for (cnt=0, i=0; i<A->mbs; i++) 1461 for (k=0; k<bs; k++) 1462 for (j=A->i[i]; j<A->i[i+1]; j++) 1463 for (l=0; l<bs; l++) 1464 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1465 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1466 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1467 ierr = PetscFree(matvals);CHKERRQ(ierr); 1468 1469 /* write block size option to the viewer's .info file */ 1470 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1475 { 1476 PetscErrorCode ierr; 1477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1478 PetscInt i,bs = A->rmap->bs,k; 1479 1480 PetscFunctionBegin; 1481 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1482 for (i=0; i<a->mbs; i++) { 1483 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1484 for (k=a->i[i]; k<a->i[i+1]; k++) { 1485 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1486 } 1487 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1488 } 1489 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1494 { 1495 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1496 PetscErrorCode ierr; 1497 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1498 PetscViewerFormat format; 1499 1500 PetscFunctionBegin; 1501 if (A->structure_only) { 1502 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1507 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1508 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1509 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1510 const char *matname; 1511 Mat aij; 1512 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1513 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1514 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1515 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1516 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1517 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1518 PetscFunctionReturn(0); 1519 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1520 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1521 for (i=0; i<a->mbs; i++) { 1522 for (j=0; j<bs; j++) { 1523 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1524 for (k=a->i[i]; k<a->i[i+1]; k++) { 1525 for (l=0; l<bs; l++) { 1526 #if defined(PETSC_USE_COMPLEX) 1527 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1528 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1529 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1530 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1532 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1533 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } 1536 #else 1537 if (a->a[bs2*k + l*bs + j] != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1539 } 1540 #endif 1541 } 1542 } 1543 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1544 } 1545 } 1546 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1547 } else { 1548 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1549 for (i=0; i<a->mbs; i++) { 1550 for (j=0; j<bs; j++) { 1551 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1552 for (k=a->i[i]; k<a->i[i+1]; k++) { 1553 for (l=0; l<bs; l++) { 1554 #if defined(PETSC_USE_COMPLEX) 1555 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1556 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1557 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1558 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1559 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1560 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1561 } else { 1562 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } 1564 #else 1565 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1566 #endif 1567 } 1568 } 1569 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1570 } 1571 } 1572 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1573 } 1574 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #include <petscdraw.h> 1579 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1580 { 1581 Mat A = (Mat) Aa; 1582 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1583 PetscErrorCode ierr; 1584 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1585 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1586 MatScalar *aa; 1587 PetscViewer viewer; 1588 PetscViewerFormat format; 1589 1590 PetscFunctionBegin; 1591 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1592 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1593 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1594 1595 /* loop over matrix elements drawing boxes */ 1596 1597 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1598 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1599 /* Blue for negative, Cyan for zero and Red for positive */ 1600 color = PETSC_DRAW_BLUE; 1601 for (i=0,row=0; i<mbs; i++,row+=bs) { 1602 for (j=a->i[i]; j<a->i[i+1]; j++) { 1603 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1604 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1605 aa = a->a + j*bs2; 1606 for (k=0; k<bs; k++) { 1607 for (l=0; l<bs; l++) { 1608 if (PetscRealPart(*aa++) >= 0.) continue; 1609 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1610 } 1611 } 1612 } 1613 } 1614 color = PETSC_DRAW_CYAN; 1615 for (i=0,row=0; i<mbs; i++,row+=bs) { 1616 for (j=a->i[i]; j<a->i[i+1]; j++) { 1617 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1618 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1619 aa = a->a + j*bs2; 1620 for (k=0; k<bs; k++) { 1621 for (l=0; l<bs; l++) { 1622 if (PetscRealPart(*aa++) != 0.) continue; 1623 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1624 } 1625 } 1626 } 1627 } 1628 color = PETSC_DRAW_RED; 1629 for (i=0,row=0; i<mbs; i++,row+=bs) { 1630 for (j=a->i[i]; j<a->i[i+1]; j++) { 1631 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1632 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1633 aa = a->a + j*bs2; 1634 for (k=0; k<bs; k++) { 1635 for (l=0; l<bs; l++) { 1636 if (PetscRealPart(*aa++) <= 0.) continue; 1637 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1638 } 1639 } 1640 } 1641 } 1642 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1643 } else { 1644 /* use contour shading to indicate magnitude of values */ 1645 /* first determine max of all nonzero values */ 1646 PetscReal minv = 0.0, maxv = 0.0; 1647 PetscDraw popup; 1648 1649 for (i=0; i<a->nz*a->bs2; i++) { 1650 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1651 } 1652 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1653 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1654 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1655 1656 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1657 for (i=0,row=0; i<mbs; i++,row+=bs) { 1658 for (j=a->i[i]; j<a->i[i+1]; j++) { 1659 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1660 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1661 aa = a->a + j*bs2; 1662 for (k=0; k<bs; k++) { 1663 for (l=0; l<bs; l++) { 1664 MatScalar v = *aa++; 1665 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1666 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 } 1671 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1672 } 1673 PetscFunctionReturn(0); 1674 } 1675 1676 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1677 { 1678 PetscErrorCode ierr; 1679 PetscReal xl,yl,xr,yr,w,h; 1680 PetscDraw draw; 1681 PetscBool isnull; 1682 1683 PetscFunctionBegin; 1684 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1685 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1686 if (isnull) PetscFunctionReturn(0); 1687 1688 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1689 xr += w; yr += h; xl = -w; yl = -h; 1690 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1691 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1692 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1693 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1694 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1699 { 1700 PetscErrorCode ierr; 1701 PetscBool iascii,isbinary,isdraw; 1702 1703 PetscFunctionBegin; 1704 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1705 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1706 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1707 if (iascii) { 1708 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1709 } else if (isbinary) { 1710 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1711 } else if (isdraw) { 1712 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1713 } else { 1714 Mat B; 1715 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1716 ierr = MatView(B,viewer);CHKERRQ(ierr); 1717 ierr = MatDestroy(&B);CHKERRQ(ierr); 1718 } 1719 PetscFunctionReturn(0); 1720 } 1721 1722 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1723 { 1724 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1725 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1726 PetscInt *ai = a->i,*ailen = a->ilen; 1727 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1728 MatScalar *ap,*aa = a->a; 1729 1730 PetscFunctionBegin; 1731 for (k=0; k<m; k++) { /* loop over rows */ 1732 row = im[k]; brow = row/bs; 1733 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1734 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1735 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1736 nrow = ailen[brow]; 1737 for (l=0; l<n; l++) { /* loop over columns */ 1738 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1739 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1740 col = in[l]; 1741 bcol = col/bs; 1742 cidx = col%bs; 1743 ridx = row%bs; 1744 high = nrow; 1745 low = 0; /* assume unsorted */ 1746 while (high-low > 5) { 1747 t = (low+high)/2; 1748 if (rp[t] > bcol) high = t; 1749 else low = t; 1750 } 1751 for (i=low; i<high; i++) { 1752 if (rp[i] > bcol) break; 1753 if (rp[i] == bcol) { 1754 *v++ = ap[bs2*i+bs*cidx+ridx]; 1755 goto finished; 1756 } 1757 } 1758 *v++ = 0.0; 1759 finished:; 1760 } 1761 } 1762 PetscFunctionReturn(0); 1763 } 1764 1765 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1766 { 1767 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1768 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1769 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1770 PetscErrorCode ierr; 1771 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1772 PetscBool roworiented=a->roworiented; 1773 const PetscScalar *value = v; 1774 MatScalar *ap=NULL,*aa = a->a,*bap; 1775 1776 PetscFunctionBegin; 1777 if (roworiented) { 1778 stepval = (n-1)*bs; 1779 } else { 1780 stepval = (m-1)*bs; 1781 } 1782 for (k=0; k<m; k++) { /* loop over added rows */ 1783 row = im[k]; 1784 if (row < 0) continue; 1785 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1786 rp = aj + ai[row]; 1787 if (!A->structure_only) ap = aa + bs2*ai[row]; 1788 rmax = imax[row]; 1789 nrow = ailen[row]; 1790 low = 0; 1791 high = nrow; 1792 for (l=0; l<n; l++) { /* loop over added columns */ 1793 if (in[l] < 0) continue; 1794 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1795 col = in[l]; 1796 if (!A->structure_only) { 1797 if (roworiented) { 1798 value = v + (k*(stepval+bs) + l)*bs; 1799 } else { 1800 value = v + (l*(stepval+bs) + k)*bs; 1801 } 1802 } 1803 if (col <= lastcol) low = 0; 1804 else high = nrow; 1805 lastcol = col; 1806 while (high-low > 7) { 1807 t = (low+high)/2; 1808 if (rp[t] > col) high = t; 1809 else low = t; 1810 } 1811 for (i=low; i<high; i++) { 1812 if (rp[i] > col) break; 1813 if (rp[i] == col) { 1814 if (A->structure_only) goto noinsert2; 1815 bap = ap + bs2*i; 1816 if (roworiented) { 1817 if (is == ADD_VALUES) { 1818 for (ii=0; ii<bs; ii++,value+=stepval) { 1819 for (jj=ii; jj<bs2; jj+=bs) { 1820 bap[jj] += *value++; 1821 } 1822 } 1823 } else { 1824 for (ii=0; ii<bs; ii++,value+=stepval) { 1825 for (jj=ii; jj<bs2; jj+=bs) { 1826 bap[jj] = *value++; 1827 } 1828 } 1829 } 1830 } else { 1831 if (is == ADD_VALUES) { 1832 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1833 for (jj=0; jj<bs; jj++) { 1834 bap[jj] += value[jj]; 1835 } 1836 bap += bs; 1837 } 1838 } else { 1839 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1840 for (jj=0; jj<bs; jj++) { 1841 bap[jj] = value[jj]; 1842 } 1843 bap += bs; 1844 } 1845 } 1846 } 1847 goto noinsert2; 1848 } 1849 } 1850 if (nonew == 1) goto noinsert2; 1851 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1852 if (A->structure_only) { 1853 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1854 } else { 1855 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1856 } 1857 N = nrow++ - 1; high++; 1858 /* shift up all the later entries in this row */ 1859 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1860 rp[i] = col; 1861 if (!A->structure_only) { 1862 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1863 bap = ap + bs2*i; 1864 if (roworiented) { 1865 for (ii=0; ii<bs; ii++,value+=stepval) { 1866 for (jj=ii; jj<bs2; jj+=bs) { 1867 bap[jj] = *value++; 1868 } 1869 } 1870 } else { 1871 for (ii=0; ii<bs; ii++,value+=stepval) { 1872 for (jj=0; jj<bs; jj++) { 1873 *bap++ = *value++; 1874 } 1875 } 1876 } 1877 } 1878 noinsert2:; 1879 low = i; 1880 } 1881 ailen[row] = nrow; 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1887 { 1888 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1889 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1890 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1891 PetscErrorCode ierr; 1892 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1893 MatScalar *aa = a->a,*ap; 1894 PetscReal ratio=0.6; 1895 1896 PetscFunctionBegin; 1897 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1898 1899 if (m) rmax = ailen[0]; 1900 for (i=1; i<mbs; i++) { 1901 /* move each row back by the amount of empty slots (fshift) before it*/ 1902 fshift += imax[i-1] - ailen[i-1]; 1903 rmax = PetscMax(rmax,ailen[i]); 1904 if (fshift) { 1905 ip = aj + ai[i]; 1906 ap = aa + bs2*ai[i]; 1907 N = ailen[i]; 1908 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1909 if (!A->structure_only) { 1910 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1911 } 1912 } 1913 ai[i] = ai[i-1] + ailen[i-1]; 1914 } 1915 if (mbs) { 1916 fshift += imax[mbs-1] - ailen[mbs-1]; 1917 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1918 } 1919 1920 /* reset ilen and imax for each row */ 1921 a->nonzerorowcnt = 0; 1922 if (A->structure_only) { 1923 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1924 } else { /* !A->structure_only */ 1925 for (i=0; i<mbs; i++) { 1926 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1927 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1928 } 1929 } 1930 a->nz = ai[mbs]; 1931 1932 /* diagonals may have moved, so kill the diagonal pointers */ 1933 a->idiagvalid = PETSC_FALSE; 1934 if (fshift && a->diag) { 1935 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1936 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1937 a->diag = NULL; 1938 } 1939 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1940 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1941 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1942 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1943 1944 A->info.mallocs += a->reallocs; 1945 a->reallocs = 0; 1946 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1947 a->rmax = rmax; 1948 1949 if (!A->structure_only) { 1950 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 /* 1956 This function returns an array of flags which indicate the locations of contiguous 1957 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1958 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1959 Assume: sizes should be long enough to hold all the values. 1960 */ 1961 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1962 { 1963 PetscInt i,j,k,row; 1964 PetscBool flg; 1965 1966 PetscFunctionBegin; 1967 for (i=0,j=0; i<n; j++) { 1968 row = idx[i]; 1969 if (row%bs!=0) { /* Not the begining of a block */ 1970 sizes[j] = 1; 1971 i++; 1972 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1973 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1974 i++; 1975 } else { /* Begining of the block, so check if the complete block exists */ 1976 flg = PETSC_TRUE; 1977 for (k=1; k<bs; k++) { 1978 if (row+k != idx[i+k]) { /* break in the block */ 1979 flg = PETSC_FALSE; 1980 break; 1981 } 1982 } 1983 if (flg) { /* No break in the bs */ 1984 sizes[j] = bs; 1985 i += bs; 1986 } else { 1987 sizes[j] = 1; 1988 i++; 1989 } 1990 } 1991 } 1992 *bs_max = j; 1993 PetscFunctionReturn(0); 1994 } 1995 1996 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1997 { 1998 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1999 PetscErrorCode ierr; 2000 PetscInt i,j,k,count,*rows; 2001 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2002 PetscScalar zero = 0.0; 2003 MatScalar *aa; 2004 const PetscScalar *xx; 2005 PetscScalar *bb; 2006 2007 PetscFunctionBegin; 2008 /* fix right hand side if needed */ 2009 if (x && b) { 2010 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2011 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2012 for (i=0; i<is_n; i++) { 2013 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2014 } 2015 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2016 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2017 } 2018 2019 /* Make a copy of the IS and sort it */ 2020 /* allocate memory for rows,sizes */ 2021 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2022 2023 /* copy IS values to rows, and sort them */ 2024 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2025 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2026 2027 if (baij->keepnonzeropattern) { 2028 for (i=0; i<is_n; i++) sizes[i] = 1; 2029 bs_max = is_n; 2030 } else { 2031 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2032 A->nonzerostate++; 2033 } 2034 2035 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2036 row = rows[j]; 2037 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2038 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2039 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2040 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2041 if (diag != (PetscScalar)0.0) { 2042 if (baij->ilen[row/bs] > 0) { 2043 baij->ilen[row/bs] = 1; 2044 baij->j[baij->i[row/bs]] = row/bs; 2045 2046 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2047 } 2048 /* Now insert all the diagonal values for this bs */ 2049 for (k=0; k<bs; k++) { 2050 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } else { /* (diag == 0.0) */ 2053 baij->ilen[row/bs] = 0; 2054 } /* end (diag == 0.0) */ 2055 } else { /* (sizes[i] != bs) */ 2056 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2057 for (k=0; k<count; k++) { 2058 aa[0] = zero; 2059 aa += bs; 2060 } 2061 if (diag != (PetscScalar)0.0) { 2062 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2063 } 2064 } 2065 } 2066 2067 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2073 { 2074 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2075 PetscErrorCode ierr; 2076 PetscInt i,j,k,count; 2077 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2078 PetscScalar zero = 0.0; 2079 MatScalar *aa; 2080 const PetscScalar *xx; 2081 PetscScalar *bb; 2082 PetscBool *zeroed,vecs = PETSC_FALSE; 2083 2084 PetscFunctionBegin; 2085 /* fix right hand side if needed */ 2086 if (x && b) { 2087 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2088 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2089 vecs = PETSC_TRUE; 2090 } 2091 2092 /* zero the columns */ 2093 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2094 for (i=0; i<is_n; i++) { 2095 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2096 zeroed[is_idx[i]] = PETSC_TRUE; 2097 } 2098 for (i=0; i<A->rmap->N; i++) { 2099 if (!zeroed[i]) { 2100 row = i/bs; 2101 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2102 for (k=0; k<bs; k++) { 2103 col = bs*baij->j[j] + k; 2104 if (zeroed[col]) { 2105 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2106 if (vecs) bb[i] -= aa[0]*xx[col]; 2107 aa[0] = 0.0; 2108 } 2109 } 2110 } 2111 } else if (vecs) bb[i] = diag*xx[i]; 2112 } 2113 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2114 if (vecs) { 2115 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2116 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2117 } 2118 2119 /* zero the rows */ 2120 for (i=0; i<is_n; i++) { 2121 row = is_idx[i]; 2122 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2123 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2124 for (k=0; k<count; k++) { 2125 aa[0] = zero; 2126 aa += bs; 2127 } 2128 if (diag != (PetscScalar)0.0) { 2129 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2130 } 2131 } 2132 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2137 { 2138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2139 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2140 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2141 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2142 PetscErrorCode ierr; 2143 PetscInt ridx,cidx,bs2=a->bs2; 2144 PetscBool roworiented=a->roworiented; 2145 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2146 2147 PetscFunctionBegin; 2148 for (k=0; k<m; k++) { /* loop over added rows */ 2149 row = im[k]; 2150 brow = row/bs; 2151 if (row < 0) continue; 2152 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2153 rp = aj + ai[brow]; 2154 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2155 rmax = imax[brow]; 2156 nrow = ailen[brow]; 2157 low = 0; 2158 high = nrow; 2159 for (l=0; l<n; l++) { /* loop over added columns */ 2160 if (in[l] < 0) continue; 2161 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2162 col = in[l]; bcol = col/bs; 2163 ridx = row % bs; cidx = col % bs; 2164 if (!A->structure_only) { 2165 if (roworiented) { 2166 value = v[l + k*n]; 2167 } else { 2168 value = v[k + l*m]; 2169 } 2170 } 2171 if (col <= lastcol) low = 0; else high = nrow; 2172 lastcol = col; 2173 while (high-low > 7) { 2174 t = (low+high)/2; 2175 if (rp[t] > bcol) high = t; 2176 else low = t; 2177 } 2178 for (i=low; i<high; i++) { 2179 if (rp[i] > bcol) break; 2180 if (rp[i] == bcol) { 2181 bap = ap + bs2*i + bs*cidx + ridx; 2182 if (!A->structure_only) { 2183 if (is == ADD_VALUES) *bap += value; 2184 else *bap = value; 2185 } 2186 goto noinsert1; 2187 } 2188 } 2189 if (nonew == 1) goto noinsert1; 2190 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2191 if (A->structure_only) { 2192 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2193 } else { 2194 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2195 } 2196 N = nrow++ - 1; high++; 2197 /* shift up all the later entries in this row */ 2198 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2199 rp[i] = bcol; 2200 if (!A->structure_only) { 2201 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2202 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2203 ap[bs2*i + bs*cidx + ridx] = value; 2204 } 2205 a->nz++; 2206 A->nonzerostate++; 2207 noinsert1:; 2208 low = i; 2209 } 2210 ailen[brow] = nrow; 2211 } 2212 PetscFunctionReturn(0); 2213 } 2214 2215 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2216 { 2217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2218 Mat outA; 2219 PetscErrorCode ierr; 2220 PetscBool row_identity,col_identity; 2221 2222 PetscFunctionBegin; 2223 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2224 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2225 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2226 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2227 2228 outA = inA; 2229 inA->factortype = MAT_FACTOR_LU; 2230 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2231 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2232 2233 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2234 2235 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2236 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2237 a->row = row; 2238 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2239 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2240 a->col = col; 2241 2242 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2243 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2244 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2245 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2246 2247 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2248 if (!a->solve_work) { 2249 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2250 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2251 } 2252 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2257 { 2258 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2259 PetscInt i,nz,mbs; 2260 2261 PetscFunctionBegin; 2262 nz = baij->maxnz; 2263 mbs = baij->mbs; 2264 for (i=0; i<nz; i++) { 2265 baij->j[i] = indices[i]; 2266 } 2267 baij->nz = nz; 2268 for (i=0; i<mbs; i++) { 2269 baij->ilen[i] = baij->imax[i]; 2270 } 2271 PetscFunctionReturn(0); 2272 } 2273 2274 /*@ 2275 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2276 in the matrix. 2277 2278 Input Parameters: 2279 + mat - the SeqBAIJ matrix 2280 - indices - the column indices 2281 2282 Level: advanced 2283 2284 Notes: 2285 This can be called if you have precomputed the nonzero structure of the 2286 matrix and want to provide it to the matrix object to improve the performance 2287 of the MatSetValues() operation. 2288 2289 You MUST have set the correct numbers of nonzeros per row in the call to 2290 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2291 2292 MUST be called before any calls to MatSetValues(); 2293 2294 @*/ 2295 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2296 { 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2301 PetscValidPointer(indices,2); 2302 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2307 { 2308 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2309 PetscErrorCode ierr; 2310 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2311 PetscReal atmp; 2312 PetscScalar *x,zero = 0.0; 2313 MatScalar *aa; 2314 PetscInt ncols,brow,krow,kcol; 2315 2316 PetscFunctionBegin; 2317 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2318 bs = A->rmap->bs; 2319 aa = a->a; 2320 ai = a->i; 2321 aj = a->j; 2322 mbs = a->mbs; 2323 2324 ierr = VecSet(v,zero);CHKERRQ(ierr); 2325 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2326 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2327 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2328 for (i=0; i<mbs; i++) { 2329 ncols = ai[1] - ai[0]; ai++; 2330 brow = bs*i; 2331 for (j=0; j<ncols; j++) { 2332 for (kcol=0; kcol<bs; kcol++) { 2333 for (krow=0; krow<bs; krow++) { 2334 atmp = PetscAbsScalar(*aa);aa++; 2335 row = brow + krow; /* row index */ 2336 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2337 } 2338 } 2339 aj++; 2340 } 2341 } 2342 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2343 PetscFunctionReturn(0); 2344 } 2345 2346 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2347 { 2348 PetscErrorCode ierr; 2349 2350 PetscFunctionBegin; 2351 /* If the two matrices have the same copy implementation, use fast copy. */ 2352 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2354 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2355 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2356 2357 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2358 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2359 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2360 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2361 } else { 2362 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2363 } 2364 PetscFunctionReturn(0); 2365 } 2366 2367 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2368 { 2369 PetscErrorCode ierr; 2370 2371 PetscFunctionBegin; 2372 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2373 PetscFunctionReturn(0); 2374 } 2375 2376 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2377 { 2378 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2379 2380 PetscFunctionBegin; 2381 *array = a->a; 2382 PetscFunctionReturn(0); 2383 } 2384 2385 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2386 { 2387 PetscFunctionBegin; 2388 *array = NULL; 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2393 { 2394 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2395 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2396 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2397 PetscErrorCode ierr; 2398 2399 PetscFunctionBegin; 2400 /* Set the number of nonzeros in the new matrix */ 2401 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2402 PetscFunctionReturn(0); 2403 } 2404 2405 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2406 { 2407 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2408 PetscErrorCode ierr; 2409 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2410 PetscBLASInt one=1; 2411 2412 PetscFunctionBegin; 2413 if (str == SAME_NONZERO_PATTERN) { 2414 PetscScalar alpha = a; 2415 PetscBLASInt bnz; 2416 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2417 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2418 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2419 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2420 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2421 } else { 2422 Mat B; 2423 PetscInt *nnz; 2424 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2425 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2426 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2427 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2428 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2429 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2430 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2431 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2432 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2433 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2434 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2435 ierr = PetscFree(nnz);CHKERRQ(ierr); 2436 } 2437 PetscFunctionReturn(0); 2438 } 2439 2440 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2441 { 2442 #if defined(PETSC_USE_COMPLEX) 2443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2444 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2445 MatScalar *aa = a->a; 2446 2447 PetscFunctionBegin; 2448 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2449 #else 2450 PetscFunctionBegin; 2451 #endif 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2456 { 2457 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2458 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2459 MatScalar *aa = a->a; 2460 2461 PetscFunctionBegin; 2462 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2467 { 2468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2469 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2470 MatScalar *aa = a->a; 2471 2472 PetscFunctionBegin; 2473 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 /* 2478 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2479 */ 2480 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscErrorCode ierr; 2484 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2485 PetscInt nz = a->i[m],row,*jj,mr,col; 2486 2487 PetscFunctionBegin; 2488 *nn = n; 2489 if (!ia) PetscFunctionReturn(0); 2490 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2491 else { 2492 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2495 jj = a->j; 2496 for (i=0; i<nz; i++) { 2497 collengths[jj[i]]++; 2498 } 2499 cia[0] = oshift; 2500 for (i=0; i<n; i++) { 2501 cia[i+1] = cia[i] + collengths[i]; 2502 } 2503 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2504 jj = a->j; 2505 for (row=0; row<m; row++) { 2506 mr = a->i[row+1] - a->i[row]; 2507 for (i=0; i<mr; i++) { 2508 col = *jj++; 2509 2510 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2511 } 2512 } 2513 ierr = PetscFree(collengths);CHKERRQ(ierr); 2514 *ia = cia; *ja = cja; 2515 } 2516 PetscFunctionReturn(0); 2517 } 2518 2519 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2520 { 2521 PetscErrorCode ierr; 2522 2523 PetscFunctionBegin; 2524 if (!ia) PetscFunctionReturn(0); 2525 ierr = PetscFree(*ia);CHKERRQ(ierr); 2526 ierr = PetscFree(*ja);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 /* 2531 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2532 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2533 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2534 */ 2535 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2536 { 2537 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2538 PetscErrorCode ierr; 2539 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2540 PetscInt nz = a->i[m],row,*jj,mr,col; 2541 PetscInt *cspidx; 2542 2543 PetscFunctionBegin; 2544 *nn = n; 2545 if (!ia) PetscFunctionReturn(0); 2546 2547 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2550 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2551 jj = a->j; 2552 for (i=0; i<nz; i++) { 2553 collengths[jj[i]]++; 2554 } 2555 cia[0] = oshift; 2556 for (i=0; i<n; i++) { 2557 cia[i+1] = cia[i] + collengths[i]; 2558 } 2559 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2560 jj = a->j; 2561 for (row=0; row<m; row++) { 2562 mr = a->i[row+1] - a->i[row]; 2563 for (i=0; i<mr; i++) { 2564 col = *jj++; 2565 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2566 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2567 } 2568 } 2569 ierr = PetscFree(collengths);CHKERRQ(ierr); 2570 *ia = cia; 2571 *ja = cja; 2572 *spidx = cspidx; 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2577 { 2578 PetscErrorCode ierr; 2579 2580 PetscFunctionBegin; 2581 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2582 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2587 { 2588 PetscErrorCode ierr; 2589 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2590 2591 PetscFunctionBegin; 2592 if (!Y->preallocated || !aij->nz) { 2593 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2594 } 2595 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 /* -------------------------------------------------------------------*/ 2600 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2601 MatGetRow_SeqBAIJ, 2602 MatRestoreRow_SeqBAIJ, 2603 MatMult_SeqBAIJ_N, 2604 /* 4*/ MatMultAdd_SeqBAIJ_N, 2605 MatMultTranspose_SeqBAIJ, 2606 MatMultTransposeAdd_SeqBAIJ, 2607 NULL, 2608 NULL, 2609 NULL, 2610 /* 10*/ NULL, 2611 MatLUFactor_SeqBAIJ, 2612 NULL, 2613 NULL, 2614 MatTranspose_SeqBAIJ, 2615 /* 15*/ MatGetInfo_SeqBAIJ, 2616 MatEqual_SeqBAIJ, 2617 MatGetDiagonal_SeqBAIJ, 2618 MatDiagonalScale_SeqBAIJ, 2619 MatNorm_SeqBAIJ, 2620 /* 20*/ NULL, 2621 MatAssemblyEnd_SeqBAIJ, 2622 MatSetOption_SeqBAIJ, 2623 MatZeroEntries_SeqBAIJ, 2624 /* 24*/ MatZeroRows_SeqBAIJ, 2625 NULL, 2626 NULL, 2627 NULL, 2628 NULL, 2629 /* 29*/ MatSetUp_SeqBAIJ, 2630 NULL, 2631 NULL, 2632 NULL, 2633 NULL, 2634 /* 34*/ MatDuplicate_SeqBAIJ, 2635 NULL, 2636 NULL, 2637 MatILUFactor_SeqBAIJ, 2638 NULL, 2639 /* 39*/ MatAXPY_SeqBAIJ, 2640 MatCreateSubMatrices_SeqBAIJ, 2641 MatIncreaseOverlap_SeqBAIJ, 2642 MatGetValues_SeqBAIJ, 2643 MatCopy_SeqBAIJ, 2644 /* 44*/ NULL, 2645 MatScale_SeqBAIJ, 2646 MatShift_SeqBAIJ, 2647 NULL, 2648 MatZeroRowsColumns_SeqBAIJ, 2649 /* 49*/ NULL, 2650 MatGetRowIJ_SeqBAIJ, 2651 MatRestoreRowIJ_SeqBAIJ, 2652 MatGetColumnIJ_SeqBAIJ, 2653 MatRestoreColumnIJ_SeqBAIJ, 2654 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2655 NULL, 2656 NULL, 2657 NULL, 2658 MatSetValuesBlocked_SeqBAIJ, 2659 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2660 MatDestroy_SeqBAIJ, 2661 MatView_SeqBAIJ, 2662 NULL, 2663 NULL, 2664 /* 64*/ NULL, 2665 NULL, 2666 NULL, 2667 NULL, 2668 NULL, 2669 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2670 NULL, 2671 MatConvert_Basic, 2672 NULL, 2673 NULL, 2674 /* 74*/ NULL, 2675 MatFDColoringApply_BAIJ, 2676 NULL, 2677 NULL, 2678 NULL, 2679 /* 79*/ NULL, 2680 NULL, 2681 NULL, 2682 NULL, 2683 MatLoad_SeqBAIJ, 2684 /* 84*/ NULL, 2685 NULL, 2686 NULL, 2687 NULL, 2688 NULL, 2689 /* 89*/ NULL, 2690 NULL, 2691 NULL, 2692 NULL, 2693 NULL, 2694 /* 94*/ NULL, 2695 NULL, 2696 NULL, 2697 NULL, 2698 NULL, 2699 /* 99*/ NULL, 2700 NULL, 2701 NULL, 2702 MatConjugate_SeqBAIJ, 2703 NULL, 2704 /*104*/ NULL, 2705 MatRealPart_SeqBAIJ, 2706 MatImaginaryPart_SeqBAIJ, 2707 NULL, 2708 NULL, 2709 /*109*/ NULL, 2710 NULL, 2711 NULL, 2712 NULL, 2713 MatMissingDiagonal_SeqBAIJ, 2714 /*114*/ NULL, 2715 NULL, 2716 NULL, 2717 NULL, 2718 NULL, 2719 /*119*/ NULL, 2720 NULL, 2721 MatMultHermitianTranspose_SeqBAIJ, 2722 MatMultHermitianTransposeAdd_SeqBAIJ, 2723 NULL, 2724 /*124*/ NULL, 2725 NULL, 2726 MatInvertBlockDiagonal_SeqBAIJ, 2727 NULL, 2728 NULL, 2729 /*129*/ NULL, 2730 NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*134*/ NULL, 2735 NULL, 2736 NULL, 2737 NULL, 2738 NULL, 2739 /*139*/ MatSetBlockSizes_Default, 2740 NULL, 2741 NULL, 2742 MatFDColoringSetUp_SeqXAIJ, 2743 NULL, 2744 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2745 MatDestroySubMatrices_SeqBAIJ 2746 }; 2747 2748 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2749 { 2750 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2751 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2756 2757 /* allocate space for values if not already there */ 2758 if (!aij->saved_values) { 2759 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2760 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2761 } 2762 2763 /* copy values over */ 2764 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2769 { 2770 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2771 PetscErrorCode ierr; 2772 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2773 2774 PetscFunctionBegin; 2775 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2776 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2777 2778 /* copy values over */ 2779 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2780 PetscFunctionReturn(0); 2781 } 2782 2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2785 2786 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2787 { 2788 Mat_SeqBAIJ *b; 2789 PetscErrorCode ierr; 2790 PetscInt i,mbs,nbs,bs2; 2791 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2792 2793 PetscFunctionBegin; 2794 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2795 if (nz == MAT_SKIP_ALLOCATION) { 2796 skipallocation = PETSC_TRUE; 2797 nz = 0; 2798 } 2799 2800 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2803 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2804 2805 B->preallocated = PETSC_TRUE; 2806 2807 mbs = B->rmap->n/bs; 2808 nbs = B->cmap->n/bs; 2809 bs2 = bs*bs; 2810 2811 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2812 2813 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2814 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2815 if (nnz) { 2816 for (i=0; i<mbs; i++) { 2817 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2818 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2819 } 2820 } 2821 2822 b = (Mat_SeqBAIJ*)B->data; 2823 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2824 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2825 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2826 2827 if (!flg) { 2828 switch (bs) { 2829 case 1: 2830 B->ops->mult = MatMult_SeqBAIJ_1; 2831 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2832 break; 2833 case 2: 2834 B->ops->mult = MatMult_SeqBAIJ_2; 2835 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2836 break; 2837 case 3: 2838 B->ops->mult = MatMult_SeqBAIJ_3; 2839 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2840 break; 2841 case 4: 2842 B->ops->mult = MatMult_SeqBAIJ_4; 2843 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2844 break; 2845 case 5: 2846 B->ops->mult = MatMult_SeqBAIJ_5; 2847 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2848 break; 2849 case 6: 2850 B->ops->mult = MatMult_SeqBAIJ_6; 2851 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2852 break; 2853 case 7: 2854 B->ops->mult = MatMult_SeqBAIJ_7; 2855 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2856 break; 2857 case 9: 2858 { 2859 PetscInt version = 1; 2860 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2861 switch (version) { 2862 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2863 case 1: 2864 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2866 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2867 break; 2868 #endif 2869 default: 2870 B->ops->mult = MatMult_SeqBAIJ_N; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2872 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2873 break; 2874 } 2875 break; 2876 } 2877 case 11: 2878 B->ops->mult = MatMult_SeqBAIJ_11; 2879 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2880 break; 2881 case 12: 2882 { 2883 PetscInt version = 1; 2884 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2885 switch (version) { 2886 case 1: 2887 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2888 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2889 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2890 break; 2891 case 2: 2892 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2893 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2894 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2895 break; 2896 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2897 case 3: 2898 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2899 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2900 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2901 break; 2902 #endif 2903 default: 2904 B->ops->mult = MatMult_SeqBAIJ_N; 2905 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2906 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2907 break; 2908 } 2909 break; 2910 } 2911 case 15: 2912 { 2913 PetscInt version = 1; 2914 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2915 switch (version) { 2916 case 1: 2917 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2918 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2919 break; 2920 case 2: 2921 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2922 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2923 break; 2924 case 3: 2925 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2926 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2927 break; 2928 case 4: 2929 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2930 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2931 break; 2932 default: 2933 B->ops->mult = MatMult_SeqBAIJ_N; 2934 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2935 break; 2936 } 2937 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2938 break; 2939 } 2940 default: 2941 B->ops->mult = MatMult_SeqBAIJ_N; 2942 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2943 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2944 break; 2945 } 2946 } 2947 B->ops->sor = MatSOR_SeqBAIJ; 2948 b->mbs = mbs; 2949 b->nbs = nbs; 2950 if (!skipallocation) { 2951 if (!b->imax) { 2952 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2953 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2954 2955 b->free_imax_ilen = PETSC_TRUE; 2956 } 2957 /* b->ilen will count nonzeros in each block row so far. */ 2958 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2959 if (!nnz) { 2960 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2961 else if (nz < 0) nz = 1; 2962 nz = PetscMin(nz,nbs); 2963 for (i=0; i<mbs; i++) b->imax[i] = nz; 2964 ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 2965 } else { 2966 PetscInt64 nz64 = 0; 2967 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2968 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 2969 } 2970 2971 /* allocate the matrix space */ 2972 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2973 if (B->structure_only) { 2974 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2975 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2976 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2977 } else { 2978 PetscInt nzbs2 = 0; 2979 ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 2980 ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2981 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2982 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2983 } 2984 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2985 2986 if (B->structure_only) { 2987 b->singlemalloc = PETSC_FALSE; 2988 b->free_a = PETSC_FALSE; 2989 } else { 2990 b->singlemalloc = PETSC_TRUE; 2991 b->free_a = PETSC_TRUE; 2992 } 2993 b->free_ij = PETSC_TRUE; 2994 2995 b->i[0] = 0; 2996 for (i=1; i<mbs+1; i++) { 2997 b->i[i] = b->i[i-1] + b->imax[i-1]; 2998 } 2999 3000 } else { 3001 b->free_a = PETSC_FALSE; 3002 b->free_ij = PETSC_FALSE; 3003 } 3004 3005 b->bs2 = bs2; 3006 b->mbs = mbs; 3007 b->nz = 0; 3008 b->maxnz = nz; 3009 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3010 B->was_assembled = PETSC_FALSE; 3011 B->assembled = PETSC_FALSE; 3012 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3013 PetscFunctionReturn(0); 3014 } 3015 3016 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3017 { 3018 PetscInt i,m,nz,nz_max=0,*nnz; 3019 PetscScalar *values=NULL; 3020 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3025 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3026 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3027 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3028 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3029 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3030 m = B->rmap->n/bs; 3031 3032 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3033 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3034 for (i=0; i<m; i++) { 3035 nz = ii[i+1]- ii[i]; 3036 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3037 nz_max = PetscMax(nz_max, nz); 3038 nnz[i] = nz; 3039 } 3040 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3041 ierr = PetscFree(nnz);CHKERRQ(ierr); 3042 3043 values = (PetscScalar*)V; 3044 if (!values) { 3045 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3046 } 3047 for (i=0; i<m; i++) { 3048 PetscInt ncols = ii[i+1] - ii[i]; 3049 const PetscInt *icols = jj + ii[i]; 3050 if (bs == 1 || !roworiented) { 3051 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3052 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3053 } else { 3054 PetscInt j; 3055 for (j=0; j<ncols; j++) { 3056 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3057 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3058 } 3059 } 3060 } 3061 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3062 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3063 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3064 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3065 PetscFunctionReturn(0); 3066 } 3067 3068 /*@C 3069 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3070 3071 Not Collective 3072 3073 Input Parameter: 3074 . mat - a MATSEQBAIJ matrix 3075 3076 Output Parameter: 3077 . array - pointer to the data 3078 3079 Level: intermediate 3080 3081 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3082 @*/ 3083 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3084 { 3085 PetscErrorCode ierr; 3086 3087 PetscFunctionBegin; 3088 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 3092 /*@C 3093 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3094 3095 Not Collective 3096 3097 Input Parameters: 3098 + mat - a MATSEQBAIJ matrix 3099 - array - pointer to the data 3100 3101 Level: intermediate 3102 3103 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3104 @*/ 3105 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3106 { 3107 PetscErrorCode ierr; 3108 3109 PetscFunctionBegin; 3110 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3111 PetscFunctionReturn(0); 3112 } 3113 3114 /*MC 3115 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3116 block sparse compressed row format. 3117 3118 Options Database Keys: 3119 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3120 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3121 3122 Level: beginner 3123 3124 Notes: 3125 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3126 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3127 3128 Run with -info to see what version of the matrix-vector product is being used 3129 3130 .seealso: MatCreateSeqBAIJ() 3131 M*/ 3132 3133 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3134 3135 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3136 { 3137 PetscErrorCode ierr; 3138 PetscMPIInt size; 3139 Mat_SeqBAIJ *b; 3140 3141 PetscFunctionBegin; 3142 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3143 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3144 3145 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3146 B->data = (void*)b; 3147 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3148 3149 b->row = NULL; 3150 b->col = NULL; 3151 b->icol = NULL; 3152 b->reallocs = 0; 3153 b->saved_values = NULL; 3154 3155 b->roworiented = PETSC_TRUE; 3156 b->nonew = 0; 3157 b->diag = NULL; 3158 B->spptr = NULL; 3159 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3160 b->keepnonzeropattern = PETSC_FALSE; 3161 3162 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3163 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3164 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3165 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3166 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3167 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3168 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3169 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3170 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3171 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3172 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3173 #if defined(PETSC_HAVE_HYPRE) 3174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3175 #endif 3176 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3177 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3178 PetscFunctionReturn(0); 3179 } 3180 3181 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3182 { 3183 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3184 PetscErrorCode ierr; 3185 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3186 3187 PetscFunctionBegin; 3188 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3189 3190 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3191 c->imax = a->imax; 3192 c->ilen = a->ilen; 3193 c->free_imax_ilen = PETSC_FALSE; 3194 } else { 3195 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3196 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3197 for (i=0; i<mbs; i++) { 3198 c->imax[i] = a->imax[i]; 3199 c->ilen[i] = a->ilen[i]; 3200 } 3201 c->free_imax_ilen = PETSC_TRUE; 3202 } 3203 3204 /* allocate the matrix space */ 3205 if (mallocmatspace) { 3206 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3207 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3208 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3209 3210 c->i = a->i; 3211 c->j = a->j; 3212 c->singlemalloc = PETSC_FALSE; 3213 c->free_a = PETSC_TRUE; 3214 c->free_ij = PETSC_FALSE; 3215 c->parent = A; 3216 C->preallocated = PETSC_TRUE; 3217 C->assembled = PETSC_TRUE; 3218 3219 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3220 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3221 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3222 } else { 3223 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3224 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3225 3226 c->singlemalloc = PETSC_TRUE; 3227 c->free_a = PETSC_TRUE; 3228 c->free_ij = PETSC_TRUE; 3229 3230 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3231 if (mbs > 0) { 3232 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3233 if (cpvalues == MAT_COPY_VALUES) { 3234 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3235 } else { 3236 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3237 } 3238 } 3239 C->preallocated = PETSC_TRUE; 3240 C->assembled = PETSC_TRUE; 3241 } 3242 } 3243 3244 c->roworiented = a->roworiented; 3245 c->nonew = a->nonew; 3246 3247 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3248 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3249 3250 c->bs2 = a->bs2; 3251 c->mbs = a->mbs; 3252 c->nbs = a->nbs; 3253 3254 if (a->diag) { 3255 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3256 c->diag = a->diag; 3257 c->free_diag = PETSC_FALSE; 3258 } else { 3259 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3260 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3261 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3262 c->free_diag = PETSC_TRUE; 3263 } 3264 } else c->diag = NULL; 3265 3266 c->nz = a->nz; 3267 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3268 c->solve_work = NULL; 3269 c->mult_work = NULL; 3270 c->sor_workt = NULL; 3271 c->sor_work = NULL; 3272 3273 c->compressedrow.use = a->compressedrow.use; 3274 c->compressedrow.nrows = a->compressedrow.nrows; 3275 if (a->compressedrow.use) { 3276 i = a->compressedrow.nrows; 3277 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3278 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3279 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3280 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3281 } else { 3282 c->compressedrow.use = PETSC_FALSE; 3283 c->compressedrow.i = NULL; 3284 c->compressedrow.rindex = NULL; 3285 } 3286 C->nonzerostate = A->nonzerostate; 3287 3288 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } 3291 3292 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3293 { 3294 PetscErrorCode ierr; 3295 3296 PetscFunctionBegin; 3297 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3298 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3299 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3300 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3301 PetscFunctionReturn(0); 3302 } 3303 3304 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3305 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3306 { 3307 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3308 PetscInt *rowidxs,*colidxs; 3309 PetscScalar *matvals; 3310 PetscErrorCode ierr; 3311 3312 PetscFunctionBegin; 3313 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3314 3315 /* read matrix header */ 3316 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3317 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3318 M = header[1]; N = header[2]; nz = header[3]; 3319 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3320 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3321 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3322 3323 /* set block sizes from the viewer's .info file */ 3324 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3325 /* set local and global sizes if not set already */ 3326 if (mat->rmap->n < 0) mat->rmap->n = M; 3327 if (mat->cmap->n < 0) mat->cmap->n = N; 3328 if (mat->rmap->N < 0) mat->rmap->N = M; 3329 if (mat->cmap->N < 0) mat->cmap->N = N; 3330 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3331 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3332 3333 /* check if the matrix sizes are correct */ 3334 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3335 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3336 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3337 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3338 mbs = m/bs; nbs = n/bs; 3339 3340 /* read in row lengths, column indices and nonzero values */ 3341 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3342 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3343 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3344 sum = rowidxs[m]; 3345 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3346 3347 /* read in column indices and nonzero values */ 3348 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3349 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3350 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3351 3352 { /* preallocate matrix storage */ 3353 PetscBT bt; /* helper bit set to count nonzeros */ 3354 PetscInt *nnz; 3355 PetscBool sbaij; 3356 3357 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3358 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3359 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3360 for (i=0; i<mbs; i++) { 3361 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3362 for (k=0; k<bs; k++) { 3363 PetscInt row = bs*i + k; 3364 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3365 PetscInt col = colidxs[j]; 3366 if (!sbaij || col >= row) 3367 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3368 } 3369 } 3370 } 3371 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3372 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3373 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3374 ierr = PetscFree(nnz);CHKERRQ(ierr); 3375 } 3376 3377 /* store matrix values */ 3378 for (i=0; i<m; i++) { 3379 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3380 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3381 } 3382 3383 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3384 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3385 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3386 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3387 PetscFunctionReturn(0); 3388 } 3389 3390 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3391 { 3392 PetscErrorCode ierr; 3393 PetscBool isbinary; 3394 3395 PetscFunctionBegin; 3396 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3397 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3398 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3399 PetscFunctionReturn(0); 3400 } 3401 3402 /*@C 3403 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3404 compressed row) format. For good matrix assembly performance the 3405 user should preallocate the matrix storage by setting the parameter nz 3406 (or the array nnz). By setting these parameters accurately, performance 3407 during matrix assembly can be increased by more than a factor of 50. 3408 3409 Collective 3410 3411 Input Parameters: 3412 + comm - MPI communicator, set to PETSC_COMM_SELF 3413 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3414 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3415 . m - number of rows 3416 . n - number of columns 3417 . nz - number of nonzero blocks per block row (same for all rows) 3418 - nnz - array containing the number of nonzero blocks in the various block rows 3419 (possibly different for each block row) or NULL 3420 3421 Output Parameter: 3422 . A - the matrix 3423 3424 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3425 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3426 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3427 3428 Options Database Keys: 3429 + -mat_no_unroll - uses code that does not unroll the loops in the 3430 block calculations (much slower) 3431 - -mat_block_size - size of the blocks to use 3432 3433 Level: intermediate 3434 3435 Notes: 3436 The number of rows and columns must be divisible by blocksize. 3437 3438 If the nnz parameter is given then the nz parameter is ignored 3439 3440 A nonzero block is any block that as 1 or more nonzeros in it 3441 3442 The block AIJ format is fully compatible with standard Fortran 77 3443 storage. That is, the stored row and column indices can begin at 3444 either one (as in Fortran) or zero. See the users' manual for details. 3445 3446 Specify the preallocated storage with either nz or nnz (not both). 3447 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3448 allocation. See Users-Manual: ch_mat for details. 3449 matrices. 3450 3451 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3452 @*/ 3453 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3454 { 3455 PetscErrorCode ierr; 3456 3457 PetscFunctionBegin; 3458 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3459 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3460 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3461 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3462 PetscFunctionReturn(0); 3463 } 3464 3465 /*@C 3466 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3467 per row in the matrix. For good matrix assembly performance the 3468 user should preallocate the matrix storage by setting the parameter nz 3469 (or the array nnz). By setting these parameters accurately, performance 3470 during matrix assembly can be increased by more than a factor of 50. 3471 3472 Collective 3473 3474 Input Parameters: 3475 + B - the matrix 3476 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3477 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3478 . nz - number of block nonzeros per block row (same for all rows) 3479 - nnz - array containing the number of block nonzeros in the various block rows 3480 (possibly different for each block row) or NULL 3481 3482 Options Database Keys: 3483 + -mat_no_unroll - uses code that does not unroll the loops in the 3484 block calculations (much slower) 3485 - -mat_block_size - size of the blocks to use 3486 3487 Level: intermediate 3488 3489 Notes: 3490 If the nnz parameter is given then the nz parameter is ignored 3491 3492 You can call MatGetInfo() to get information on how effective the preallocation was; 3493 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3494 You can also run with the option -info and look for messages with the string 3495 malloc in them to see if additional memory allocation was needed. 3496 3497 The block AIJ format is fully compatible with standard Fortran 77 3498 storage. That is, the stored row and column indices can begin at 3499 either one (as in Fortran) or zero. See the users' manual for details. 3500 3501 Specify the preallocated storage with either nz or nnz (not both). 3502 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3503 allocation. See Users-Manual: ch_mat for details. 3504 3505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3506 @*/ 3507 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3508 { 3509 PetscErrorCode ierr; 3510 3511 PetscFunctionBegin; 3512 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3513 PetscValidType(B,1); 3514 PetscValidLogicalCollectiveInt(B,bs,2); 3515 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3516 PetscFunctionReturn(0); 3517 } 3518 3519 /*@C 3520 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3521 3522 Collective 3523 3524 Input Parameters: 3525 + B - the matrix 3526 . i - the indices into j for the start of each local row (starts with zero) 3527 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3528 - v - optional values in the matrix 3529 3530 Level: advanced 3531 3532 Notes: 3533 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3534 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3535 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3536 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3537 block column and the second index is over columns within a block. 3538 3539 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3540 3541 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3542 @*/ 3543 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3544 { 3545 PetscErrorCode ierr; 3546 3547 PetscFunctionBegin; 3548 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3549 PetscValidType(B,1); 3550 PetscValidLogicalCollectiveInt(B,bs,2); 3551 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3552 PetscFunctionReturn(0); 3553 } 3554 3555 /*@ 3556 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3557 3558 Collective 3559 3560 Input Parameters: 3561 + comm - must be an MPI communicator of size 1 3562 . bs - size of block 3563 . m - number of rows 3564 . n - number of columns 3565 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3566 . j - column indices 3567 - a - matrix values 3568 3569 Output Parameter: 3570 . mat - the matrix 3571 3572 Level: advanced 3573 3574 Notes: 3575 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3576 once the matrix is destroyed 3577 3578 You cannot set new nonzero locations into this matrix, that will generate an error. 3579 3580 The i and j indices are 0 based 3581 3582 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3583 3584 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3585 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3586 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3587 with column-major ordering within blocks. 3588 3589 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3590 3591 @*/ 3592 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3593 { 3594 PetscErrorCode ierr; 3595 PetscInt ii; 3596 Mat_SeqBAIJ *baij; 3597 3598 PetscFunctionBegin; 3599 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3600 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3601 3602 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3603 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3604 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3605 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3606 baij = (Mat_SeqBAIJ*)(*mat)->data; 3607 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3608 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3609 3610 baij->i = i; 3611 baij->j = j; 3612 baij->a = a; 3613 3614 baij->singlemalloc = PETSC_FALSE; 3615 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3616 baij->free_a = PETSC_FALSE; 3617 baij->free_ij = PETSC_FALSE; 3618 3619 for (ii=0; ii<m; ii++) { 3620 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3621 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3622 } 3623 if (PetscDefined(USE_DEBUG)) { 3624 for (ii=0; ii<baij->i[m]; ii++) { 3625 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3626 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3627 } 3628 } 3629 3630 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 PetscFunctionReturn(0); 3633 } 3634 3635 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3636 { 3637 PetscErrorCode ierr; 3638 PetscMPIInt size; 3639 3640 PetscFunctionBegin; 3641 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3642 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3643 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3644 } else { 3645 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3646 } 3647 PetscFunctionReturn(0); 3648 } 3649