1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 21 { 22 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 23 PetscErrorCode ierr; 24 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 25 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 26 PetscReal shift = 0.0; 27 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 28 29 PetscFunctionBegin; 30 allowzeropivot = PetscNot(A->erroriffailure); 31 32 if (a->idiagvalid) { 33 if (values) *values = a->idiag; 34 PetscFunctionReturn(0); 35 } 36 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 37 diag_offset = a->diag; 38 if (!a->idiag) { 39 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 40 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 41 } 42 diag = a->idiag; 43 if (values) *values = a->idiag; 44 /* factor and invert each block */ 45 switch (bs) { 46 case 1: 47 for (i=0; i<mbs; i++) { 48 odiag = v + 1*diag_offset[i]; 49 diag[0] = odiag[0]; 50 51 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 52 if (allowzeropivot) { 53 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 54 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 55 A->factorerror_zeropivot_row = i; 56 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 57 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 58 } 59 60 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 61 diag += 1; 62 } 63 break; 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 69 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 70 diag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 80 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 81 diag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 88 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 89 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 90 diag += 16; 91 } 92 break; 93 case 5: 94 for (i=0; i<mbs; i++) { 95 odiag = v + 25*diag_offset[i]; 96 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 97 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 98 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 99 diag += 25; 100 } 101 break; 102 case 6: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 36*diag_offset[i]; 105 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 107 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108 diag += 36; 109 } 110 break; 111 case 7: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 49*diag_offset[i]; 114 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 49; 118 } 119 break; 120 default: 121 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 122 for (i=0; i<mbs; i++) { 123 odiag = v + bs2*diag_offset[i]; 124 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 125 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 126 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 diag += bs2; 128 } 129 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 PetscScalar *x,*work,*w,*workt,*t; 139 const MatScalar *v,*aa = a->a, *idiag; 140 const PetscScalar *b,*xb; 141 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 142 PetscErrorCode ierr; 143 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 144 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 145 146 PetscFunctionBegin; 147 its = its*lits; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 153 154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155 156 if (!m) PetscFunctionReturn(0); 157 diag = a->diag; 158 idiag = a->idiag; 159 k = PetscMax(A->rmap->n,A->cmap->n); 160 if (!a->mult_work) { 161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162 } 163 if (!a->sor_workt) { 164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165 } 166 if (!a->sor_work) { 167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168 } 169 work = a->mult_work; 170 t = a->sor_workt; 171 w = a->sor_work; 172 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178 switch (bs) { 179 case 1: 180 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181 t[0] = b[0]; 182 i2 = 1; 183 idiag += 1; 184 for (i=1; i<m; i++) { 185 v = aa + ai[i]; 186 vi = aj + ai[i]; 187 nz = diag[i] - ai[i]; 188 s[0] = b[i2]; 189 for (j=0; j<nz; j++) { 190 xw[0] = x[vi[j]]; 191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192 } 193 t[i2] = s[0]; 194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195 x[i2] = xw[0]; 196 idiag += 1; 197 i2 += 1; 198 } 199 break; 200 case 2: 201 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202 t[0] = b[0]; t[1] = b[1]; 203 i2 = 2; 204 idiag += 4; 205 for (i=1; i<m; i++) { 206 v = aa + 4*ai[i]; 207 vi = aj + ai[i]; 208 nz = diag[i] - ai[i]; 209 s[0] = b[i2]; s[1] = b[i2+1]; 210 for (j=0; j<nz; j++) { 211 idx = 2*vi[j]; 212 it = 4*j; 213 xw[0] = x[idx]; xw[1] = x[1+idx]; 214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215 } 216 t[i2] = s[0]; t[i2+1] = s[1]; 217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218 x[i2] = xw[0]; x[i2+1] = xw[1]; 219 idiag += 4; 220 i2 += 2; 221 } 222 break; 223 case 3: 224 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226 i2 = 3; 227 idiag += 9; 228 for (i=1; i<m; i++) { 229 v = aa + 9*ai[i]; 230 vi = aj + ai[i]; 231 nz = diag[i] - ai[i]; 232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233 while (nz--) { 234 idx = 3*(*vi++); 235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237 v += 9; 238 } 239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242 idiag += 9; 243 i2 += 3; 244 } 245 break; 246 case 4: 247 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249 i2 = 4; 250 idiag += 16; 251 for (i=1; i<m; i++) { 252 v = aa + 16*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256 while (nz--) { 257 idx = 4*(*vi++); 258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260 v += 16; 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265 idiag += 16; 266 i2 += 4; 267 } 268 break; 269 case 5: 270 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272 i2 = 5; 273 idiag += 25; 274 for (i=1; i<m; i++) { 275 v = aa + 25*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279 while (nz--) { 280 idx = 5*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283 v += 25; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288 idiag += 25; 289 i2 += 5; 290 } 291 break; 292 case 6: 293 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295 i2 = 6; 296 idiag += 36; 297 for (i=1; i<m; i++) { 298 v = aa + 36*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302 while (nz--) { 303 idx = 6*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307 v += 36; 308 } 309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313 idiag += 36; 314 i2 += 6; 315 } 316 break; 317 case 7: 318 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321 i2 = 7; 322 idiag += 49; 323 for (i=1; i<m; i++) { 324 v = aa + 49*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329 while (nz--) { 330 idx = 7*(*vi++); 331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334 v += 49; 335 } 336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341 idiag += 49; 342 i2 += 7; 343 } 344 break; 345 default: 346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 348 i2 = bs; 349 idiag += bs2; 350 for (i=1; i<m; i++) { 351 v = aa + bs2*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 355 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 356 /* copy all rows of x that are needed into contiguous space */ 357 workt = work; 358 for (j=0; j<nz; j++) { 359 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 360 workt += bs; 361 } 362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365 366 idiag += bs2; 367 i2 += bs; 368 } 369 break; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373 xb = t; 374 } 375 else xb = b; 376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377 idiag = a->idiag+bs2*(a->mbs-1); 378 i2 = bs * (m-1); 379 switch (bs) { 380 case 1: 381 s[0] = xb[i2]; 382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383 x[i2] = xw[0]; 384 i2 -= 1; 385 for (i=m-2; i>=0; i--) { 386 v = aa + (diag[i]+1); 387 vi = aj + diag[i] + 1; 388 nz = ai[i+1] - diag[i] - 1; 389 s[0] = xb[i2]; 390 for (j=0; j<nz; j++) { 391 xw[0] = x[vi[j]]; 392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393 } 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 idiag -= 1; 397 i2 -= 1; 398 } 399 break; 400 case 2: 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403 x[i2] = xw[0]; x[i2+1] = xw[1]; 404 i2 -= 2; 405 idiag -= 4; 406 for (i=m-2; i>=0; i--) { 407 v = aa + 4*(diag[i] + 1); 408 vi = aj + diag[i] + 1; 409 nz = ai[i+1] - diag[i] - 1; 410 s[0] = xb[i2]; s[1] = xb[i2+1]; 411 for (j=0; j<nz; j++) { 412 idx = 2*vi[j]; 413 it = 4*j; 414 xw[0] = x[idx]; xw[1] = x[1+idx]; 415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416 } 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 idiag -= 4; 420 i2 -= 2; 421 } 422 break; 423 case 3: 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427 i2 -= 3; 428 idiag -= 9; 429 for (i=m-2; i>=0; i--) { 430 v = aa + 9*(diag[i]+1); 431 vi = aj + diag[i] + 1; 432 nz = ai[i+1] - diag[i] - 1; 433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434 while (nz--) { 435 idx = 3*(*vi++); 436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438 v += 9; 439 } 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 break; 446 case 4: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450 i2 -= 4; 451 idiag -= 16; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 16*(diag[i]+1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457 while (nz--) { 458 idx = 4*(*vi++); 459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461 v += 16; 462 } 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 idiag -= 16; 466 i2 -= 4; 467 } 468 break; 469 case 5: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473 i2 -= 5; 474 idiag -= 25; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 25*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480 while (nz--) { 481 idx = 5*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484 v += 25; 485 } 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 idiag -= 25; 489 i2 -= 5; 490 } 491 break; 492 case 6: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496 i2 -= 6; 497 idiag -= 36; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 36*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503 while (nz--) { 504 idx = 6*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508 v += 36; 509 } 510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512 idiag -= 36; 513 i2 -= 6; 514 } 515 break; 516 case 7: 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522 i2 -= 7; 523 idiag -= 49; 524 for (i=m-2; i>=0; i--) { 525 v = aa + 49*(diag[i]+1); 526 vi = aj + diag[i] + 1; 527 nz = ai[i+1] - diag[i] - 1; 528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530 while (nz--) { 531 idx = 7*(*vi++); 532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535 v += 49; 536 } 537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540 idiag -= 49; 541 i2 -= 7; 542 } 543 break; 544 default: 545 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547 i2 -= bs; 548 idiag -= bs2; 549 for (i=m-2; i>=0; i--) { 550 v = aa + bs2*(diag[i]+1); 551 vi = aj + diag[i] + 1; 552 nz = ai[i+1] - diag[i] - 1; 553 554 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 555 /* copy all rows of x that are needed into contiguous space */ 556 workt = work; 557 for (j=0; j<nz; j++) { 558 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 559 workt += bs; 560 } 561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563 564 idiag -= bs2; 565 i2 -= bs; 566 } 567 break; 568 } 569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570 } 571 its--; 572 } 573 while (its--) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 idiag = a->idiag; 576 i2 = 0; 577 switch (bs) { 578 case 1: 579 for (i=0; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = ai[i+1] - ai[i]; 583 s[0] = b[i2]; 584 for (j=0; j<nz; j++) { 585 xw[0] = x[vi[j]]; 586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587 } 588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589 x[i2] += xw[0]; 590 idiag += 1; 591 i2 += 1; 592 } 593 break; 594 case 2: 595 for (i=0; i<m; i++) { 596 v = aa + 4*ai[i]; 597 vi = aj + ai[i]; 598 nz = ai[i+1] - ai[i]; 599 s[0] = b[i2]; s[1] = b[i2+1]; 600 for (j=0; j<nz; j++) { 601 idx = 2*vi[j]; 602 it = 4*j; 603 xw[0] = x[idx]; xw[1] = x[1+idx]; 604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605 } 606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607 x[i2] += xw[0]; x[i2+1] += xw[1]; 608 idiag += 4; 609 i2 += 2; 610 } 611 break; 612 case 3: 613 for (i=0; i<m; i++) { 614 v = aa + 9*ai[i]; 615 vi = aj + ai[i]; 616 nz = ai[i+1] - ai[i]; 617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618 while (nz--) { 619 idx = 3*(*vi++); 620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626 idiag += 9; 627 i2 += 3; 628 } 629 break; 630 case 4: 631 for (i=0; i<m; i++) { 632 v = aa + 16*ai[i]; 633 vi = aj + ai[i]; 634 nz = ai[i+1] - ai[i]; 635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636 while (nz--) { 637 idx = 4*(*vi++); 638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640 v += 16; 641 } 642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644 idiag += 16; 645 i2 += 4; 646 } 647 break; 648 case 5: 649 for (i=0; i<m; i++) { 650 v = aa + 25*ai[i]; 651 vi = aj + ai[i]; 652 nz = ai[i+1] - ai[i]; 653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658 v += 25; 659 } 660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662 idiag += 25; 663 i2 += 5; 664 } 665 break; 666 case 6: 667 for (i=0; i<m; i++) { 668 v = aa + 36*ai[i]; 669 vi = aj + ai[i]; 670 nz = ai[i+1] - ai[i]; 671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672 while (nz--) { 673 idx = 6*(*vi++); 674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677 v += 36; 678 } 679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682 idiag += 36; 683 i2 += 6; 684 } 685 break; 686 case 7: 687 for (i=0; i<m; i++) { 688 v = aa + 49*ai[i]; 689 vi = aj + ai[i]; 690 nz = ai[i+1] - ai[i]; 691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693 while (nz--) { 694 idx = 7*(*vi++); 695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698 v += 49; 699 } 700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703 idiag += 49; 704 i2 += 7; 705 } 706 break; 707 default: 708 for (i=0; i<m; i++) { 709 v = aa + bs2*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 713 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 714 /* copy all rows of x that are needed into contiguous space */ 715 workt = work; 716 for (j=0; j<nz; j++) { 717 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 718 workt += bs; 719 } 720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722 723 idiag += bs2; 724 i2 += bs; 725 } 726 break; 727 } 728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729 } 730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731 idiag = a->idiag+bs2*(a->mbs-1); 732 i2 = bs * (m-1); 733 switch (bs) { 734 case 1: 735 for (i=m-1; i>=0; i--) { 736 v = aa + ai[i]; 737 vi = aj + ai[i]; 738 nz = ai[i+1] - ai[i]; 739 s[0] = b[i2]; 740 for (j=0; j<nz; j++) { 741 xw[0] = x[vi[j]]; 742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743 } 744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745 x[i2] += xw[0]; 746 idiag -= 1; 747 i2 -= 1; 748 } 749 break; 750 case 2: 751 for (i=m-1; i>=0; i--) { 752 v = aa + 4*ai[i]; 753 vi = aj + ai[i]; 754 nz = ai[i+1] - ai[i]; 755 s[0] = b[i2]; s[1] = b[i2+1]; 756 for (j=0; j<nz; j++) { 757 idx = 2*vi[j]; 758 it = 4*j; 759 xw[0] = x[idx]; xw[1] = x[1+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761 } 762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763 x[i2] += xw[0]; x[i2+1] += xw[1]; 764 idiag -= 4; 765 i2 -= 2; 766 } 767 break; 768 case 3: 769 for (i=m-1; i>=0; i--) { 770 v = aa + 9*ai[i]; 771 vi = aj + ai[i]; 772 nz = ai[i+1] - ai[i]; 773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774 while (nz--) { 775 idx = 3*(*vi++); 776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778 v += 9; 779 } 780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782 idiag -= 9; 783 i2 -= 3; 784 } 785 break; 786 case 4: 787 for (i=m-1; i>=0; i--) { 788 v = aa + 16*ai[i]; 789 vi = aj + ai[i]; 790 nz = ai[i+1] - ai[i]; 791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792 while (nz--) { 793 idx = 4*(*vi++); 794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796 v += 16; 797 } 798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800 idiag -= 16; 801 i2 -= 4; 802 } 803 break; 804 case 5: 805 for (i=m-1; i>=0; i--) { 806 v = aa + 25*ai[i]; 807 vi = aj + ai[i]; 808 nz = ai[i+1] - ai[i]; 809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810 while (nz--) { 811 idx = 5*(*vi++); 812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814 v += 25; 815 } 816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818 idiag -= 25; 819 i2 -= 5; 820 } 821 break; 822 case 6: 823 for (i=m-1; i>=0; i--) { 824 v = aa + 36*ai[i]; 825 vi = aj + ai[i]; 826 nz = ai[i+1] - ai[i]; 827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828 while (nz--) { 829 idx = 6*(*vi++); 830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833 v += 36; 834 } 835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838 idiag -= 36; 839 i2 -= 6; 840 } 841 break; 842 case 7: 843 for (i=m-1; i>=0; i--) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = ai[i+1] - ai[i]; 847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854 v += 49; 855 } 856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859 idiag -= 49; 860 i2 -= 7; 861 } 862 break; 863 default: 864 for (i=m-1; i>=0; i--) { 865 v = aa + bs2*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 869 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 874 workt += bs; 875 } 876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878 879 idiag -= bs2; 880 i2 -= bs; 881 } 882 break; 883 } 884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885 } 886 } 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 893 /* 894 Special version for direct calls from Fortran (Used in PETSc-fun3d) 895 */ 896 #if defined(PETSC_HAVE_FORTRAN_CAPS) 897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 899 #define matsetvaluesblocked4_ matsetvaluesblocked4 900 #endif 901 902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 903 { 904 Mat A = *AA; 905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 906 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 907 PetscInt *ai =a->i,*ailen=a->ilen; 908 PetscInt *aj =a->j,stepval,lastcol = -1; 909 const PetscScalar *value = v; 910 MatScalar *ap,*aa = a->a,*bap; 911 PetscErrorCode ierr; 912 913 PetscFunctionBegin; 914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 915 stepval = (n-1)*4; 916 for (k=0; k<m; k++) { /* loop over added rows */ 917 row = im[k]; 918 rp = aj + ai[row]; 919 ap = aa + 16*ai[row]; 920 nrow = ailen[row]; 921 low = 0; 922 high = nrow; 923 for (l=0; l<n; l++) { /* loop over added columns */ 924 col = in[l]; 925 if (col <= lastcol) low = 0; 926 else high = nrow; 927 lastcol = col; 928 value = v + k*(stepval+4 + l)*4; 929 while (high-low > 7) { 930 t = (low+high)/2; 931 if (rp[t] > col) high = t; 932 else low = t; 933 } 934 for (i=low; i<high; i++) { 935 if (rp[i] > col) break; 936 if (rp[i] == col) { 937 bap = ap + 16*i; 938 for (ii=0; ii<4; ii++,value+=stepval) { 939 for (jj=ii; jj<16; jj+=4) { 940 bap[jj] += *value++; 941 } 942 } 943 goto noinsert2; 944 } 945 } 946 N = nrow++ - 1; 947 high++; /* added new column index thus must search to one higher than before */ 948 /* shift up all the later entries in this row */ 949 for (ii=N; ii>=i; ii--) { 950 rp[ii+1] = rp[ii]; 951 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 952 } 953 if (N >= i) { 954 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 955 } 956 rp[i] = col; 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] = *value++; 961 } 962 } 963 noinsert2:; 964 low = i; 965 } 966 ailen[row] = nrow; 967 } 968 PetscFunctionReturnVoid(); 969 } 970 971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 972 #define matsetvalues4_ MATSETVALUES4 973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 974 #define matsetvalues4_ matsetvalues4 975 #endif 976 977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 978 { 979 Mat A = *AA; 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 981 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 982 PetscInt *ai=a->i,*ailen=a->ilen; 983 PetscInt *aj=a->j,brow,bcol; 984 PetscInt ridx,cidx,lastcol = -1; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 for (k=0; k<m; k++) { /* loop over added rows */ 990 row = im[k]; brow = row/4; 991 rp = aj + ai[brow]; 992 ap = aa + 16*ai[brow]; 993 nrow = ailen[brow]; 994 low = 0; 995 high = nrow; 996 for (l=0; l<n; l++) { /* loop over added columns */ 997 col = in[l]; bcol = col/4; 998 ridx = row % 4; cidx = col % 4; 999 value = v[l + k*n]; 1000 if (col <= lastcol) low = 0; 1001 else high = nrow; 1002 lastcol = col; 1003 while (high-low > 7) { 1004 t = (low+high)/2; 1005 if (rp[t] > bcol) high = t; 1006 else low = t; 1007 } 1008 for (i=low; i<high; i++) { 1009 if (rp[i] > bcol) break; 1010 if (rp[i] == bcol) { 1011 bap = ap + 16*i + 4*cidx + ridx; 1012 *bap += value; 1013 goto noinsert1; 1014 } 1015 } 1016 N = nrow++ - 1; 1017 high++; /* added new column thus must search to one higher than before */ 1018 /* shift up all the later entries in this row */ 1019 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1020 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1021 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1022 rp[i] = bcol; 1023 ap[16*i + 4*cidx + ridx] = value; 1024 noinsert1:; 1025 low = i; 1026 } 1027 ailen[brow] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 /* 1033 Checks for missing diagonals 1034 */ 1035 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1036 { 1037 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1038 PetscErrorCode ierr; 1039 PetscInt *diag,*ii = a->i,i; 1040 1041 PetscFunctionBegin; 1042 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1043 *missing = PETSC_FALSE; 1044 if (A->rmap->n > 0 && !ii) { 1045 *missing = PETSC_TRUE; 1046 if (d) *d = 0; 1047 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1048 } else { 1049 PetscInt n; 1050 n = PetscMin(a->mbs, a->nbs); 1051 diag = a->diag; 1052 for (i=0; i<n; i++) { 1053 if (diag[i] >= ii[i+1]) { 1054 *missing = PETSC_TRUE; 1055 if (d) *d = i; 1056 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1057 break; 1058 } 1059 } 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1065 { 1066 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 PetscInt i,j,m = a->mbs; 1069 1070 PetscFunctionBegin; 1071 if (!a->diag) { 1072 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1073 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1074 a->free_diag = PETSC_TRUE; 1075 } 1076 for (i=0; i<m; i++) { 1077 a->diag[i] = a->i[i+1]; 1078 for (j=a->i[i]; j<a->i[i+1]; j++) { 1079 if (a->j[j] == i) { 1080 a->diag[i] = j; 1081 break; 1082 } 1083 } 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1090 { 1091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1094 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1095 1096 PetscFunctionBegin; 1097 *nn = n; 1098 if (!ia) PetscFunctionReturn(0); 1099 if (symmetric) { 1100 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1101 nz = tia[n]; 1102 } else { 1103 tia = a->i; tja = a->j; 1104 } 1105 1106 if (!blockcompressed && bs > 1) { 1107 (*nn) *= bs; 1108 /* malloc & create the natural set of indices */ 1109 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1110 if (n) { 1111 (*ia)[0] = oshift; 1112 for (j=1; j<bs; j++) { 1113 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1114 } 1115 } 1116 1117 for (i=1; i<n; i++) { 1118 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1119 for (j=1; j<bs; j++) { 1120 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1121 } 1122 } 1123 if (n) { 1124 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1125 } 1126 1127 if (inja) { 1128 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1129 cnt = 0; 1130 for (i=0; i<n; i++) { 1131 for (j=0; j<bs; j++) { 1132 for (k=tia[i]; k<tia[i+1]; k++) { 1133 for (l=0; l<bs; l++) { 1134 (*ja)[cnt++] = bs*tja[k] + l; 1135 } 1136 } 1137 } 1138 } 1139 } 1140 1141 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1142 ierr = PetscFree(tia);CHKERRQ(ierr); 1143 ierr = PetscFree(tja);CHKERRQ(ierr); 1144 } 1145 } else if (oshift == 1) { 1146 if (symmetric) { 1147 nz = tia[A->rmap->n/bs]; 1148 /* add 1 to i and j indices */ 1149 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1150 *ia = tia; 1151 if (ja) { 1152 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1153 *ja = tja; 1154 } 1155 } else { 1156 nz = a->i[A->rmap->n/bs]; 1157 /* malloc space and add 1 to i and j indices */ 1158 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1159 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1160 if (ja) { 1161 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1162 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1163 } 1164 } 1165 } else { 1166 *ia = tia; 1167 if (ja) *ja = tja; 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1173 { 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 if (!ia) PetscFunctionReturn(0); 1178 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1179 ierr = PetscFree(*ia);CHKERRQ(ierr); 1180 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1186 { 1187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 #if defined(PETSC_USE_LOG) 1192 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1193 #endif 1194 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1197 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1198 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1199 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1200 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1205 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1206 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1207 1208 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1209 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1210 ierr = PetscFree(A->data);CHKERRQ(ierr); 1211 1212 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1225 #if defined(PETSC_HAVE_HYPRE) 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1227 #endif 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1233 { 1234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 switch (op) { 1239 case MAT_ROW_ORIENTED: 1240 a->roworiented = flg; 1241 break; 1242 case MAT_KEEP_NONZERO_PATTERN: 1243 a->keepnonzeropattern = flg; 1244 break; 1245 case MAT_NEW_NONZERO_LOCATIONS: 1246 a->nonew = (flg ? 0 : 1); 1247 break; 1248 case MAT_NEW_NONZERO_LOCATION_ERR: 1249 a->nonew = (flg ? -1 : 0); 1250 break; 1251 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1252 a->nonew = (flg ? -2 : 0); 1253 break; 1254 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1255 a->nounused = (flg ? -1 : 0); 1256 break; 1257 case MAT_FORCE_DIAGONAL_ENTRIES: 1258 case MAT_IGNORE_OFF_PROC_ENTRIES: 1259 case MAT_USE_HASH_TABLE: 1260 case MAT_SORTED_FULL: 1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1262 break; 1263 case MAT_SPD: 1264 case MAT_SYMMETRIC: 1265 case MAT_STRUCTURALLY_SYMMETRIC: 1266 case MAT_HERMITIAN: 1267 case MAT_SYMMETRY_ETERNAL: 1268 case MAT_SUBMAT_SINGLEIS: 1269 case MAT_STRUCTURE_ONLY: 1270 /* These options are handled directly by MatSetOption() */ 1271 break; 1272 default: 1273 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1280 { 1281 PetscErrorCode ierr; 1282 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1283 MatScalar *aa_i; 1284 PetscScalar *v_i; 1285 1286 PetscFunctionBegin; 1287 bs = A->rmap->bs; 1288 bs2 = bs*bs; 1289 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1290 1291 bn = row/bs; /* Block number */ 1292 bp = row % bs; /* Block Position */ 1293 M = ai[bn+1] - ai[bn]; 1294 *nz = bs*M; 1295 1296 if (v) { 1297 *v = NULL; 1298 if (*nz) { 1299 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1300 for (i=0; i<M; i++) { /* for each block in the block row */ 1301 v_i = *v + i*bs; 1302 aa_i = aa + bs2*(ai[bn] + i); 1303 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1304 } 1305 } 1306 } 1307 1308 if (idx) { 1309 *idx = NULL; 1310 if (*nz) { 1311 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1312 for (i=0; i<M; i++) { /* for each block in the block row */ 1313 idx_i = *idx + i*bs; 1314 itmp = bs*aj[ai[bn] + i]; 1315 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1323 { 1324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1333 { 1334 PetscErrorCode ierr; 1335 1336 PetscFunctionBegin; 1337 if (nz) *nz = 0; 1338 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1339 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1344 { 1345 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1346 Mat C; 1347 PetscErrorCode ierr; 1348 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1349 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1350 MatScalar *ata,*aa=a->a; 1351 1352 PetscFunctionBegin; 1353 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1354 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1355 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1356 1357 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1358 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1359 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1360 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1361 1362 at = (Mat_SeqBAIJ*)C->data; 1363 ati = at->i; 1364 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1365 } else { 1366 C = *B; 1367 at = (Mat_SeqBAIJ*)C->data; 1368 ati = at->i; 1369 } 1370 1371 atj = at->j; 1372 ata = at->a; 1373 1374 /* Copy ati into atfill so we have locations of the next free space in atj */ 1375 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1376 1377 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1378 for (i=0; i<mbs; i++) { 1379 anzj = ai[i+1] - ai[i]; 1380 for (j=0; j<anzj; j++) { 1381 atj[atfill[*aj]] = i; 1382 for (kr=0; kr<bs; kr++) { 1383 for (k=0; k<bs; k++) { 1384 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1385 } 1386 } 1387 atfill[*aj++] += 1; 1388 } 1389 } 1390 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 1393 /* Clean up temporary space and complete requests. */ 1394 ierr = PetscFree(atfill);CHKERRQ(ierr); 1395 1396 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1397 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1398 *B = C; 1399 } else { 1400 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1406 { 1407 PetscErrorCode ierr; 1408 Mat Btrans; 1409 1410 PetscFunctionBegin; 1411 *f = PETSC_FALSE; 1412 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1413 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1414 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1419 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1420 { 1421 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1422 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1423 PetscInt *rowlens,*colidxs; 1424 PetscScalar *matvals; 1425 PetscErrorCode ierr; 1426 1427 PetscFunctionBegin; 1428 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1429 1430 M = mat->rmap->N; 1431 N = mat->cmap->N; 1432 m = mat->rmap->n; 1433 bs = mat->rmap->bs; 1434 nz = bs*bs*A->nz; 1435 1436 /* write matrix header */ 1437 header[0] = MAT_FILE_CLASSID; 1438 header[1] = M; header[2] = N; header[3] = nz; 1439 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1440 1441 /* store row lengths */ 1442 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1443 for (cnt=0, i=0; i<A->mbs; i++) 1444 for (j=0; j<bs; j++) 1445 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1446 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1447 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1448 1449 /* store column indices */ 1450 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1451 for (cnt=0, i=0; i<A->mbs; i++) 1452 for (k=0; k<bs; k++) 1453 for (j=A->i[i]; j<A->i[i+1]; j++) 1454 for (l=0; l<bs; l++) 1455 colidxs[cnt++] = bs*A->j[j] + l; 1456 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1457 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1458 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1459 1460 /* store nonzero values */ 1461 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1462 for (cnt=0, i=0; i<A->mbs; i++) 1463 for (k=0; k<bs; k++) 1464 for (j=A->i[i]; j<A->i[i+1]; j++) 1465 for (l=0; l<bs; l++) 1466 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1467 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1468 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1469 ierr = PetscFree(matvals);CHKERRQ(ierr); 1470 1471 /* write block size option to the viewer's .info file */ 1472 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1477 { 1478 PetscErrorCode ierr; 1479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1480 PetscInt i,bs = A->rmap->bs,k; 1481 1482 PetscFunctionBegin; 1483 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1484 for (i=0; i<a->mbs; i++) { 1485 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1486 for (k=a->i[i]; k<a->i[i+1]; k++) { 1487 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1488 } 1489 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1490 } 1491 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1496 { 1497 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1498 PetscErrorCode ierr; 1499 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1500 PetscViewerFormat format; 1501 1502 PetscFunctionBegin; 1503 if (A->structure_only) { 1504 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1509 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1510 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1511 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1512 const char *matname; 1513 Mat aij; 1514 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1515 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1516 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1517 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1518 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1520 PetscFunctionReturn(0); 1521 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1522 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1523 for (i=0; i<a->mbs; i++) { 1524 for (j=0; j<bs; j++) { 1525 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1526 for (k=a->i[i]; k<a->i[i+1]; k++) { 1527 for (l=0; l<bs; l++) { 1528 #if defined(PETSC_USE_COMPLEX) 1529 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1531 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1532 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1534 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } 1538 #else 1539 if (a->a[bs2*k + l*bs + j] != 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1541 } 1542 #endif 1543 } 1544 } 1545 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1546 } 1547 } 1548 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1551 for (i=0; i<a->mbs; i++) { 1552 for (j=0; j<bs; j++) { 1553 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1554 for (k=a->i[i]; k<a->i[i+1]; k++) { 1555 for (l=0; l<bs; l++) { 1556 #if defined(PETSC_USE_COMPLEX) 1557 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1558 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1559 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1560 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1562 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } else { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } 1566 #else 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1568 #endif 1569 } 1570 } 1571 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1572 } 1573 } 1574 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1575 } 1576 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #include <petscdraw.h> 1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1582 { 1583 Mat A = (Mat) Aa; 1584 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1585 PetscErrorCode ierr; 1586 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1587 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1588 MatScalar *aa; 1589 PetscViewer viewer; 1590 PetscViewerFormat format; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1594 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1595 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1596 1597 /* loop over matrix elements drawing boxes */ 1598 1599 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1600 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1601 /* Blue for negative, Cyan for zero and Red for positive */ 1602 color = PETSC_DRAW_BLUE; 1603 for (i=0,row=0; i<mbs; i++,row+=bs) { 1604 for (j=a->i[i]; j<a->i[i+1]; j++) { 1605 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1606 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1607 aa = a->a + j*bs2; 1608 for (k=0; k<bs; k++) { 1609 for (l=0; l<bs; l++) { 1610 if (PetscRealPart(*aa++) >= 0.) continue; 1611 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1612 } 1613 } 1614 } 1615 } 1616 color = PETSC_DRAW_CYAN; 1617 for (i=0,row=0; i<mbs; i++,row+=bs) { 1618 for (j=a->i[i]; j<a->i[i+1]; j++) { 1619 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1620 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1621 aa = a->a + j*bs2; 1622 for (k=0; k<bs; k++) { 1623 for (l=0; l<bs; l++) { 1624 if (PetscRealPart(*aa++) != 0.) continue; 1625 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1626 } 1627 } 1628 } 1629 } 1630 color = PETSC_DRAW_RED; 1631 for (i=0,row=0; i<mbs; i++,row+=bs) { 1632 for (j=a->i[i]; j<a->i[i+1]; j++) { 1633 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1634 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1635 aa = a->a + j*bs2; 1636 for (k=0; k<bs; k++) { 1637 for (l=0; l<bs; l++) { 1638 if (PetscRealPart(*aa++) <= 0.) continue; 1639 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1640 } 1641 } 1642 } 1643 } 1644 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1645 } else { 1646 /* use contour shading to indicate magnitude of values */ 1647 /* first determine max of all nonzero values */ 1648 PetscReal minv = 0.0, maxv = 0.0; 1649 PetscDraw popup; 1650 1651 for (i=0; i<a->nz*a->bs2; i++) { 1652 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1653 } 1654 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1655 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1656 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1657 1658 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1659 for (i=0,row=0; i<mbs; i++,row+=bs) { 1660 for (j=a->i[i]; j<a->i[i+1]; j++) { 1661 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1662 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1663 aa = a->a + j*bs2; 1664 for (k=0; k<bs; k++) { 1665 for (l=0; l<bs; l++) { 1666 MatScalar v = *aa++; 1667 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1668 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1669 } 1670 } 1671 } 1672 } 1673 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1674 } 1675 PetscFunctionReturn(0); 1676 } 1677 1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1679 { 1680 PetscErrorCode ierr; 1681 PetscReal xl,yl,xr,yr,w,h; 1682 PetscDraw draw; 1683 PetscBool isnull; 1684 1685 PetscFunctionBegin; 1686 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1687 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1688 if (isnull) PetscFunctionReturn(0); 1689 1690 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1691 xr += w; yr += h; xl = -w; yl = -h; 1692 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1693 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1694 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1695 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1696 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1701 { 1702 PetscErrorCode ierr; 1703 PetscBool iascii,isbinary,isdraw; 1704 1705 PetscFunctionBegin; 1706 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1707 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1708 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1709 if (iascii) { 1710 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1711 } else if (isbinary) { 1712 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1713 } else if (isdraw) { 1714 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1715 } else { 1716 Mat B; 1717 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1718 ierr = MatView(B,viewer);CHKERRQ(ierr); 1719 ierr = MatDestroy(&B);CHKERRQ(ierr); 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 1725 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1726 { 1727 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1728 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1729 PetscInt *ai = a->i,*ailen = a->ilen; 1730 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1731 MatScalar *ap,*aa = a->a; 1732 1733 PetscFunctionBegin; 1734 for (k=0; k<m; k++) { /* loop over rows */ 1735 row = im[k]; brow = row/bs; 1736 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1737 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1738 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1739 nrow = ailen[brow]; 1740 for (l=0; l<n; l++) { /* loop over columns */ 1741 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1742 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1743 col = in[l]; 1744 bcol = col/bs; 1745 cidx = col%bs; 1746 ridx = row%bs; 1747 high = nrow; 1748 low = 0; /* assume unsorted */ 1749 while (high-low > 5) { 1750 t = (low+high)/2; 1751 if (rp[t] > bcol) high = t; 1752 else low = t; 1753 } 1754 for (i=low; i<high; i++) { 1755 if (rp[i] > bcol) break; 1756 if (rp[i] == bcol) { 1757 *v++ = ap[bs2*i+bs*cidx+ridx]; 1758 goto finished; 1759 } 1760 } 1761 *v++ = 0.0; 1762 finished:; 1763 } 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1769 { 1770 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1771 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1772 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1773 PetscErrorCode ierr; 1774 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1775 PetscBool roworiented=a->roworiented; 1776 const PetscScalar *value = v; 1777 MatScalar *ap=NULL,*aa = a->a,*bap; 1778 1779 PetscFunctionBegin; 1780 if (roworiented) { 1781 stepval = (n-1)*bs; 1782 } else { 1783 stepval = (m-1)*bs; 1784 } 1785 for (k=0; k<m; k++) { /* loop over added rows */ 1786 row = im[k]; 1787 if (row < 0) continue; 1788 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1789 rp = aj + ai[row]; 1790 if (!A->structure_only) ap = aa + bs2*ai[row]; 1791 rmax = imax[row]; 1792 nrow = ailen[row]; 1793 low = 0; 1794 high = nrow; 1795 for (l=0; l<n; l++) { /* loop over added columns */ 1796 if (in[l] < 0) continue; 1797 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1798 col = in[l]; 1799 if (!A->structure_only) { 1800 if (roworiented) { 1801 value = v + (k*(stepval+bs) + l)*bs; 1802 } else { 1803 value = v + (l*(stepval+bs) + k)*bs; 1804 } 1805 } 1806 if (col <= lastcol) low = 0; 1807 else high = nrow; 1808 lastcol = col; 1809 while (high-low > 7) { 1810 t = (low+high)/2; 1811 if (rp[t] > col) high = t; 1812 else low = t; 1813 } 1814 for (i=low; i<high; i++) { 1815 if (rp[i] > col) break; 1816 if (rp[i] == col) { 1817 if (A->structure_only) goto noinsert2; 1818 bap = ap + bs2*i; 1819 if (roworiented) { 1820 if (is == ADD_VALUES) { 1821 for (ii=0; ii<bs; ii++,value+=stepval) { 1822 for (jj=ii; jj<bs2; jj+=bs) { 1823 bap[jj] += *value++; 1824 } 1825 } 1826 } else { 1827 for (ii=0; ii<bs; ii++,value+=stepval) { 1828 for (jj=ii; jj<bs2; jj+=bs) { 1829 bap[jj] = *value++; 1830 } 1831 } 1832 } 1833 } else { 1834 if (is == ADD_VALUES) { 1835 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1836 for (jj=0; jj<bs; jj++) { 1837 bap[jj] += value[jj]; 1838 } 1839 bap += bs; 1840 } 1841 } else { 1842 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1843 for (jj=0; jj<bs; jj++) { 1844 bap[jj] = value[jj]; 1845 } 1846 bap += bs; 1847 } 1848 } 1849 } 1850 goto noinsert2; 1851 } 1852 } 1853 if (nonew == 1) goto noinsert2; 1854 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1855 if (A->structure_only) { 1856 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1857 } else { 1858 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1859 } 1860 N = nrow++ - 1; high++; 1861 /* shift up all the later entries in this row */ 1862 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1863 rp[i] = col; 1864 if (!A->structure_only) { 1865 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1866 bap = ap + bs2*i; 1867 if (roworiented) { 1868 for (ii=0; ii<bs; ii++,value+=stepval) { 1869 for (jj=ii; jj<bs2; jj+=bs) { 1870 bap[jj] = *value++; 1871 } 1872 } 1873 } else { 1874 for (ii=0; ii<bs; ii++,value+=stepval) { 1875 for (jj=0; jj<bs; jj++) { 1876 *bap++ = *value++; 1877 } 1878 } 1879 } 1880 } 1881 noinsert2:; 1882 low = i; 1883 } 1884 ailen[row] = nrow; 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1890 { 1891 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1892 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1893 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1894 PetscErrorCode ierr; 1895 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1896 MatScalar *aa = a->a,*ap; 1897 PetscReal ratio=0.6; 1898 1899 PetscFunctionBegin; 1900 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1901 1902 if (m) rmax = ailen[0]; 1903 for (i=1; i<mbs; i++) { 1904 /* move each row back by the amount of empty slots (fshift) before it*/ 1905 fshift += imax[i-1] - ailen[i-1]; 1906 rmax = PetscMax(rmax,ailen[i]); 1907 if (fshift) { 1908 ip = aj + ai[i]; 1909 ap = aa + bs2*ai[i]; 1910 N = ailen[i]; 1911 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1912 if (!A->structure_only) { 1913 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1914 } 1915 } 1916 ai[i] = ai[i-1] + ailen[i-1]; 1917 } 1918 if (mbs) { 1919 fshift += imax[mbs-1] - ailen[mbs-1]; 1920 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1921 } 1922 1923 /* reset ilen and imax for each row */ 1924 a->nonzerorowcnt = 0; 1925 if (A->structure_only) { 1926 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1927 } else { /* !A->structure_only */ 1928 for (i=0; i<mbs; i++) { 1929 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1930 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1931 } 1932 } 1933 a->nz = ai[mbs]; 1934 1935 /* diagonals may have moved, so kill the diagonal pointers */ 1936 a->idiagvalid = PETSC_FALSE; 1937 if (fshift && a->diag) { 1938 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1939 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1940 a->diag = NULL; 1941 } 1942 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1943 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1944 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1945 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1946 1947 A->info.mallocs += a->reallocs; 1948 a->reallocs = 0; 1949 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1950 a->rmax = rmax; 1951 1952 if (!A->structure_only) { 1953 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1954 } 1955 PetscFunctionReturn(0); 1956 } 1957 1958 /* 1959 This function returns an array of flags which indicate the locations of contiguous 1960 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1961 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1962 Assume: sizes should be long enough to hold all the values. 1963 */ 1964 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1965 { 1966 PetscInt i,j,k,row; 1967 PetscBool flg; 1968 1969 PetscFunctionBegin; 1970 for (i=0,j=0; i<n; j++) { 1971 row = idx[i]; 1972 if (row%bs!=0) { /* Not the begining of a block */ 1973 sizes[j] = 1; 1974 i++; 1975 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1976 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1977 i++; 1978 } else { /* Begining of the block, so check if the complete block exists */ 1979 flg = PETSC_TRUE; 1980 for (k=1; k<bs; k++) { 1981 if (row+k != idx[i+k]) { /* break in the block */ 1982 flg = PETSC_FALSE; 1983 break; 1984 } 1985 } 1986 if (flg) { /* No break in the bs */ 1987 sizes[j] = bs; 1988 i += bs; 1989 } else { 1990 sizes[j] = 1; 1991 i++; 1992 } 1993 } 1994 } 1995 *bs_max = j; 1996 PetscFunctionReturn(0); 1997 } 1998 1999 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2000 { 2001 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2002 PetscErrorCode ierr; 2003 PetscInt i,j,k,count,*rows; 2004 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2005 PetscScalar zero = 0.0; 2006 MatScalar *aa; 2007 const PetscScalar *xx; 2008 PetscScalar *bb; 2009 2010 PetscFunctionBegin; 2011 /* fix right hand side if needed */ 2012 if (x && b) { 2013 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2014 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2015 for (i=0; i<is_n; i++) { 2016 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2017 } 2018 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2019 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2020 } 2021 2022 /* Make a copy of the IS and sort it */ 2023 /* allocate memory for rows,sizes */ 2024 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2025 2026 /* copy IS values to rows, and sort them */ 2027 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2028 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2029 2030 if (baij->keepnonzeropattern) { 2031 for (i=0; i<is_n; i++) sizes[i] = 1; 2032 bs_max = is_n; 2033 } else { 2034 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2035 A->nonzerostate++; 2036 } 2037 2038 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2039 row = rows[j]; 2040 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2041 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2042 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2043 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2044 if (diag != (PetscScalar)0.0) { 2045 if (baij->ilen[row/bs] > 0) { 2046 baij->ilen[row/bs] = 1; 2047 baij->j[baij->i[row/bs]] = row/bs; 2048 2049 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2050 } 2051 /* Now insert all the diagonal values for this bs */ 2052 for (k=0; k<bs; k++) { 2053 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2054 } 2055 } else { /* (diag == 0.0) */ 2056 baij->ilen[row/bs] = 0; 2057 } /* end (diag == 0.0) */ 2058 } else { /* (sizes[i] != bs) */ 2059 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2060 for (k=0; k<count; k++) { 2061 aa[0] = zero; 2062 aa += bs; 2063 } 2064 if (diag != (PetscScalar)0.0) { 2065 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2066 } 2067 } 2068 } 2069 2070 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2071 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2076 { 2077 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2078 PetscErrorCode ierr; 2079 PetscInt i,j,k,count; 2080 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2081 PetscScalar zero = 0.0; 2082 MatScalar *aa; 2083 const PetscScalar *xx; 2084 PetscScalar *bb; 2085 PetscBool *zeroed,vecs = PETSC_FALSE; 2086 2087 PetscFunctionBegin; 2088 /* fix right hand side if needed */ 2089 if (x && b) { 2090 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2091 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2092 vecs = PETSC_TRUE; 2093 } 2094 2095 /* zero the columns */ 2096 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2097 for (i=0; i<is_n; i++) { 2098 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2099 zeroed[is_idx[i]] = PETSC_TRUE; 2100 } 2101 for (i=0; i<A->rmap->N; i++) { 2102 if (!zeroed[i]) { 2103 row = i/bs; 2104 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2105 for (k=0; k<bs; k++) { 2106 col = bs*baij->j[j] + k; 2107 if (zeroed[col]) { 2108 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2109 if (vecs) bb[i] -= aa[0]*xx[col]; 2110 aa[0] = 0.0; 2111 } 2112 } 2113 } 2114 } else if (vecs) bb[i] = diag*xx[i]; 2115 } 2116 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2117 if (vecs) { 2118 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2119 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2120 } 2121 2122 /* zero the rows */ 2123 for (i=0; i<is_n; i++) { 2124 row = is_idx[i]; 2125 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2126 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2127 for (k=0; k<count; k++) { 2128 aa[0] = zero; 2129 aa += bs; 2130 } 2131 if (diag != (PetscScalar)0.0) { 2132 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2133 } 2134 } 2135 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2140 { 2141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2142 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2143 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2144 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2145 PetscErrorCode ierr; 2146 PetscInt ridx,cidx,bs2=a->bs2; 2147 PetscBool roworiented=a->roworiented; 2148 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2149 2150 PetscFunctionBegin; 2151 for (k=0; k<m; k++) { /* loop over added rows */ 2152 row = im[k]; 2153 brow = row/bs; 2154 if (row < 0) continue; 2155 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2156 rp = aj + ai[brow]; 2157 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2158 rmax = imax[brow]; 2159 nrow = ailen[brow]; 2160 low = 0; 2161 high = nrow; 2162 for (l=0; l<n; l++) { /* loop over added columns */ 2163 if (in[l] < 0) continue; 2164 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2165 col = in[l]; bcol = col/bs; 2166 ridx = row % bs; cidx = col % bs; 2167 if (!A->structure_only) { 2168 if (roworiented) { 2169 value = v[l + k*n]; 2170 } else { 2171 value = v[k + l*m]; 2172 } 2173 } 2174 if (col <= lastcol) low = 0; else high = nrow; 2175 lastcol = col; 2176 while (high-low > 7) { 2177 t = (low+high)/2; 2178 if (rp[t] > bcol) high = t; 2179 else low = t; 2180 } 2181 for (i=low; i<high; i++) { 2182 if (rp[i] > bcol) break; 2183 if (rp[i] == bcol) { 2184 bap = ap + bs2*i + bs*cidx + ridx; 2185 if (!A->structure_only) { 2186 if (is == ADD_VALUES) *bap += value; 2187 else *bap = value; 2188 } 2189 goto noinsert1; 2190 } 2191 } 2192 if (nonew == 1) goto noinsert1; 2193 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2194 if (A->structure_only) { 2195 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2196 } else { 2197 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2198 } 2199 N = nrow++ - 1; high++; 2200 /* shift up all the later entries in this row */ 2201 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2202 rp[i] = bcol; 2203 if (!A->structure_only) { 2204 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2205 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2206 ap[bs2*i + bs*cidx + ridx] = value; 2207 } 2208 a->nz++; 2209 A->nonzerostate++; 2210 noinsert1:; 2211 low = i; 2212 } 2213 ailen[brow] = nrow; 2214 } 2215 PetscFunctionReturn(0); 2216 } 2217 2218 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2219 { 2220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2221 Mat outA; 2222 PetscErrorCode ierr; 2223 PetscBool row_identity,col_identity; 2224 2225 PetscFunctionBegin; 2226 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2227 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2228 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2229 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2230 2231 outA = inA; 2232 inA->factortype = MAT_FACTOR_LU; 2233 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2234 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2235 2236 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2237 2238 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2239 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2240 a->row = row; 2241 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2242 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2243 a->col = col; 2244 2245 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2246 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2247 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2248 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2249 2250 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2251 if (!a->solve_work) { 2252 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2253 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2254 } 2255 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2260 { 2261 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2262 PetscInt i,nz,mbs; 2263 2264 PetscFunctionBegin; 2265 nz = baij->maxnz; 2266 mbs = baij->mbs; 2267 for (i=0; i<nz; i++) { 2268 baij->j[i] = indices[i]; 2269 } 2270 baij->nz = nz; 2271 for (i=0; i<mbs; i++) { 2272 baij->ilen[i] = baij->imax[i]; 2273 } 2274 PetscFunctionReturn(0); 2275 } 2276 2277 /*@ 2278 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2279 in the matrix. 2280 2281 Input Parameters: 2282 + mat - the SeqBAIJ matrix 2283 - indices - the column indices 2284 2285 Level: advanced 2286 2287 Notes: 2288 This can be called if you have precomputed the nonzero structure of the 2289 matrix and want to provide it to the matrix object to improve the performance 2290 of the MatSetValues() operation. 2291 2292 You MUST have set the correct numbers of nonzeros per row in the call to 2293 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2294 2295 MUST be called before any calls to MatSetValues(); 2296 2297 @*/ 2298 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2299 { 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2304 PetscValidPointer(indices,2); 2305 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 2309 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2310 { 2311 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2312 PetscErrorCode ierr; 2313 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2314 PetscReal atmp; 2315 PetscScalar *x,zero = 0.0; 2316 MatScalar *aa; 2317 PetscInt ncols,brow,krow,kcol; 2318 2319 PetscFunctionBegin; 2320 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2321 bs = A->rmap->bs; 2322 aa = a->a; 2323 ai = a->i; 2324 aj = a->j; 2325 mbs = a->mbs; 2326 2327 ierr = VecSet(v,zero);CHKERRQ(ierr); 2328 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2329 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2330 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2331 for (i=0; i<mbs; i++) { 2332 ncols = ai[1] - ai[0]; ai++; 2333 brow = bs*i; 2334 for (j=0; j<ncols; j++) { 2335 for (kcol=0; kcol<bs; kcol++) { 2336 for (krow=0; krow<bs; krow++) { 2337 atmp = PetscAbsScalar(*aa);aa++; 2338 row = brow + krow; /* row index */ 2339 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2340 } 2341 } 2342 aj++; 2343 } 2344 } 2345 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2350 { 2351 PetscErrorCode ierr; 2352 2353 PetscFunctionBegin; 2354 /* If the two matrices have the same copy implementation, use fast copy. */ 2355 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2356 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2357 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2358 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2359 2360 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2361 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2362 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2363 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2364 } else { 2365 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2366 } 2367 PetscFunctionReturn(0); 2368 } 2369 2370 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2371 { 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2380 { 2381 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2382 2383 PetscFunctionBegin; 2384 *array = a->a; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2389 { 2390 PetscFunctionBegin; 2391 *array = NULL; 2392 PetscFunctionReturn(0); 2393 } 2394 2395 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2396 { 2397 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2398 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2399 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 /* Set the number of nonzeros in the new matrix */ 2404 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2409 { 2410 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2411 PetscErrorCode ierr; 2412 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2413 PetscBLASInt one=1; 2414 2415 PetscFunctionBegin; 2416 if (str == SAME_NONZERO_PATTERN) { 2417 PetscScalar alpha = a; 2418 PetscBLASInt bnz; 2419 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2420 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2421 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2422 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2423 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2424 } else { 2425 Mat B; 2426 PetscInt *nnz; 2427 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2428 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2429 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2430 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2431 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2432 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2433 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2434 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2435 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2436 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2437 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2438 ierr = PetscFree(nnz);CHKERRQ(ierr); 2439 } 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2444 { 2445 #if defined(PETSC_USE_COMPLEX) 2446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2447 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2448 MatScalar *aa = a->a; 2449 2450 PetscFunctionBegin; 2451 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2452 #else 2453 PetscFunctionBegin; 2454 #endif 2455 PetscFunctionReturn(0); 2456 } 2457 2458 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2459 { 2460 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2461 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2462 MatScalar *aa = a->a; 2463 2464 PetscFunctionBegin; 2465 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2470 { 2471 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2472 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2473 MatScalar *aa = a->a; 2474 2475 PetscFunctionBegin; 2476 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 /* 2481 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2482 */ 2483 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2484 { 2485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2486 PetscErrorCode ierr; 2487 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2488 PetscInt nz = a->i[m],row,*jj,mr,col; 2489 2490 PetscFunctionBegin; 2491 *nn = n; 2492 if (!ia) PetscFunctionReturn(0); 2493 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2494 else { 2495 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2496 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2497 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2498 jj = a->j; 2499 for (i=0; i<nz; i++) { 2500 collengths[jj[i]]++; 2501 } 2502 cia[0] = oshift; 2503 for (i=0; i<n; i++) { 2504 cia[i+1] = cia[i] + collengths[i]; 2505 } 2506 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2507 jj = a->j; 2508 for (row=0; row<m; row++) { 2509 mr = a->i[row+1] - a->i[row]; 2510 for (i=0; i<mr; i++) { 2511 col = *jj++; 2512 2513 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2514 } 2515 } 2516 ierr = PetscFree(collengths);CHKERRQ(ierr); 2517 *ia = cia; *ja = cja; 2518 } 2519 PetscFunctionReturn(0); 2520 } 2521 2522 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2523 { 2524 PetscErrorCode ierr; 2525 2526 PetscFunctionBegin; 2527 if (!ia) PetscFunctionReturn(0); 2528 ierr = PetscFree(*ia);CHKERRQ(ierr); 2529 ierr = PetscFree(*ja);CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 /* 2534 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2535 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2536 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2537 */ 2538 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2539 { 2540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2541 PetscErrorCode ierr; 2542 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2543 PetscInt nz = a->i[m],row,*jj,mr,col; 2544 PetscInt *cspidx; 2545 2546 PetscFunctionBegin; 2547 *nn = n; 2548 if (!ia) PetscFunctionReturn(0); 2549 2550 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2551 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2552 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2553 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2554 jj = a->j; 2555 for (i=0; i<nz; i++) { 2556 collengths[jj[i]]++; 2557 } 2558 cia[0] = oshift; 2559 for (i=0; i<n; i++) { 2560 cia[i+1] = cia[i] + collengths[i]; 2561 } 2562 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2563 jj = a->j; 2564 for (row=0; row<m; row++) { 2565 mr = a->i[row+1] - a->i[row]; 2566 for (i=0; i<mr; i++) { 2567 col = *jj++; 2568 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2569 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2570 } 2571 } 2572 ierr = PetscFree(collengths);CHKERRQ(ierr); 2573 *ia = cia; 2574 *ja = cja; 2575 *spidx = cspidx; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2580 { 2581 PetscErrorCode ierr; 2582 2583 PetscFunctionBegin; 2584 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2585 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2590 { 2591 PetscErrorCode ierr; 2592 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2593 2594 PetscFunctionBegin; 2595 if (!Y->preallocated || !aij->nz) { 2596 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2597 } 2598 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 /* -------------------------------------------------------------------*/ 2603 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2604 MatGetRow_SeqBAIJ, 2605 MatRestoreRow_SeqBAIJ, 2606 MatMult_SeqBAIJ_N, 2607 /* 4*/ MatMultAdd_SeqBAIJ_N, 2608 MatMultTranspose_SeqBAIJ, 2609 MatMultTransposeAdd_SeqBAIJ, 2610 NULL, 2611 NULL, 2612 NULL, 2613 /* 10*/ NULL, 2614 MatLUFactor_SeqBAIJ, 2615 NULL, 2616 NULL, 2617 MatTranspose_SeqBAIJ, 2618 /* 15*/ MatGetInfo_SeqBAIJ, 2619 MatEqual_SeqBAIJ, 2620 MatGetDiagonal_SeqBAIJ, 2621 MatDiagonalScale_SeqBAIJ, 2622 MatNorm_SeqBAIJ, 2623 /* 20*/ NULL, 2624 MatAssemblyEnd_SeqBAIJ, 2625 MatSetOption_SeqBAIJ, 2626 MatZeroEntries_SeqBAIJ, 2627 /* 24*/ MatZeroRows_SeqBAIJ, 2628 NULL, 2629 NULL, 2630 NULL, 2631 NULL, 2632 /* 29*/ MatSetUp_SeqBAIJ, 2633 NULL, 2634 NULL, 2635 NULL, 2636 NULL, 2637 /* 34*/ MatDuplicate_SeqBAIJ, 2638 NULL, 2639 NULL, 2640 MatILUFactor_SeqBAIJ, 2641 NULL, 2642 /* 39*/ MatAXPY_SeqBAIJ, 2643 MatCreateSubMatrices_SeqBAIJ, 2644 MatIncreaseOverlap_SeqBAIJ, 2645 MatGetValues_SeqBAIJ, 2646 MatCopy_SeqBAIJ, 2647 /* 44*/ NULL, 2648 MatScale_SeqBAIJ, 2649 MatShift_SeqBAIJ, 2650 NULL, 2651 MatZeroRowsColumns_SeqBAIJ, 2652 /* 49*/ NULL, 2653 MatGetRowIJ_SeqBAIJ, 2654 MatRestoreRowIJ_SeqBAIJ, 2655 MatGetColumnIJ_SeqBAIJ, 2656 MatRestoreColumnIJ_SeqBAIJ, 2657 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2658 NULL, 2659 NULL, 2660 NULL, 2661 MatSetValuesBlocked_SeqBAIJ, 2662 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2663 MatDestroy_SeqBAIJ, 2664 MatView_SeqBAIJ, 2665 NULL, 2666 NULL, 2667 /* 64*/ NULL, 2668 NULL, 2669 NULL, 2670 NULL, 2671 NULL, 2672 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2673 NULL, 2674 MatConvert_Basic, 2675 NULL, 2676 NULL, 2677 /* 74*/ NULL, 2678 MatFDColoringApply_BAIJ, 2679 NULL, 2680 NULL, 2681 NULL, 2682 /* 79*/ NULL, 2683 NULL, 2684 NULL, 2685 NULL, 2686 MatLoad_SeqBAIJ, 2687 /* 84*/ NULL, 2688 NULL, 2689 NULL, 2690 NULL, 2691 NULL, 2692 /* 89*/ NULL, 2693 NULL, 2694 NULL, 2695 NULL, 2696 NULL, 2697 /* 94*/ NULL, 2698 NULL, 2699 NULL, 2700 NULL, 2701 NULL, 2702 /* 99*/ NULL, 2703 NULL, 2704 NULL, 2705 MatConjugate_SeqBAIJ, 2706 NULL, 2707 /*104*/ NULL, 2708 MatRealPart_SeqBAIJ, 2709 MatImaginaryPart_SeqBAIJ, 2710 NULL, 2711 NULL, 2712 /*109*/ NULL, 2713 NULL, 2714 NULL, 2715 NULL, 2716 MatMissingDiagonal_SeqBAIJ, 2717 /*114*/ NULL, 2718 NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 /*119*/ NULL, 2723 NULL, 2724 MatMultHermitianTranspose_SeqBAIJ, 2725 MatMultHermitianTransposeAdd_SeqBAIJ, 2726 NULL, 2727 /*124*/ NULL, 2728 NULL, 2729 MatInvertBlockDiagonal_SeqBAIJ, 2730 NULL, 2731 NULL, 2732 /*129*/ NULL, 2733 NULL, 2734 NULL, 2735 NULL, 2736 NULL, 2737 /*134*/ NULL, 2738 NULL, 2739 NULL, 2740 NULL, 2741 NULL, 2742 /*139*/ MatSetBlockSizes_Default, 2743 NULL, 2744 NULL, 2745 MatFDColoringSetUp_SeqXAIJ, 2746 NULL, 2747 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2748 MatDestroySubMatrices_SeqBAIJ 2749 }; 2750 2751 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2752 { 2753 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2754 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2759 2760 /* allocate space for values if not already there */ 2761 if (!aij->saved_values) { 2762 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2763 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2764 } 2765 2766 /* copy values over */ 2767 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2768 PetscFunctionReturn(0); 2769 } 2770 2771 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2772 { 2773 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2774 PetscErrorCode ierr; 2775 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2776 2777 PetscFunctionBegin; 2778 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2779 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2780 2781 /* copy values over */ 2782 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2787 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2788 2789 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2790 { 2791 Mat_SeqBAIJ *b; 2792 PetscErrorCode ierr; 2793 PetscInt i,mbs,nbs,bs2; 2794 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2795 2796 PetscFunctionBegin; 2797 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2798 if (nz == MAT_SKIP_ALLOCATION) { 2799 skipallocation = PETSC_TRUE; 2800 nz = 0; 2801 } 2802 2803 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2804 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2805 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2806 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2807 2808 B->preallocated = PETSC_TRUE; 2809 2810 mbs = B->rmap->n/bs; 2811 nbs = B->cmap->n/bs; 2812 bs2 = bs*bs; 2813 2814 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2815 2816 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2817 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2818 if (nnz) { 2819 for (i=0; i<mbs; i++) { 2820 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2821 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2822 } 2823 } 2824 2825 b = (Mat_SeqBAIJ*)B->data; 2826 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2827 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2828 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2829 2830 if (!flg) { 2831 switch (bs) { 2832 case 1: 2833 B->ops->mult = MatMult_SeqBAIJ_1; 2834 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2835 break; 2836 case 2: 2837 B->ops->mult = MatMult_SeqBAIJ_2; 2838 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2839 break; 2840 case 3: 2841 B->ops->mult = MatMult_SeqBAIJ_3; 2842 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2843 break; 2844 case 4: 2845 B->ops->mult = MatMult_SeqBAIJ_4; 2846 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2847 break; 2848 case 5: 2849 B->ops->mult = MatMult_SeqBAIJ_5; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2851 break; 2852 case 6: 2853 B->ops->mult = MatMult_SeqBAIJ_6; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2855 break; 2856 case 7: 2857 B->ops->mult = MatMult_SeqBAIJ_7; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2859 break; 2860 case 9: 2861 { 2862 PetscInt version = 1; 2863 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2864 switch (version) { 2865 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2866 case 1: 2867 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2868 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2869 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs); 2870 break; 2871 #endif 2872 default: 2873 B->ops->mult = MatMult_SeqBAIJ_N; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2875 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs); 2876 break; 2877 } 2878 break; 2879 } 2880 case 11: 2881 B->ops->mult = MatMult_SeqBAIJ_11; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2883 break; 2884 case 12: 2885 { 2886 PetscInt version = 1; 2887 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2888 switch (version) { 2889 case 1: 2890 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2891 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2892 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs); 2893 break; 2894 case 2: 2895 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2896 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2897 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs); 2898 break; 2899 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2900 case 3: 2901 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2902 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2903 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs); 2904 break; 2905 #endif 2906 default: 2907 B->ops->mult = MatMult_SeqBAIJ_N; 2908 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2909 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs); 2910 break; 2911 } 2912 break; 2913 } 2914 case 15: 2915 { 2916 PetscInt version = 1; 2917 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2918 switch (version) { 2919 case 1: 2920 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2921 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs); 2922 break; 2923 case 2: 2924 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2925 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs); 2926 break; 2927 case 3: 2928 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2929 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs); 2930 break; 2931 case 4: 2932 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2933 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs); 2934 break; 2935 default: 2936 B->ops->mult = MatMult_SeqBAIJ_N; 2937 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs); 2938 break; 2939 } 2940 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2941 break; 2942 } 2943 default: 2944 B->ops->mult = MatMult_SeqBAIJ_N; 2945 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2946 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs); 2947 break; 2948 } 2949 } 2950 B->ops->sor = MatSOR_SeqBAIJ; 2951 b->mbs = mbs; 2952 b->nbs = nbs; 2953 if (!skipallocation) { 2954 if (!b->imax) { 2955 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2956 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2957 2958 b->free_imax_ilen = PETSC_TRUE; 2959 } 2960 /* b->ilen will count nonzeros in each block row so far. */ 2961 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2962 if (!nnz) { 2963 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2964 else if (nz < 0) nz = 1; 2965 nz = PetscMin(nz,nbs); 2966 for (i=0; i<mbs; i++) b->imax[i] = nz; 2967 ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 2968 } else { 2969 PetscInt64 nz64 = 0; 2970 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2971 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 2972 } 2973 2974 /* allocate the matrix space */ 2975 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2976 if (B->structure_only) { 2977 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2978 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2979 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2980 } else { 2981 PetscInt nzbs2 = 0; 2982 ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 2983 ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2984 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2985 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2986 } 2987 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2988 2989 if (B->structure_only) { 2990 b->singlemalloc = PETSC_FALSE; 2991 b->free_a = PETSC_FALSE; 2992 } else { 2993 b->singlemalloc = PETSC_TRUE; 2994 b->free_a = PETSC_TRUE; 2995 } 2996 b->free_ij = PETSC_TRUE; 2997 2998 b->i[0] = 0; 2999 for (i=1; i<mbs+1; i++) { 3000 b->i[i] = b->i[i-1] + b->imax[i-1]; 3001 } 3002 3003 } else { 3004 b->free_a = PETSC_FALSE; 3005 b->free_ij = PETSC_FALSE; 3006 } 3007 3008 b->bs2 = bs2; 3009 b->mbs = mbs; 3010 b->nz = 0; 3011 b->maxnz = nz; 3012 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3013 B->was_assembled = PETSC_FALSE; 3014 B->assembled = PETSC_FALSE; 3015 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3016 PetscFunctionReturn(0); 3017 } 3018 3019 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3020 { 3021 PetscInt i,m,nz,nz_max=0,*nnz; 3022 PetscScalar *values=NULL; 3023 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3028 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3029 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3030 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3031 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3032 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3033 m = B->rmap->n/bs; 3034 3035 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3036 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3037 for (i=0; i<m; i++) { 3038 nz = ii[i+1]- ii[i]; 3039 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3040 nz_max = PetscMax(nz_max, nz); 3041 nnz[i] = nz; 3042 } 3043 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3044 ierr = PetscFree(nnz);CHKERRQ(ierr); 3045 3046 values = (PetscScalar*)V; 3047 if (!values) { 3048 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3049 } 3050 for (i=0; i<m; i++) { 3051 PetscInt ncols = ii[i+1] - ii[i]; 3052 const PetscInt *icols = jj + ii[i]; 3053 if (bs == 1 || !roworiented) { 3054 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3055 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 } else { 3057 PetscInt j; 3058 for (j=0; j<ncols; j++) { 3059 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3060 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3061 } 3062 } 3063 } 3064 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3065 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 /*@C 3072 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3073 3074 Not Collective 3075 3076 Input Parameter: 3077 . mat - a MATSEQBAIJ matrix 3078 3079 Output Parameter: 3080 . array - pointer to the data 3081 3082 Level: intermediate 3083 3084 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3085 @*/ 3086 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3087 { 3088 PetscErrorCode ierr; 3089 3090 PetscFunctionBegin; 3091 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3092 PetscFunctionReturn(0); 3093 } 3094 3095 /*@C 3096 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3097 3098 Not Collective 3099 3100 Input Parameters: 3101 + mat - a MATSEQBAIJ matrix 3102 - array - pointer to the data 3103 3104 Level: intermediate 3105 3106 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3107 @*/ 3108 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3109 { 3110 PetscErrorCode ierr; 3111 3112 PetscFunctionBegin; 3113 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3114 PetscFunctionReturn(0); 3115 } 3116 3117 /*MC 3118 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3119 block sparse compressed row format. 3120 3121 Options Database Keys: 3122 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3123 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3124 3125 Level: beginner 3126 3127 Notes: 3128 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3129 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3130 3131 Run with -info to see what version of the matrix-vector product is being used 3132 3133 .seealso: MatCreateSeqBAIJ() 3134 M*/ 3135 3136 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3137 3138 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3139 { 3140 PetscErrorCode ierr; 3141 PetscMPIInt size; 3142 Mat_SeqBAIJ *b; 3143 3144 PetscFunctionBegin; 3145 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3146 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3147 3148 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3149 B->data = (void*)b; 3150 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3151 3152 b->row = NULL; 3153 b->col = NULL; 3154 b->icol = NULL; 3155 b->reallocs = 0; 3156 b->saved_values = NULL; 3157 3158 b->roworiented = PETSC_TRUE; 3159 b->nonew = 0; 3160 b->diag = NULL; 3161 B->spptr = NULL; 3162 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3163 b->keepnonzeropattern = PETSC_FALSE; 3164 3165 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3166 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3167 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3168 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3169 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3170 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3171 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3172 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3173 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3176 #if defined(PETSC_HAVE_HYPRE) 3177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3178 #endif 3179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3180 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3185 { 3186 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3187 PetscErrorCode ierr; 3188 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3189 3190 PetscFunctionBegin; 3191 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3192 3193 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3194 c->imax = a->imax; 3195 c->ilen = a->ilen; 3196 c->free_imax_ilen = PETSC_FALSE; 3197 } else { 3198 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3199 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3200 for (i=0; i<mbs; i++) { 3201 c->imax[i] = a->imax[i]; 3202 c->ilen[i] = a->ilen[i]; 3203 } 3204 c->free_imax_ilen = PETSC_TRUE; 3205 } 3206 3207 /* allocate the matrix space */ 3208 if (mallocmatspace) { 3209 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3210 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3211 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3212 3213 c->i = a->i; 3214 c->j = a->j; 3215 c->singlemalloc = PETSC_FALSE; 3216 c->free_a = PETSC_TRUE; 3217 c->free_ij = PETSC_FALSE; 3218 c->parent = A; 3219 C->preallocated = PETSC_TRUE; 3220 C->assembled = PETSC_TRUE; 3221 3222 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3223 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3224 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3225 } else { 3226 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3227 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3228 3229 c->singlemalloc = PETSC_TRUE; 3230 c->free_a = PETSC_TRUE; 3231 c->free_ij = PETSC_TRUE; 3232 3233 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3234 if (mbs > 0) { 3235 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3236 if (cpvalues == MAT_COPY_VALUES) { 3237 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3238 } else { 3239 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3240 } 3241 } 3242 C->preallocated = PETSC_TRUE; 3243 C->assembled = PETSC_TRUE; 3244 } 3245 } 3246 3247 c->roworiented = a->roworiented; 3248 c->nonew = a->nonew; 3249 3250 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3251 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3252 3253 c->bs2 = a->bs2; 3254 c->mbs = a->mbs; 3255 c->nbs = a->nbs; 3256 3257 if (a->diag) { 3258 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3259 c->diag = a->diag; 3260 c->free_diag = PETSC_FALSE; 3261 } else { 3262 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3263 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3264 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3265 c->free_diag = PETSC_TRUE; 3266 } 3267 } else c->diag = NULL; 3268 3269 c->nz = a->nz; 3270 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3271 c->solve_work = NULL; 3272 c->mult_work = NULL; 3273 c->sor_workt = NULL; 3274 c->sor_work = NULL; 3275 3276 c->compressedrow.use = a->compressedrow.use; 3277 c->compressedrow.nrows = a->compressedrow.nrows; 3278 if (a->compressedrow.use) { 3279 i = a->compressedrow.nrows; 3280 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3281 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3282 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3283 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3284 } else { 3285 c->compressedrow.use = PETSC_FALSE; 3286 c->compressedrow.i = NULL; 3287 c->compressedrow.rindex = NULL; 3288 } 3289 C->nonzerostate = A->nonzerostate; 3290 3291 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3292 PetscFunctionReturn(0); 3293 } 3294 3295 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3296 { 3297 PetscErrorCode ierr; 3298 3299 PetscFunctionBegin; 3300 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3301 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3302 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3303 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } 3306 3307 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3308 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3309 { 3310 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3311 PetscInt *rowidxs,*colidxs; 3312 PetscScalar *matvals; 3313 PetscErrorCode ierr; 3314 3315 PetscFunctionBegin; 3316 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3317 3318 /* read matrix header */ 3319 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3320 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3321 M = header[1]; N = header[2]; nz = header[3]; 3322 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3323 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3324 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3325 3326 /* set block sizes from the viewer's .info file */ 3327 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3328 /* set local and global sizes if not set already */ 3329 if (mat->rmap->n < 0) mat->rmap->n = M; 3330 if (mat->cmap->n < 0) mat->cmap->n = N; 3331 if (mat->rmap->N < 0) mat->rmap->N = M; 3332 if (mat->cmap->N < 0) mat->cmap->N = N; 3333 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3334 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3335 3336 /* check if the matrix sizes are correct */ 3337 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3338 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3339 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3340 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3341 mbs = m/bs; nbs = n/bs; 3342 3343 /* read in row lengths, column indices and nonzero values */ 3344 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3345 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3346 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3347 sum = rowidxs[m]; 3348 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3349 3350 /* read in column indices and nonzero values */ 3351 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3352 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3353 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3354 3355 { /* preallocate matrix storage */ 3356 PetscBT bt; /* helper bit set to count nonzeros */ 3357 PetscInt *nnz; 3358 PetscBool sbaij; 3359 3360 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3361 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3362 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3363 for (i=0; i<mbs; i++) { 3364 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3365 for (k=0; k<bs; k++) { 3366 PetscInt row = bs*i + k; 3367 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3368 PetscInt col = colidxs[j]; 3369 if (!sbaij || col >= row) 3370 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3371 } 3372 } 3373 } 3374 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3375 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3376 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3377 ierr = PetscFree(nnz);CHKERRQ(ierr); 3378 } 3379 3380 /* store matrix values */ 3381 for (i=0; i<m; i++) { 3382 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3383 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3384 } 3385 3386 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3387 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3388 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3389 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3390 PetscFunctionReturn(0); 3391 } 3392 3393 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3394 { 3395 PetscErrorCode ierr; 3396 PetscBool isbinary; 3397 3398 PetscFunctionBegin; 3399 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3400 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3401 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3402 PetscFunctionReturn(0); 3403 } 3404 3405 /*@C 3406 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3407 compressed row) format. For good matrix assembly performance the 3408 user should preallocate the matrix storage by setting the parameter nz 3409 (or the array nnz). By setting these parameters accurately, performance 3410 during matrix assembly can be increased by more than a factor of 50. 3411 3412 Collective 3413 3414 Input Parameters: 3415 + comm - MPI communicator, set to PETSC_COMM_SELF 3416 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3417 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3418 . m - number of rows 3419 . n - number of columns 3420 . nz - number of nonzero blocks per block row (same for all rows) 3421 - nnz - array containing the number of nonzero blocks in the various block rows 3422 (possibly different for each block row) or NULL 3423 3424 Output Parameter: 3425 . A - the matrix 3426 3427 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3428 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3429 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3430 3431 Options Database Keys: 3432 + -mat_no_unroll - uses code that does not unroll the loops in the 3433 block calculations (much slower) 3434 - -mat_block_size - size of the blocks to use 3435 3436 Level: intermediate 3437 3438 Notes: 3439 The number of rows and columns must be divisible by blocksize. 3440 3441 If the nnz parameter is given then the nz parameter is ignored 3442 3443 A nonzero block is any block that as 1 or more nonzeros in it 3444 3445 The block AIJ format is fully compatible with standard Fortran 77 3446 storage. That is, the stored row and column indices can begin at 3447 either one (as in Fortran) or zero. See the users' manual for details. 3448 3449 Specify the preallocated storage with either nz or nnz (not both). 3450 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3451 allocation. See Users-Manual: ch_mat for details. 3452 matrices. 3453 3454 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3455 @*/ 3456 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3457 { 3458 PetscErrorCode ierr; 3459 3460 PetscFunctionBegin; 3461 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3462 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3463 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3464 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3465 PetscFunctionReturn(0); 3466 } 3467 3468 /*@C 3469 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3470 per row in the matrix. For good matrix assembly performance the 3471 user should preallocate the matrix storage by setting the parameter nz 3472 (or the array nnz). By setting these parameters accurately, performance 3473 during matrix assembly can be increased by more than a factor of 50. 3474 3475 Collective 3476 3477 Input Parameters: 3478 + B - the matrix 3479 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3480 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3481 . nz - number of block nonzeros per block row (same for all rows) 3482 - nnz - array containing the number of block nonzeros in the various block rows 3483 (possibly different for each block row) or NULL 3484 3485 Options Database Keys: 3486 + -mat_no_unroll - uses code that does not unroll the loops in the 3487 block calculations (much slower) 3488 - -mat_block_size - size of the blocks to use 3489 3490 Level: intermediate 3491 3492 Notes: 3493 If the nnz parameter is given then the nz parameter is ignored 3494 3495 You can call MatGetInfo() to get information on how effective the preallocation was; 3496 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3497 You can also run with the option -info and look for messages with the string 3498 malloc in them to see if additional memory allocation was needed. 3499 3500 The block AIJ format is fully compatible with standard Fortran 77 3501 storage. That is, the stored row and column indices can begin at 3502 either one (as in Fortran) or zero. See the users' manual for details. 3503 3504 Specify the preallocated storage with either nz or nnz (not both). 3505 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3506 allocation. See Users-Manual: ch_mat for details. 3507 3508 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3509 @*/ 3510 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3511 { 3512 PetscErrorCode ierr; 3513 3514 PetscFunctionBegin; 3515 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3516 PetscValidType(B,1); 3517 PetscValidLogicalCollectiveInt(B,bs,2); 3518 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3519 PetscFunctionReturn(0); 3520 } 3521 3522 /*@C 3523 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3524 3525 Collective 3526 3527 Input Parameters: 3528 + B - the matrix 3529 . i - the indices into j for the start of each local row (starts with zero) 3530 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3531 - v - optional values in the matrix 3532 3533 Level: advanced 3534 3535 Notes: 3536 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3537 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3538 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3539 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3540 block column and the second index is over columns within a block. 3541 3542 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3543 3544 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3545 @*/ 3546 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3547 { 3548 PetscErrorCode ierr; 3549 3550 PetscFunctionBegin; 3551 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3552 PetscValidType(B,1); 3553 PetscValidLogicalCollectiveInt(B,bs,2); 3554 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3555 PetscFunctionReturn(0); 3556 } 3557 3558 3559 /*@ 3560 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3561 3562 Collective 3563 3564 Input Parameters: 3565 + comm - must be an MPI communicator of size 1 3566 . bs - size of block 3567 . m - number of rows 3568 . n - number of columns 3569 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3570 . j - column indices 3571 - a - matrix values 3572 3573 Output Parameter: 3574 . mat - the matrix 3575 3576 Level: advanced 3577 3578 Notes: 3579 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3580 once the matrix is destroyed 3581 3582 You cannot set new nonzero locations into this matrix, that will generate an error. 3583 3584 The i and j indices are 0 based 3585 3586 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3587 3588 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3589 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3590 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3591 with column-major ordering within blocks. 3592 3593 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3594 3595 @*/ 3596 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3597 { 3598 PetscErrorCode ierr; 3599 PetscInt ii; 3600 Mat_SeqBAIJ *baij; 3601 3602 PetscFunctionBegin; 3603 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3604 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3605 3606 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3607 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3608 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3609 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3610 baij = (Mat_SeqBAIJ*)(*mat)->data; 3611 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3612 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3613 3614 baij->i = i; 3615 baij->j = j; 3616 baij->a = a; 3617 3618 baij->singlemalloc = PETSC_FALSE; 3619 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3620 baij->free_a = PETSC_FALSE; 3621 baij->free_ij = PETSC_FALSE; 3622 3623 for (ii=0; ii<m; ii++) { 3624 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3625 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3626 } 3627 if (PetscDefined(USE_DEBUG)) { 3628 for (ii=0; ii<baij->i[m]; ii++) { 3629 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3630 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3631 } 3632 } 3633 3634 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3635 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3636 PetscFunctionReturn(0); 3637 } 3638 3639 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3640 { 3641 PetscErrorCode ierr; 3642 PetscMPIInt size; 3643 3644 PetscFunctionBegin; 3645 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3646 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3647 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3648 } else { 3649 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3650 } 3651 PetscFunctionReturn(0); 3652 } 3653