1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 149 its = its*lits; 150 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 151 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 156 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 157 158 if (!m) PetscFunctionReturn(0); 159 diag = a->diag; 160 idiag = a->idiag; 161 k = PetscMax(A->rmap->n,A->cmap->n); 162 if (!a->mult_work) { 163 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 164 } 165 if (!a->sor_workt) { 166 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 167 } 168 if (!a->sor_work) { 169 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 170 } 171 work = a->mult_work; 172 t = a->sor_workt; 173 w = a->sor_work; 174 175 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 176 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 177 178 if (flag & SOR_ZERO_INITIAL_GUESS) { 179 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 180 switch (bs) { 181 case 1: 182 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 183 t[0] = b[0]; 184 i2 = 1; 185 idiag += 1; 186 for (i=1; i<m; i++) { 187 v = aa + ai[i]; 188 vi = aj + ai[i]; 189 nz = diag[i] - ai[i]; 190 s[0] = b[i2]; 191 for (j=0; j<nz; j++) { 192 xw[0] = x[vi[j]]; 193 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 194 } 195 t[i2] = s[0]; 196 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 197 x[i2] = xw[0]; 198 idiag += 1; 199 i2 += 1; 200 } 201 break; 202 case 2: 203 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 204 t[0] = b[0]; t[1] = b[1]; 205 i2 = 2; 206 idiag += 4; 207 for (i=1; i<m; i++) { 208 v = aa + 4*ai[i]; 209 vi = aj + ai[i]; 210 nz = diag[i] - ai[i]; 211 s[0] = b[i2]; s[1] = b[i2+1]; 212 for (j=0; j<nz; j++) { 213 idx = 2*vi[j]; 214 it = 4*j; 215 xw[0] = x[idx]; xw[1] = x[1+idx]; 216 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 217 } 218 t[i2] = s[0]; t[i2+1] = s[1]; 219 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 220 x[i2] = xw[0]; x[i2+1] = xw[1]; 221 idiag += 4; 222 i2 += 2; 223 } 224 break; 225 case 3: 226 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 227 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 228 i2 = 3; 229 idiag += 9; 230 for (i=1; i<m; i++) { 231 v = aa + 9*ai[i]; 232 vi = aj + ai[i]; 233 nz = diag[i] - ai[i]; 234 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 235 while (nz--) { 236 idx = 3*(*vi++); 237 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 238 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 239 v += 9; 240 } 241 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 242 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 243 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 244 idiag += 9; 245 i2 += 3; 246 } 247 break; 248 case 4: 249 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 250 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 251 i2 = 4; 252 idiag += 16; 253 for (i=1; i<m; i++) { 254 v = aa + 16*ai[i]; 255 vi = aj + ai[i]; 256 nz = diag[i] - ai[i]; 257 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 258 while (nz--) { 259 idx = 4*(*vi++); 260 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 261 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 262 v += 16; 263 } 264 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 265 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 266 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 267 idiag += 16; 268 i2 += 4; 269 } 270 break; 271 case 5: 272 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 273 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 274 i2 = 5; 275 idiag += 25; 276 for (i=1; i<m; i++) { 277 v = aa + 25*ai[i]; 278 vi = aj + ai[i]; 279 nz = diag[i] - ai[i]; 280 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 281 while (nz--) { 282 idx = 5*(*vi++); 283 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 284 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 285 v += 25; 286 } 287 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 288 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 289 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 290 idiag += 25; 291 i2 += 5; 292 } 293 break; 294 case 6: 295 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 296 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 297 i2 = 6; 298 idiag += 36; 299 for (i=1; i<m; i++) { 300 v = aa + 36*ai[i]; 301 vi = aj + ai[i]; 302 nz = diag[i] - ai[i]; 303 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 304 while (nz--) { 305 idx = 6*(*vi++); 306 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 307 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 308 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 309 v += 36; 310 } 311 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 312 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 313 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 314 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 315 idiag += 36; 316 i2 += 6; 317 } 318 break; 319 case 7: 320 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 321 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 322 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 323 i2 = 7; 324 idiag += 49; 325 for (i=1; i<m; i++) { 326 v = aa + 49*ai[i]; 327 vi = aj + ai[i]; 328 nz = diag[i] - ai[i]; 329 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 330 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 331 while (nz--) { 332 idx = 7*(*vi++); 333 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 334 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 335 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 336 v += 49; 337 } 338 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 339 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 340 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 341 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 342 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 343 idiag += 49; 344 i2 += 7; 345 } 346 break; 347 default: 348 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 349 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 350 i2 = bs; 351 idiag += bs2; 352 for (i=1; i<m; i++) { 353 v = aa + bs2*ai[i]; 354 vi = aj + ai[i]; 355 nz = diag[i] - ai[i]; 356 357 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 358 /* copy all rows of x that are needed into contiguous space */ 359 workt = work; 360 for (j=0; j<nz; j++) { 361 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 362 workt += bs; 363 } 364 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 365 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 366 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 367 368 idiag += bs2; 369 i2 += bs; 370 } 371 break; 372 } 373 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 374 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 375 xb = t; 376 } 377 else xb = b; 378 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 379 idiag = a->idiag+bs2*(a->mbs-1); 380 i2 = bs * (m-1); 381 switch (bs) { 382 case 1: 383 s[0] = xb[i2]; 384 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 385 x[i2] = xw[0]; 386 i2 -= 1; 387 for (i=m-2; i>=0; i--) { 388 v = aa + (diag[i]+1); 389 vi = aj + diag[i] + 1; 390 nz = ai[i+1] - diag[i] - 1; 391 s[0] = xb[i2]; 392 for (j=0; j<nz; j++) { 393 xw[0] = x[vi[j]]; 394 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 395 } 396 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 397 x[i2] = xw[0]; 398 idiag -= 1; 399 i2 -= 1; 400 } 401 break; 402 case 2: 403 s[0] = xb[i2]; s[1] = xb[i2+1]; 404 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 405 x[i2] = xw[0]; x[i2+1] = xw[1]; 406 i2 -= 2; 407 idiag -= 4; 408 for (i=m-2; i>=0; i--) { 409 v = aa + 4*(diag[i] + 1); 410 vi = aj + diag[i] + 1; 411 nz = ai[i+1] - diag[i] - 1; 412 s[0] = xb[i2]; s[1] = xb[i2+1]; 413 for (j=0; j<nz; j++) { 414 idx = 2*vi[j]; 415 it = 4*j; 416 xw[0] = x[idx]; xw[1] = x[1+idx]; 417 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 418 } 419 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 420 x[i2] = xw[0]; x[i2+1] = xw[1]; 421 idiag -= 4; 422 i2 -= 2; 423 } 424 break; 425 case 3: 426 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 427 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 428 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 429 i2 -= 3; 430 idiag -= 9; 431 for (i=m-2; i>=0; i--) { 432 v = aa + 9*(diag[i]+1); 433 vi = aj + diag[i] + 1; 434 nz = ai[i+1] - diag[i] - 1; 435 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 436 while (nz--) { 437 idx = 3*(*vi++); 438 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 439 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 440 v += 9; 441 } 442 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 443 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 444 idiag -= 9; 445 i2 -= 3; 446 } 447 break; 448 case 4: 449 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 450 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 451 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 452 i2 -= 4; 453 idiag -= 16; 454 for (i=m-2; i>=0; i--) { 455 v = aa + 16*(diag[i]+1); 456 vi = aj + diag[i] + 1; 457 nz = ai[i+1] - diag[i] - 1; 458 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 459 while (nz--) { 460 idx = 4*(*vi++); 461 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 462 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 463 v += 16; 464 } 465 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 466 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 467 idiag -= 16; 468 i2 -= 4; 469 } 470 break; 471 case 5: 472 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 473 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 474 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 475 i2 -= 5; 476 idiag -= 25; 477 for (i=m-2; i>=0; i--) { 478 v = aa + 25*(diag[i]+1); 479 vi = aj + diag[i] + 1; 480 nz = ai[i+1] - diag[i] - 1; 481 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 482 while (nz--) { 483 idx = 5*(*vi++); 484 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 485 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 486 v += 25; 487 } 488 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 489 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 490 idiag -= 25; 491 i2 -= 5; 492 } 493 break; 494 case 6: 495 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 496 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 497 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 498 i2 -= 6; 499 idiag -= 36; 500 for (i=m-2; i>=0; i--) { 501 v = aa + 36*(diag[i]+1); 502 vi = aj + diag[i] + 1; 503 nz = ai[i+1] - diag[i] - 1; 504 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 505 while (nz--) { 506 idx = 6*(*vi++); 507 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 508 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 509 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 510 v += 36; 511 } 512 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 513 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 514 idiag -= 36; 515 i2 -= 6; 516 } 517 break; 518 case 7: 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 522 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 523 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 524 i2 -= 7; 525 idiag -= 49; 526 for (i=m-2; i>=0; i--) { 527 v = aa + 49*(diag[i]+1); 528 vi = aj + diag[i] + 1; 529 nz = ai[i+1] - diag[i] - 1; 530 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 531 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 532 while (nz--) { 533 idx = 7*(*vi++); 534 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 535 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 536 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 537 v += 49; 538 } 539 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 540 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 541 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 542 idiag -= 49; 543 i2 -= 7; 544 } 545 break; 546 default: 547 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 548 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 549 i2 -= bs; 550 idiag -= bs2; 551 for (i=m-2; i>=0; i--) { 552 v = aa + bs2*(diag[i]+1); 553 vi = aj + diag[i] + 1; 554 nz = ai[i+1] - diag[i] - 1; 555 556 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 557 /* copy all rows of x that are needed into contiguous space */ 558 workt = work; 559 for (j=0; j<nz; j++) { 560 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 workt += bs; 562 } 563 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 564 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 565 566 idiag -= bs2; 567 i2 -= bs; 568 } 569 break; 570 } 571 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 572 } 573 its--; 574 } 575 while (its--) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 577 idiag = a->idiag; 578 i2 = 0; 579 switch (bs) { 580 case 1: 581 for (i=0; i<m; i++) { 582 v = aa + ai[i]; 583 vi = aj + ai[i]; 584 nz = ai[i+1] - ai[i]; 585 s[0] = b[i2]; 586 for (j=0; j<nz; j++) { 587 xw[0] = x[vi[j]]; 588 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 589 } 590 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 591 x[i2] += xw[0]; 592 idiag += 1; 593 i2 += 1; 594 } 595 break; 596 case 2: 597 for (i=0; i<m; i++) { 598 v = aa + 4*ai[i]; 599 vi = aj + ai[i]; 600 nz = ai[i+1] - ai[i]; 601 s[0] = b[i2]; s[1] = b[i2+1]; 602 for (j=0; j<nz; j++) { 603 idx = 2*vi[j]; 604 it = 4*j; 605 xw[0] = x[idx]; xw[1] = x[1+idx]; 606 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 607 } 608 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 609 x[i2] += xw[0]; x[i2+1] += xw[1]; 610 idiag += 4; 611 i2 += 2; 612 } 613 break; 614 case 3: 615 for (i=0; i<m; i++) { 616 v = aa + 9*ai[i]; 617 vi = aj + ai[i]; 618 nz = ai[i+1] - ai[i]; 619 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 620 while (nz--) { 621 idx = 3*(*vi++); 622 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 623 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 624 v += 9; 625 } 626 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 627 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 628 idiag += 9; 629 i2 += 3; 630 } 631 break; 632 case 4: 633 for (i=0; i<m; i++) { 634 v = aa + 16*ai[i]; 635 vi = aj + ai[i]; 636 nz = ai[i+1] - ai[i]; 637 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 638 while (nz--) { 639 idx = 4*(*vi++); 640 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 641 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 642 v += 16; 643 } 644 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 645 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 646 idiag += 16; 647 i2 += 4; 648 } 649 break; 650 case 5: 651 for (i=0; i<m; i++) { 652 v = aa + 25*ai[i]; 653 vi = aj + ai[i]; 654 nz = ai[i+1] - ai[i]; 655 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 656 while (nz--) { 657 idx = 5*(*vi++); 658 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 660 v += 25; 661 } 662 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 663 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 664 idiag += 25; 665 i2 += 5; 666 } 667 break; 668 case 6: 669 for (i=0; i<m; i++) { 670 v = aa + 36*ai[i]; 671 vi = aj + ai[i]; 672 nz = ai[i+1] - ai[i]; 673 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 674 while (nz--) { 675 idx = 6*(*vi++); 676 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 677 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 678 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 679 v += 36; 680 } 681 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 682 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 683 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 684 idiag += 36; 685 i2 += 6; 686 } 687 break; 688 case 7: 689 for (i=0; i<m; i++) { 690 v = aa + 49*ai[i]; 691 vi = aj + ai[i]; 692 nz = ai[i+1] - ai[i]; 693 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 694 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 695 while (nz--) { 696 idx = 7*(*vi++); 697 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 698 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 699 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 700 v += 49; 701 } 702 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 703 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 704 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 705 idiag += 49; 706 i2 += 7; 707 } 708 break; 709 default: 710 for (i=0; i<m; i++) { 711 v = aa + bs2*ai[i]; 712 vi = aj + ai[i]; 713 nz = ai[i+1] - ai[i]; 714 715 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 716 /* copy all rows of x that are needed into contiguous space */ 717 workt = work; 718 for (j=0; j<nz; j++) { 719 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 720 workt += bs; 721 } 722 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 723 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 724 725 idiag += bs2; 726 i2 += bs; 727 } 728 break; 729 } 730 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 731 } 732 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 733 idiag = a->idiag+bs2*(a->mbs-1); 734 i2 = bs * (m-1); 735 switch (bs) { 736 case 1: 737 for (i=m-1; i>=0; i--) { 738 v = aa + ai[i]; 739 vi = aj + ai[i]; 740 nz = ai[i+1] - ai[i]; 741 s[0] = b[i2]; 742 for (j=0; j<nz; j++) { 743 xw[0] = x[vi[j]]; 744 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 745 } 746 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 747 x[i2] += xw[0]; 748 idiag -= 1; 749 i2 -= 1; 750 } 751 break; 752 case 2: 753 for (i=m-1; i>=0; i--) { 754 v = aa + 4*ai[i]; 755 vi = aj + ai[i]; 756 nz = ai[i+1] - ai[i]; 757 s[0] = b[i2]; s[1] = b[i2+1]; 758 for (j=0; j<nz; j++) { 759 idx = 2*vi[j]; 760 it = 4*j; 761 xw[0] = x[idx]; xw[1] = x[1+idx]; 762 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 763 } 764 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 765 x[i2] += xw[0]; x[i2+1] += xw[1]; 766 idiag -= 4; 767 i2 -= 2; 768 } 769 break; 770 case 3: 771 for (i=m-1; i>=0; i--) { 772 v = aa + 9*ai[i]; 773 vi = aj + ai[i]; 774 nz = ai[i+1] - ai[i]; 775 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 776 while (nz--) { 777 idx = 3*(*vi++); 778 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 779 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 780 v += 9; 781 } 782 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 783 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 784 idiag -= 9; 785 i2 -= 3; 786 } 787 break; 788 case 4: 789 for (i=m-1; i>=0; i--) { 790 v = aa + 16*ai[i]; 791 vi = aj + ai[i]; 792 nz = ai[i+1] - ai[i]; 793 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 794 while (nz--) { 795 idx = 4*(*vi++); 796 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 797 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 798 v += 16; 799 } 800 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 801 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 802 idiag -= 16; 803 i2 -= 4; 804 } 805 break; 806 case 5: 807 for (i=m-1; i>=0; i--) { 808 v = aa + 25*ai[i]; 809 vi = aj + ai[i]; 810 nz = ai[i+1] - ai[i]; 811 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 812 while (nz--) { 813 idx = 5*(*vi++); 814 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 815 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 816 v += 25; 817 } 818 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 819 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 820 idiag -= 25; 821 i2 -= 5; 822 } 823 break; 824 case 6: 825 for (i=m-1; i>=0; i--) { 826 v = aa + 36*ai[i]; 827 vi = aj + ai[i]; 828 nz = ai[i+1] - ai[i]; 829 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 830 while (nz--) { 831 idx = 6*(*vi++); 832 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 833 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 834 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 835 v += 36; 836 } 837 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 838 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 839 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 840 idiag -= 36; 841 i2 -= 6; 842 } 843 break; 844 case 7: 845 for (i=m-1; i>=0; i--) { 846 v = aa + 49*ai[i]; 847 vi = aj + ai[i]; 848 nz = ai[i+1] - ai[i]; 849 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 850 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 851 while (nz--) { 852 idx = 7*(*vi++); 853 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 854 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 855 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 856 v += 49; 857 } 858 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 859 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 860 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 861 idiag -= 49; 862 i2 -= 7; 863 } 864 break; 865 default: 866 for (i=m-1; i>=0; i--) { 867 v = aa + bs2*ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i+1] - ai[i]; 870 871 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 872 /* copy all rows of x that are needed into contiguous space */ 873 workt = work; 874 for (j=0; j<nz; j++) { 875 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 876 workt += bs; 877 } 878 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 879 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 880 881 idiag -= bs2; 882 i2 -= bs; 883 } 884 break; 885 } 886 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 887 } 888 } 889 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 890 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 895 /* 896 Special version for direct calls from Fortran (Used in PETSc-fun3d) 897 */ 898 #if defined(PETSC_HAVE_FORTRAN_CAPS) 899 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 900 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 901 #define matsetvaluesblocked4_ matsetvaluesblocked4 902 #endif 903 904 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 905 { 906 Mat A = *AA; 907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 908 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 909 PetscInt *ai =a->i,*ailen=a->ilen; 910 PetscInt *aj =a->j,stepval,lastcol = -1; 911 const PetscScalar *value = v; 912 MatScalar *ap,*aa = a->a,*bap; 913 914 PetscFunctionBegin; 915 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 916 stepval = (n-1)*4; 917 for (k=0; k<m; k++) { /* loop over added rows */ 918 row = im[k]; 919 rp = aj + ai[row]; 920 ap = aa + 16*ai[row]; 921 nrow = ailen[row]; 922 low = 0; 923 high = nrow; 924 for (l=0; l<n; l++) { /* loop over added columns */ 925 col = in[l]; 926 if (col <= lastcol) low = 0; 927 else high = nrow; 928 lastcol = col; 929 value = v + k*(stepval+4 + l)*4; 930 while (high-low > 7) { 931 t = (low+high)/2; 932 if (rp[t] > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (rp[i] > col) break; 937 if (rp[i] == col) { 938 bap = ap + 16*i; 939 for (ii=0; ii<4; ii++,value+=stepval) { 940 for (jj=ii; jj<16; jj+=4) { 941 bap[jj] += *value++; 942 } 943 } 944 goto noinsert2; 945 } 946 } 947 N = nrow++ - 1; 948 high++; /* added new column index thus must search to one higher than before */ 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 953 } 954 if (N >= i) { 955 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 956 } 957 rp[i] = col; 958 bap = ap + 16*i; 959 for (ii=0; ii<4; ii++,value+=stepval) { 960 for (jj=ii; jj<16; jj+=4) { 961 bap[jj] = *value++; 962 } 963 } 964 noinsert2:; 965 low = i; 966 } 967 ailen[row] = nrow; 968 } 969 PetscFunctionReturnVoid(); 970 } 971 972 #if defined(PETSC_HAVE_FORTRAN_CAPS) 973 #define matsetvalues4_ MATSETVALUES4 974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 975 #define matsetvalues4_ matsetvalues4 976 #endif 977 978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 979 { 980 Mat A = *AA; 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 982 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 983 PetscInt *ai=a->i,*ailen=a->ilen; 984 PetscInt *aj=a->j,brow,bcol; 985 PetscInt ridx,cidx,lastcol = -1; 986 MatScalar *ap,value,*aa=a->a,*bap; 987 988 PetscFunctionBegin; 989 for (k=0; k<m; k++) { /* loop over added rows */ 990 row = im[k]; brow = row/4; 991 rp = aj + ai[brow]; 992 ap = aa + 16*ai[brow]; 993 nrow = ailen[brow]; 994 low = 0; 995 high = nrow; 996 for (l=0; l<n; l++) { /* loop over added columns */ 997 col = in[l]; bcol = col/4; 998 ridx = row % 4; cidx = col % 4; 999 value = v[l + k*n]; 1000 if (col <= lastcol) low = 0; 1001 else high = nrow; 1002 lastcol = col; 1003 while (high-low > 7) { 1004 t = (low+high)/2; 1005 if (rp[t] > bcol) high = t; 1006 else low = t; 1007 } 1008 for (i=low; i<high; i++) { 1009 if (rp[i] > bcol) break; 1010 if (rp[i] == bcol) { 1011 bap = ap + 16*i + 4*cidx + ridx; 1012 *bap += value; 1013 goto noinsert1; 1014 } 1015 } 1016 N = nrow++ - 1; 1017 high++; /* added new column thus must search to one higher than before */ 1018 /* shift up all the later entries in this row */ 1019 for (ii=N; ii>=i; ii--) { 1020 rp[ii+1] = rp[ii]; 1021 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1022 } 1023 if (N>=i) { 1024 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1025 } 1026 rp[i] = bcol; 1027 ap[16*i + 4*cidx + ridx] = value; 1028 noinsert1:; 1029 low = i; 1030 } 1031 ailen[brow] = nrow; 1032 } 1033 PetscFunctionReturnVoid(); 1034 } 1035 1036 /* 1037 Checks for missing diagonals 1038 */ 1039 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1040 { 1041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1042 PetscErrorCode ierr; 1043 PetscInt *diag,*ii = a->i,i; 1044 1045 PetscFunctionBegin; 1046 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1047 *missing = PETSC_FALSE; 1048 if (A->rmap->n > 0 && !ii) { 1049 *missing = PETSC_TRUE; 1050 if (d) *d = 0; 1051 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1052 } else { 1053 diag = a->diag; 1054 for (i=0; i<a->mbs; i++) { 1055 if (diag[i] >= ii[i+1]) { 1056 *missing = PETSC_TRUE; 1057 if (d) *d = i; 1058 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1059 break; 1060 } 1061 } 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1067 { 1068 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 PetscInt i,j,m = a->mbs; 1071 1072 PetscFunctionBegin; 1073 if (!a->diag) { 1074 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1075 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1076 a->free_diag = PETSC_TRUE; 1077 } 1078 for (i=0; i<m; i++) { 1079 a->diag[i] = a->i[i+1]; 1080 for (j=a->i[i]; j<a->i[i+1]; j++) { 1081 if (a->j[j] == i) { 1082 a->diag[i] = j; 1083 break; 1084 } 1085 } 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 1091 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1092 { 1093 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1096 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1097 1098 PetscFunctionBegin; 1099 *nn = n; 1100 if (!ia) PetscFunctionReturn(0); 1101 if (symmetric) { 1102 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1103 nz = tia[n]; 1104 } else { 1105 tia = a->i; tja = a->j; 1106 } 1107 1108 if (!blockcompressed && bs > 1) { 1109 (*nn) *= bs; 1110 /* malloc & create the natural set of indices */ 1111 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1112 if (n) { 1113 (*ia)[0] = oshift; 1114 for (j=1; j<bs; j++) { 1115 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1116 } 1117 } 1118 1119 for (i=1; i<n; i++) { 1120 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1121 for (j=1; j<bs; j++) { 1122 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1123 } 1124 } 1125 if (n) { 1126 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1127 } 1128 1129 if (inja) { 1130 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1131 cnt = 0; 1132 for (i=0; i<n; i++) { 1133 for (j=0; j<bs; j++) { 1134 for (k=tia[i]; k<tia[i+1]; k++) { 1135 for (l=0; l<bs; l++) { 1136 (*ja)[cnt++] = bs*tja[k] + l; 1137 } 1138 } 1139 } 1140 } 1141 } 1142 1143 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1144 ierr = PetscFree(tia);CHKERRQ(ierr); 1145 ierr = PetscFree(tja);CHKERRQ(ierr); 1146 } 1147 } else if (oshift == 1) { 1148 if (symmetric) { 1149 nz = tia[A->rmap->n/bs]; 1150 /* add 1 to i and j indices */ 1151 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1152 *ia = tia; 1153 if (ja) { 1154 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1155 *ja = tja; 1156 } 1157 } else { 1158 nz = a->i[A->rmap->n/bs]; 1159 /* malloc space and add 1 to i and j indices */ 1160 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1161 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1162 if (ja) { 1163 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1164 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1165 } 1166 } 1167 } else { 1168 *ia = tia; 1169 if (ja) *ja = tja; 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1175 { 1176 PetscErrorCode ierr; 1177 1178 PetscFunctionBegin; 1179 if (!ia) PetscFunctionReturn(0); 1180 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1181 ierr = PetscFree(*ia);CHKERRQ(ierr); 1182 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1188 { 1189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 #if defined(PETSC_USE_LOG) 1194 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1195 #endif 1196 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1197 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1198 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1199 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1200 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1201 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1202 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1203 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1204 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1205 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1206 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1207 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1208 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1209 1210 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1211 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1212 ierr = PetscFree(A->data);CHKERRQ(ierr); 1213 1214 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1225 #if defined(PETSC_HAVE_HYPRE) 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1227 #endif 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1234 { 1235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 switch (op) { 1240 case MAT_ROW_ORIENTED: 1241 a->roworiented = flg; 1242 break; 1243 case MAT_KEEP_NONZERO_PATTERN: 1244 a->keepnonzeropattern = flg; 1245 break; 1246 case MAT_NEW_NONZERO_LOCATIONS: 1247 a->nonew = (flg ? 0 : 1); 1248 break; 1249 case MAT_NEW_NONZERO_LOCATION_ERR: 1250 a->nonew = (flg ? -1 : 0); 1251 break; 1252 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1253 a->nonew = (flg ? -2 : 0); 1254 break; 1255 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1256 a->nounused = (flg ? -1 : 0); 1257 break; 1258 case MAT_NEW_DIAGONALS: 1259 case MAT_IGNORE_OFF_PROC_ENTRIES: 1260 case MAT_USE_HASH_TABLE: 1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1262 break; 1263 case MAT_SPD: 1264 case MAT_SYMMETRIC: 1265 case MAT_STRUCTURALLY_SYMMETRIC: 1266 case MAT_HERMITIAN: 1267 case MAT_SYMMETRY_ETERNAL: 1268 case MAT_SUBMAT_SINGLEIS: 1269 case MAT_STRUCTURE_ONLY: 1270 /* These options are handled directly by MatSetOption() */ 1271 break; 1272 default: 1273 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1280 { 1281 PetscErrorCode ierr; 1282 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1283 MatScalar *aa_i; 1284 PetscScalar *v_i; 1285 1286 PetscFunctionBegin; 1287 bs = A->rmap->bs; 1288 bs2 = bs*bs; 1289 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1290 1291 bn = row/bs; /* Block number */ 1292 bp = row % bs; /* Block Position */ 1293 M = ai[bn+1] - ai[bn]; 1294 *nz = bs*M; 1295 1296 if (v) { 1297 *v = 0; 1298 if (*nz) { 1299 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1300 for (i=0; i<M; i++) { /* for each block in the block row */ 1301 v_i = *v + i*bs; 1302 aa_i = aa + bs2*(ai[bn] + i); 1303 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1304 } 1305 } 1306 } 1307 1308 if (idx) { 1309 *idx = 0; 1310 if (*nz) { 1311 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1312 for (i=0; i<M; i++) { /* for each block in the block row */ 1313 idx_i = *idx + i*bs; 1314 itmp = bs*aj[ai[bn] + i]; 1315 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1323 { 1324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1333 { 1334 PetscErrorCode ierr; 1335 1336 PetscFunctionBegin; 1337 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1338 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1339 PetscFunctionReturn(0); 1340 } 1341 1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1343 { 1344 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1345 Mat C; 1346 PetscErrorCode ierr; 1347 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1348 PetscInt *rows,*cols,bs2=a->bs2; 1349 MatScalar *array; 1350 1351 PetscFunctionBegin; 1352 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1353 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1354 1355 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1356 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1357 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1358 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1359 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1360 ierr = PetscFree(col);CHKERRQ(ierr); 1361 } else { 1362 C = *B; 1363 } 1364 1365 array = a->a; 1366 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1367 for (i=0; i<mbs; i++) { 1368 cols[0] = i*bs; 1369 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1370 len = ai[i+1] - ai[i]; 1371 for (j=0; j<len; j++) { 1372 rows[0] = (*aj++)*bs; 1373 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1374 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1375 array += bs2; 1376 } 1377 } 1378 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1379 1380 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1381 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1382 1383 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1384 *B = C; 1385 } else { 1386 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1387 } 1388 PetscFunctionReturn(0); 1389 } 1390 1391 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1392 { 1393 PetscErrorCode ierr; 1394 Mat Btrans; 1395 1396 PetscFunctionBegin; 1397 *f = PETSC_FALSE; 1398 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1399 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1400 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1405 { 1406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1407 PetscErrorCode ierr; 1408 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1409 int fd; 1410 PetscScalar *aa; 1411 FILE *file; 1412 1413 PetscFunctionBegin; 1414 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1415 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1416 col_lens[0] = MAT_FILE_CLASSID; 1417 1418 col_lens[1] = A->rmap->N; 1419 col_lens[2] = A->cmap->n; 1420 col_lens[3] = a->nz*bs2; 1421 1422 /* store lengths of each row and write (including header) to file */ 1423 count = 0; 1424 for (i=0; i<a->mbs; i++) { 1425 for (j=0; j<bs; j++) { 1426 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1427 } 1428 } 1429 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1430 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1431 1432 /* store column indices (zero start index) */ 1433 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1434 count = 0; 1435 for (i=0; i<a->mbs; i++) { 1436 for (j=0; j<bs; j++) { 1437 for (k=a->i[i]; k<a->i[i+1]; k++) { 1438 for (l=0; l<bs; l++) { 1439 jj[count++] = bs*a->j[k] + l; 1440 } 1441 } 1442 } 1443 } 1444 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1445 ierr = PetscFree(jj);CHKERRQ(ierr); 1446 1447 /* store nonzero values */ 1448 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1449 count = 0; 1450 for (i=0; i<a->mbs; i++) { 1451 for (j=0; j<bs; j++) { 1452 for (k=a->i[i]; k<a->i[i+1]; k++) { 1453 for (l=0; l<bs; l++) { 1454 aa[count++] = a->a[bs2*k + l*bs + j]; 1455 } 1456 } 1457 } 1458 } 1459 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1460 ierr = PetscFree(aa);CHKERRQ(ierr); 1461 1462 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1463 if (file) { 1464 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1470 { 1471 PetscErrorCode ierr; 1472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1473 PetscInt i,bs = A->rmap->bs,k; 1474 1475 PetscFunctionBegin; 1476 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1477 for (i=0; i<a->mbs; i++) { 1478 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1479 for (k=a->i[i]; k<a->i[i+1]; k++) { 1480 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1481 } 1482 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1483 } 1484 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1489 { 1490 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1491 PetscErrorCode ierr; 1492 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1493 PetscViewerFormat format; 1494 1495 PetscFunctionBegin; 1496 if (A->structure_only) { 1497 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } 1500 1501 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1502 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1503 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1504 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1505 const char *matname; 1506 Mat aij; 1507 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1508 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1509 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1510 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1511 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1512 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1513 PetscFunctionReturn(0); 1514 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1515 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1516 for (i=0; i<a->mbs; i++) { 1517 for (j=0; j<bs; j++) { 1518 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1519 for (k=a->i[i]; k<a->i[i+1]; k++) { 1520 for (l=0; l<bs; l++) { 1521 #if defined(PETSC_USE_COMPLEX) 1522 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1523 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1524 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1525 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1526 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1527 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1528 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1529 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1530 } 1531 #else 1532 if (a->a[bs2*k + l*bs + j] != 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1534 } 1535 #endif 1536 } 1537 } 1538 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1539 } 1540 } 1541 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1542 } else { 1543 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1544 for (i=0; i<a->mbs; i++) { 1545 for (j=0; j<bs; j++) { 1546 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1547 for (k=a->i[i]; k<a->i[i+1]; k++) { 1548 for (l=0; l<bs; l++) { 1549 #if defined(PETSC_USE_COMPLEX) 1550 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1552 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1554 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1555 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1556 } else { 1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1558 } 1559 #else 1560 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1561 #endif 1562 } 1563 } 1564 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1565 } 1566 } 1567 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1568 } 1569 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #include <petscdraw.h> 1574 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1575 { 1576 Mat A = (Mat) Aa; 1577 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1578 PetscErrorCode ierr; 1579 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1580 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1581 MatScalar *aa; 1582 PetscViewer viewer; 1583 PetscViewerFormat format; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1587 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1588 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1589 1590 /* loop over matrix elements drawing boxes */ 1591 1592 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1593 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1594 /* Blue for negative, Cyan for zero and Red for positive */ 1595 color = PETSC_DRAW_BLUE; 1596 for (i=0,row=0; i<mbs; i++,row+=bs) { 1597 for (j=a->i[i]; j<a->i[i+1]; j++) { 1598 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1599 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1600 aa = a->a + j*bs2; 1601 for (k=0; k<bs; k++) { 1602 for (l=0; l<bs; l++) { 1603 if (PetscRealPart(*aa++) >= 0.) continue; 1604 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1605 } 1606 } 1607 } 1608 } 1609 color = PETSC_DRAW_CYAN; 1610 for (i=0,row=0; i<mbs; i++,row+=bs) { 1611 for (j=a->i[i]; j<a->i[i+1]; j++) { 1612 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1613 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1614 aa = a->a + j*bs2; 1615 for (k=0; k<bs; k++) { 1616 for (l=0; l<bs; l++) { 1617 if (PetscRealPart(*aa++) != 0.) continue; 1618 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1619 } 1620 } 1621 } 1622 } 1623 color = PETSC_DRAW_RED; 1624 for (i=0,row=0; i<mbs; i++,row+=bs) { 1625 for (j=a->i[i]; j<a->i[i+1]; j++) { 1626 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1627 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1628 aa = a->a + j*bs2; 1629 for (k=0; k<bs; k++) { 1630 for (l=0; l<bs; l++) { 1631 if (PetscRealPart(*aa++) <= 0.) continue; 1632 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1633 } 1634 } 1635 } 1636 } 1637 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1638 } else { 1639 /* use contour shading to indicate magnitude of values */ 1640 /* first determine max of all nonzero values */ 1641 PetscReal minv = 0.0, maxv = 0.0; 1642 PetscDraw popup; 1643 1644 for (i=0; i<a->nz*a->bs2; i++) { 1645 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1646 } 1647 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1648 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1649 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1650 1651 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1652 for (i=0,row=0; i<mbs; i++,row+=bs) { 1653 for (j=a->i[i]; j<a->i[i+1]; j++) { 1654 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1655 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1656 aa = a->a + j*bs2; 1657 for (k=0; k<bs; k++) { 1658 for (l=0; l<bs; l++) { 1659 MatScalar v = *aa++; 1660 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1661 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1662 } 1663 } 1664 } 1665 } 1666 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1667 } 1668 PetscFunctionReturn(0); 1669 } 1670 1671 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1672 { 1673 PetscErrorCode ierr; 1674 PetscReal xl,yl,xr,yr,w,h; 1675 PetscDraw draw; 1676 PetscBool isnull; 1677 1678 PetscFunctionBegin; 1679 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1680 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1681 if (isnull) PetscFunctionReturn(0); 1682 1683 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1684 xr += w; yr += h; xl = -w; yl = -h; 1685 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1686 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1687 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1688 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1689 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 1693 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1694 { 1695 PetscErrorCode ierr; 1696 PetscBool iascii,isbinary,isdraw; 1697 1698 PetscFunctionBegin; 1699 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1700 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1701 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1702 if (iascii) { 1703 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1704 } else if (isbinary) { 1705 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1706 } else if (isdraw) { 1707 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1708 } else { 1709 Mat B; 1710 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1711 ierr = MatView(B,viewer);CHKERRQ(ierr); 1712 ierr = MatDestroy(&B);CHKERRQ(ierr); 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 1718 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1719 { 1720 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1721 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1722 PetscInt *ai = a->i,*ailen = a->ilen; 1723 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1724 MatScalar *ap,*aa = a->a; 1725 1726 PetscFunctionBegin; 1727 for (k=0; k<m; k++) { /* loop over rows */ 1728 row = im[k]; brow = row/bs; 1729 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1730 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1731 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1732 nrow = ailen[brow]; 1733 for (l=0; l<n; l++) { /* loop over columns */ 1734 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1735 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1736 col = in[l]; 1737 bcol = col/bs; 1738 cidx = col%bs; 1739 ridx = row%bs; 1740 high = nrow; 1741 low = 0; /* assume unsorted */ 1742 while (high-low > 5) { 1743 t = (low+high)/2; 1744 if (rp[t] > bcol) high = t; 1745 else low = t; 1746 } 1747 for (i=low; i<high; i++) { 1748 if (rp[i] > bcol) break; 1749 if (rp[i] == bcol) { 1750 *v++ = ap[bs2*i+bs*cidx+ridx]; 1751 goto finished; 1752 } 1753 } 1754 *v++ = 0.0; 1755 finished:; 1756 } 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1762 { 1763 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1764 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1765 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1766 PetscErrorCode ierr; 1767 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1768 PetscBool roworiented=a->roworiented; 1769 const PetscScalar *value = v; 1770 MatScalar *ap=NULL,*aa = a->a,*bap; 1771 1772 PetscFunctionBegin; 1773 if (roworiented) { 1774 stepval = (n-1)*bs; 1775 } else { 1776 stepval = (m-1)*bs; 1777 } 1778 for (k=0; k<m; k++) { /* loop over added rows */ 1779 row = im[k]; 1780 if (row < 0) continue; 1781 #if defined(PETSC_USE_DEBUG) 1782 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1783 #endif 1784 rp = aj + ai[row]; 1785 if (!A->structure_only) ap = aa + bs2*ai[row]; 1786 rmax = imax[row]; 1787 nrow = ailen[row]; 1788 low = 0; 1789 high = nrow; 1790 for (l=0; l<n; l++) { /* loop over added columns */ 1791 if (in[l] < 0) continue; 1792 #if defined(PETSC_USE_DEBUG) 1793 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1794 #endif 1795 col = in[l]; 1796 if (!A->structure_only) { 1797 if (roworiented) { 1798 value = v + (k*(stepval+bs) + l)*bs; 1799 } else { 1800 value = v + (l*(stepval+bs) + k)*bs; 1801 } 1802 } 1803 if (col <= lastcol) low = 0; 1804 else high = nrow; 1805 lastcol = col; 1806 while (high-low > 7) { 1807 t = (low+high)/2; 1808 if (rp[t] > col) high = t; 1809 else low = t; 1810 } 1811 for (i=low; i<high; i++) { 1812 if (rp[i] > col) break; 1813 if (rp[i] == col) { 1814 if (A->structure_only) goto noinsert2; 1815 bap = ap + bs2*i; 1816 if (roworiented) { 1817 if (is == ADD_VALUES) { 1818 for (ii=0; ii<bs; ii++,value+=stepval) { 1819 for (jj=ii; jj<bs2; jj+=bs) { 1820 bap[jj] += *value++; 1821 } 1822 } 1823 } else { 1824 for (ii=0; ii<bs; ii++,value+=stepval) { 1825 for (jj=ii; jj<bs2; jj+=bs) { 1826 bap[jj] = *value++; 1827 } 1828 } 1829 } 1830 } else { 1831 if (is == ADD_VALUES) { 1832 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1833 for (jj=0; jj<bs; jj++) { 1834 bap[jj] += value[jj]; 1835 } 1836 bap += bs; 1837 } 1838 } else { 1839 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1840 for (jj=0; jj<bs; jj++) { 1841 bap[jj] = value[jj]; 1842 } 1843 bap += bs; 1844 } 1845 } 1846 } 1847 goto noinsert2; 1848 } 1849 } 1850 if (nonew == 1) goto noinsert2; 1851 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1852 if (A->structure_only) { 1853 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1854 } else { 1855 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1856 } 1857 N = nrow++ - 1; high++; 1858 /* shift up all the later entries in this row */ 1859 for (ii=N; ii>=i; ii--) { 1860 rp[ii+1] = rp[ii]; 1861 if (!A->structure_only) { 1862 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1863 } 1864 } 1865 if (N >= i && !A->structure_only) { 1866 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1867 } 1868 1869 rp[i] = col; 1870 if (!A->structure_only) { 1871 bap = ap + bs2*i; 1872 if (roworiented) { 1873 for (ii=0; ii<bs; ii++,value+=stepval) { 1874 for (jj=ii; jj<bs2; jj+=bs) { 1875 bap[jj] = *value++; 1876 } 1877 } 1878 } else { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=0; jj<bs; jj++) { 1881 *bap++ = *value++; 1882 } 1883 } 1884 } 1885 } 1886 noinsert2:; 1887 low = i; 1888 } 1889 ailen[row] = nrow; 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1895 { 1896 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1897 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1898 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1899 PetscErrorCode ierr; 1900 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1901 MatScalar *aa = a->a,*ap; 1902 PetscReal ratio=0.6; 1903 1904 PetscFunctionBegin; 1905 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1906 1907 if (m) rmax = ailen[0]; 1908 for (i=1; i<mbs; i++) { 1909 /* move each row back by the amount of empty slots (fshift) before it*/ 1910 fshift += imax[i-1] - ailen[i-1]; 1911 rmax = PetscMax(rmax,ailen[i]); 1912 if (fshift) { 1913 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1914 N = ailen[i]; 1915 for (j=0; j<N; j++) { 1916 ip[j-fshift] = ip[j]; 1917 if (!A->structure_only) { 1918 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1919 } 1920 } 1921 } 1922 ai[i] = ai[i-1] + ailen[i-1]; 1923 } 1924 if (mbs) { 1925 fshift += imax[mbs-1] - ailen[mbs-1]; 1926 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1927 } 1928 1929 /* reset ilen and imax for each row */ 1930 a->nonzerorowcnt = 0; 1931 if (A->structure_only) { 1932 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1933 } else { /* !A->structure_only */ 1934 for (i=0; i<mbs; i++) { 1935 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1936 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1937 } 1938 } 1939 a->nz = ai[mbs]; 1940 1941 /* diagonals may have moved, so kill the diagonal pointers */ 1942 a->idiagvalid = PETSC_FALSE; 1943 if (fshift && a->diag) { 1944 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1945 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1946 a->diag = 0; 1947 } 1948 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1949 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1952 1953 A->info.mallocs += a->reallocs; 1954 a->reallocs = 0; 1955 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1956 a->rmax = rmax; 1957 1958 if (!A->structure_only) { 1959 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 /* 1965 This function returns an array of flags which indicate the locations of contiguous 1966 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1967 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1968 Assume: sizes should be long enough to hold all the values. 1969 */ 1970 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1971 { 1972 PetscInt i,j,k,row; 1973 PetscBool flg; 1974 1975 PetscFunctionBegin; 1976 for (i=0,j=0; i<n; j++) { 1977 row = idx[i]; 1978 if (row%bs!=0) { /* Not the begining of a block */ 1979 sizes[j] = 1; 1980 i++; 1981 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1982 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1983 i++; 1984 } else { /* Begining of the block, so check if the complete block exists */ 1985 flg = PETSC_TRUE; 1986 for (k=1; k<bs; k++) { 1987 if (row+k != idx[i+k]) { /* break in the block */ 1988 flg = PETSC_FALSE; 1989 break; 1990 } 1991 } 1992 if (flg) { /* No break in the bs */ 1993 sizes[j] = bs; 1994 i += bs; 1995 } else { 1996 sizes[j] = 1; 1997 i++; 1998 } 1999 } 2000 } 2001 *bs_max = j; 2002 PetscFunctionReturn(0); 2003 } 2004 2005 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2006 { 2007 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2008 PetscErrorCode ierr; 2009 PetscInt i,j,k,count,*rows; 2010 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2011 PetscScalar zero = 0.0; 2012 MatScalar *aa; 2013 const PetscScalar *xx; 2014 PetscScalar *bb; 2015 2016 PetscFunctionBegin; 2017 /* fix right hand side if needed */ 2018 if (x && b) { 2019 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2020 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2021 for (i=0; i<is_n; i++) { 2022 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2023 } 2024 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2025 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2026 } 2027 2028 /* Make a copy of the IS and sort it */ 2029 /* allocate memory for rows,sizes */ 2030 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2031 2032 /* copy IS values to rows, and sort them */ 2033 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2034 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2035 2036 if (baij->keepnonzeropattern) { 2037 for (i=0; i<is_n; i++) sizes[i] = 1; 2038 bs_max = is_n; 2039 } else { 2040 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2041 A->nonzerostate++; 2042 } 2043 2044 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2045 row = rows[j]; 2046 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2047 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2048 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2049 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2050 if (diag != (PetscScalar)0.0) { 2051 if (baij->ilen[row/bs] > 0) { 2052 baij->ilen[row/bs] = 1; 2053 baij->j[baij->i[row/bs]] = row/bs; 2054 2055 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2056 } 2057 /* Now insert all the diagonal values for this bs */ 2058 for (k=0; k<bs; k++) { 2059 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2060 } 2061 } else { /* (diag == 0.0) */ 2062 baij->ilen[row/bs] = 0; 2063 } /* end (diag == 0.0) */ 2064 } else { /* (sizes[i] != bs) */ 2065 #if defined(PETSC_USE_DEBUG) 2066 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2067 #endif 2068 for (k=0; k<count; k++) { 2069 aa[0] = zero; 2070 aa += bs; 2071 } 2072 if (diag != (PetscScalar)0.0) { 2073 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2074 } 2075 } 2076 } 2077 2078 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2079 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2084 { 2085 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2086 PetscErrorCode ierr; 2087 PetscInt i,j,k,count; 2088 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2089 PetscScalar zero = 0.0; 2090 MatScalar *aa; 2091 const PetscScalar *xx; 2092 PetscScalar *bb; 2093 PetscBool *zeroed,vecs = PETSC_FALSE; 2094 2095 PetscFunctionBegin; 2096 /* fix right hand side if needed */ 2097 if (x && b) { 2098 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2099 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2100 vecs = PETSC_TRUE; 2101 } 2102 2103 /* zero the columns */ 2104 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2105 for (i=0; i<is_n; i++) { 2106 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2107 zeroed[is_idx[i]] = PETSC_TRUE; 2108 } 2109 for (i=0; i<A->rmap->N; i++) { 2110 if (!zeroed[i]) { 2111 row = i/bs; 2112 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2113 for (k=0; k<bs; k++) { 2114 col = bs*baij->j[j] + k; 2115 if (zeroed[col]) { 2116 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2117 if (vecs) bb[i] -= aa[0]*xx[col]; 2118 aa[0] = 0.0; 2119 } 2120 } 2121 } 2122 } else if (vecs) bb[i] = diag*xx[i]; 2123 } 2124 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2125 if (vecs) { 2126 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2127 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2128 } 2129 2130 /* zero the rows */ 2131 for (i=0; i<is_n; i++) { 2132 row = is_idx[i]; 2133 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2134 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2135 for (k=0; k<count; k++) { 2136 aa[0] = zero; 2137 aa += bs; 2138 } 2139 if (diag != (PetscScalar)0.0) { 2140 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2141 } 2142 } 2143 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2148 { 2149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2150 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2151 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2152 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2153 PetscErrorCode ierr; 2154 PetscInt ridx,cidx,bs2=a->bs2; 2155 PetscBool roworiented=a->roworiented; 2156 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2157 2158 PetscFunctionBegin; 2159 for (k=0; k<m; k++) { /* loop over added rows */ 2160 row = im[k]; 2161 brow = row/bs; 2162 if (row < 0) continue; 2163 #if defined(PETSC_USE_DEBUG) 2164 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2165 #endif 2166 rp = aj + ai[brow]; 2167 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2168 rmax = imax[brow]; 2169 nrow = ailen[brow]; 2170 low = 0; 2171 high = nrow; 2172 for (l=0; l<n; l++) { /* loop over added columns */ 2173 if (in[l] < 0) continue; 2174 #if defined(PETSC_USE_DEBUG) 2175 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2176 #endif 2177 col = in[l]; bcol = col/bs; 2178 ridx = row % bs; cidx = col % bs; 2179 if (!A->structure_only) { 2180 if (roworiented) { 2181 value = v[l + k*n]; 2182 } else { 2183 value = v[k + l*m]; 2184 } 2185 } 2186 if (col <= lastcol) low = 0; else high = nrow; 2187 lastcol = col; 2188 while (high-low > 7) { 2189 t = (low+high)/2; 2190 if (rp[t] > bcol) high = t; 2191 else low = t; 2192 } 2193 for (i=low; i<high; i++) { 2194 if (rp[i] > bcol) break; 2195 if (rp[i] == bcol) { 2196 bap = ap + bs2*i + bs*cidx + ridx; 2197 if (!A->structure_only) { 2198 if (is == ADD_VALUES) *bap += value; 2199 else *bap = value; 2200 } 2201 goto noinsert1; 2202 } 2203 } 2204 if (nonew == 1) goto noinsert1; 2205 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2206 if (A->structure_only) { 2207 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2208 } else { 2209 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2210 } 2211 N = nrow++ - 1; high++; 2212 /* shift up all the later entries in this row */ 2213 for (ii=N; ii>=i; ii--) { 2214 rp[ii+1] = rp[ii]; 2215 if (!A->structure_only) { 2216 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2217 } 2218 } 2219 if (N>=i && !A->structure_only) { 2220 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2221 } 2222 rp[i] = bcol; 2223 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2224 a->nz++; 2225 A->nonzerostate++; 2226 noinsert1:; 2227 low = i; 2228 } 2229 ailen[brow] = nrow; 2230 } 2231 PetscFunctionReturn(0); 2232 } 2233 2234 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2235 { 2236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2237 Mat outA; 2238 PetscErrorCode ierr; 2239 PetscBool row_identity,col_identity; 2240 2241 PetscFunctionBegin; 2242 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2243 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2244 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2245 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2246 2247 outA = inA; 2248 inA->factortype = MAT_FACTOR_LU; 2249 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2250 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2251 2252 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2253 2254 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2255 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2256 a->row = row; 2257 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2258 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2259 a->col = col; 2260 2261 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2262 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2263 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2264 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2265 2266 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2267 if (!a->solve_work) { 2268 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2269 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2270 } 2271 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2276 { 2277 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2278 PetscInt i,nz,mbs; 2279 2280 PetscFunctionBegin; 2281 nz = baij->maxnz; 2282 mbs = baij->mbs; 2283 for (i=0; i<nz; i++) { 2284 baij->j[i] = indices[i]; 2285 } 2286 baij->nz = nz; 2287 for (i=0; i<mbs; i++) { 2288 baij->ilen[i] = baij->imax[i]; 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 /*@ 2294 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2295 in the matrix. 2296 2297 Input Parameters: 2298 + mat - the SeqBAIJ matrix 2299 - indices - the column indices 2300 2301 Level: advanced 2302 2303 Notes: 2304 This can be called if you have precomputed the nonzero structure of the 2305 matrix and want to provide it to the matrix object to improve the performance 2306 of the MatSetValues() operation. 2307 2308 You MUST have set the correct numbers of nonzeros per row in the call to 2309 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2310 2311 MUST be called before any calls to MatSetValues(); 2312 2313 @*/ 2314 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2315 { 2316 PetscErrorCode ierr; 2317 2318 PetscFunctionBegin; 2319 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2320 PetscValidPointer(indices,2); 2321 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2326 { 2327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2328 PetscErrorCode ierr; 2329 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2330 PetscReal atmp; 2331 PetscScalar *x,zero = 0.0; 2332 MatScalar *aa; 2333 PetscInt ncols,brow,krow,kcol; 2334 2335 PetscFunctionBegin; 2336 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2337 bs = A->rmap->bs; 2338 aa = a->a; 2339 ai = a->i; 2340 aj = a->j; 2341 mbs = a->mbs; 2342 2343 ierr = VecSet(v,zero);CHKERRQ(ierr); 2344 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2345 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2346 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2347 for (i=0; i<mbs; i++) { 2348 ncols = ai[1] - ai[0]; ai++; 2349 brow = bs*i; 2350 for (j=0; j<ncols; j++) { 2351 for (kcol=0; kcol<bs; kcol++) { 2352 for (krow=0; krow<bs; krow++) { 2353 atmp = PetscAbsScalar(*aa);aa++; 2354 row = brow + krow; /* row index */ 2355 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2356 } 2357 } 2358 aj++; 2359 } 2360 } 2361 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2366 { 2367 PetscErrorCode ierr; 2368 2369 PetscFunctionBegin; 2370 /* If the two matrices have the same copy implementation, use fast copy. */ 2371 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2373 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2374 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2375 2376 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2377 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2378 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2379 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2380 } else { 2381 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2382 } 2383 PetscFunctionReturn(0); 2384 } 2385 2386 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2387 { 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2396 { 2397 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2398 2399 PetscFunctionBegin; 2400 *array = a->a; 2401 PetscFunctionReturn(0); 2402 } 2403 2404 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2405 { 2406 PetscFunctionBegin; 2407 PetscFunctionReturn(0); 2408 } 2409 2410 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2411 { 2412 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2413 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2414 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2415 PetscErrorCode ierr; 2416 2417 PetscFunctionBegin; 2418 /* Set the number of nonzeros in the new matrix */ 2419 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2424 { 2425 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2426 PetscErrorCode ierr; 2427 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2428 PetscBLASInt one=1; 2429 2430 PetscFunctionBegin; 2431 if (str == SAME_NONZERO_PATTERN) { 2432 PetscScalar alpha = a; 2433 PetscBLASInt bnz; 2434 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2435 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2436 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2437 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2438 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2439 } else { 2440 Mat B; 2441 PetscInt *nnz; 2442 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2443 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2444 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2445 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2446 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2447 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2448 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2449 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2450 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2451 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2452 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2453 ierr = PetscFree(nnz);CHKERRQ(ierr); 2454 } 2455 PetscFunctionReturn(0); 2456 } 2457 2458 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2459 { 2460 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2461 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2462 MatScalar *aa = a->a; 2463 2464 PetscFunctionBegin; 2465 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2470 { 2471 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2472 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2473 MatScalar *aa = a->a; 2474 2475 PetscFunctionBegin; 2476 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 /* 2481 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2482 */ 2483 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2484 { 2485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2486 PetscErrorCode ierr; 2487 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2488 PetscInt nz = a->i[m],row,*jj,mr,col; 2489 2490 PetscFunctionBegin; 2491 *nn = n; 2492 if (!ia) PetscFunctionReturn(0); 2493 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2494 else { 2495 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2496 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2497 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2498 jj = a->j; 2499 for (i=0; i<nz; i++) { 2500 collengths[jj[i]]++; 2501 } 2502 cia[0] = oshift; 2503 for (i=0; i<n; i++) { 2504 cia[i+1] = cia[i] + collengths[i]; 2505 } 2506 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2507 jj = a->j; 2508 for (row=0; row<m; row++) { 2509 mr = a->i[row+1] - a->i[row]; 2510 for (i=0; i<mr; i++) { 2511 col = *jj++; 2512 2513 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2514 } 2515 } 2516 ierr = PetscFree(collengths);CHKERRQ(ierr); 2517 *ia = cia; *ja = cja; 2518 } 2519 PetscFunctionReturn(0); 2520 } 2521 2522 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2523 { 2524 PetscErrorCode ierr; 2525 2526 PetscFunctionBegin; 2527 if (!ia) PetscFunctionReturn(0); 2528 ierr = PetscFree(*ia);CHKERRQ(ierr); 2529 ierr = PetscFree(*ja);CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 /* 2534 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2535 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2536 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2537 */ 2538 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2539 { 2540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2541 PetscErrorCode ierr; 2542 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2543 PetscInt nz = a->i[m],row,*jj,mr,col; 2544 PetscInt *cspidx; 2545 2546 PetscFunctionBegin; 2547 *nn = n; 2548 if (!ia) PetscFunctionReturn(0); 2549 2550 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2551 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2552 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2553 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2554 jj = a->j; 2555 for (i=0; i<nz; i++) { 2556 collengths[jj[i]]++; 2557 } 2558 cia[0] = oshift; 2559 for (i=0; i<n; i++) { 2560 cia[i+1] = cia[i] + collengths[i]; 2561 } 2562 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2563 jj = a->j; 2564 for (row=0; row<m; row++) { 2565 mr = a->i[row+1] - a->i[row]; 2566 for (i=0; i<mr; i++) { 2567 col = *jj++; 2568 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2569 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2570 } 2571 } 2572 ierr = PetscFree(collengths);CHKERRQ(ierr); 2573 *ia = cia; *ja = cja; 2574 *spidx = cspidx; 2575 PetscFunctionReturn(0); 2576 } 2577 2578 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2579 { 2580 PetscErrorCode ierr; 2581 2582 PetscFunctionBegin; 2583 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2584 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2589 { 2590 PetscErrorCode ierr; 2591 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2592 2593 PetscFunctionBegin; 2594 if (!Y->preallocated || !aij->nz) { 2595 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2596 } 2597 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 /* -------------------------------------------------------------------*/ 2602 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2603 MatGetRow_SeqBAIJ, 2604 MatRestoreRow_SeqBAIJ, 2605 MatMult_SeqBAIJ_N, 2606 /* 4*/ MatMultAdd_SeqBAIJ_N, 2607 MatMultTranspose_SeqBAIJ, 2608 MatMultTransposeAdd_SeqBAIJ, 2609 0, 2610 0, 2611 0, 2612 /* 10*/ 0, 2613 MatLUFactor_SeqBAIJ, 2614 0, 2615 0, 2616 MatTranspose_SeqBAIJ, 2617 /* 15*/ MatGetInfo_SeqBAIJ, 2618 MatEqual_SeqBAIJ, 2619 MatGetDiagonal_SeqBAIJ, 2620 MatDiagonalScale_SeqBAIJ, 2621 MatNorm_SeqBAIJ, 2622 /* 20*/ 0, 2623 MatAssemblyEnd_SeqBAIJ, 2624 MatSetOption_SeqBAIJ, 2625 MatZeroEntries_SeqBAIJ, 2626 /* 24*/ MatZeroRows_SeqBAIJ, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /* 29*/ MatSetUp_SeqBAIJ, 2632 0, 2633 0, 2634 0, 2635 0, 2636 /* 34*/ MatDuplicate_SeqBAIJ, 2637 0, 2638 0, 2639 MatILUFactor_SeqBAIJ, 2640 0, 2641 /* 39*/ MatAXPY_SeqBAIJ, 2642 MatCreateSubMatrices_SeqBAIJ, 2643 MatIncreaseOverlap_SeqBAIJ, 2644 MatGetValues_SeqBAIJ, 2645 MatCopy_SeqBAIJ, 2646 /* 44*/ 0, 2647 MatScale_SeqBAIJ, 2648 MatShift_SeqBAIJ, 2649 0, 2650 MatZeroRowsColumns_SeqBAIJ, 2651 /* 49*/ 0, 2652 MatGetRowIJ_SeqBAIJ, 2653 MatRestoreRowIJ_SeqBAIJ, 2654 MatGetColumnIJ_SeqBAIJ, 2655 MatRestoreColumnIJ_SeqBAIJ, 2656 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2657 0, 2658 0, 2659 0, 2660 MatSetValuesBlocked_SeqBAIJ, 2661 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2662 MatDestroy_SeqBAIJ, 2663 MatView_SeqBAIJ, 2664 0, 2665 0, 2666 /* 64*/ 0, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2672 0, 2673 MatConvert_Basic, 2674 0, 2675 0, 2676 /* 74*/ 0, 2677 MatFDColoringApply_BAIJ, 2678 0, 2679 0, 2680 0, 2681 /* 79*/ 0, 2682 0, 2683 0, 2684 0, 2685 MatLoad_SeqBAIJ, 2686 /* 84*/ 0, 2687 0, 2688 0, 2689 0, 2690 0, 2691 /* 89*/ 0, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /* 94*/ 0, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /* 99*/ 0, 2702 0, 2703 0, 2704 0, 2705 0, 2706 /*104*/ 0, 2707 MatRealPart_SeqBAIJ, 2708 MatImaginaryPart_SeqBAIJ, 2709 0, 2710 0, 2711 /*109*/ 0, 2712 0, 2713 0, 2714 0, 2715 MatMissingDiagonal_SeqBAIJ, 2716 /*114*/ 0, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*119*/ 0, 2722 0, 2723 MatMultHermitianTranspose_SeqBAIJ, 2724 MatMultHermitianTransposeAdd_SeqBAIJ, 2725 0, 2726 /*124*/ 0, 2727 0, 2728 MatInvertBlockDiagonal_SeqBAIJ, 2729 0, 2730 0, 2731 /*129*/ 0, 2732 0, 2733 0, 2734 0, 2735 0, 2736 /*134*/ 0, 2737 0, 2738 0, 2739 0, 2740 0, 2741 /*139*/ MatSetBlockSizes_Default, 2742 0, 2743 0, 2744 MatFDColoringSetUp_SeqXAIJ, 2745 0, 2746 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2747 MatDestroySubMatrices_SeqBAIJ 2748 }; 2749 2750 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2751 { 2752 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2753 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2754 PetscErrorCode ierr; 2755 2756 PetscFunctionBegin; 2757 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2758 2759 /* allocate space for values if not already there */ 2760 if (!aij->saved_values) { 2761 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2762 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2763 } 2764 2765 /* copy values over */ 2766 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2767 PetscFunctionReturn(0); 2768 } 2769 2770 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2771 { 2772 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2773 PetscErrorCode ierr; 2774 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2775 2776 PetscFunctionBegin; 2777 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2778 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2779 2780 /* copy values over */ 2781 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2782 PetscFunctionReturn(0); 2783 } 2784 2785 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2786 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2787 2788 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2789 { 2790 Mat_SeqBAIJ *b; 2791 PetscErrorCode ierr; 2792 PetscInt i,mbs,nbs,bs2; 2793 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2794 2795 PetscFunctionBegin; 2796 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2797 if (nz == MAT_SKIP_ALLOCATION) { 2798 skipallocation = PETSC_TRUE; 2799 nz = 0; 2800 } 2801 2802 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2803 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2804 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2805 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2806 2807 B->preallocated = PETSC_TRUE; 2808 2809 mbs = B->rmap->n/bs; 2810 nbs = B->cmap->n/bs; 2811 bs2 = bs*bs; 2812 2813 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2814 2815 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2816 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2817 if (nnz) { 2818 for (i=0; i<mbs; i++) { 2819 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2820 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2821 } 2822 } 2823 2824 b = (Mat_SeqBAIJ*)B->data; 2825 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2826 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2827 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2828 2829 if (!flg) { 2830 switch (bs) { 2831 case 1: 2832 B->ops->mult = MatMult_SeqBAIJ_1; 2833 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2834 break; 2835 case 2: 2836 B->ops->mult = MatMult_SeqBAIJ_2; 2837 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2838 break; 2839 case 3: 2840 B->ops->mult = MatMult_SeqBAIJ_3; 2841 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2842 break; 2843 case 4: 2844 B->ops->mult = MatMult_SeqBAIJ_4; 2845 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2846 break; 2847 case 5: 2848 B->ops->mult = MatMult_SeqBAIJ_5; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2850 break; 2851 case 6: 2852 B->ops->mult = MatMult_SeqBAIJ_6; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2854 break; 2855 case 7: 2856 B->ops->mult = MatMult_SeqBAIJ_7; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2858 break; 2859 case 9: 2860 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2861 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2863 #else 2864 B->ops->mult = MatMult_SeqBAIJ_N; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2866 #endif 2867 break; 2868 case 11: 2869 B->ops->mult = MatMult_SeqBAIJ_11; 2870 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2871 break; 2872 case 15: 2873 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2875 break; 2876 default: 2877 B->ops->mult = MatMult_SeqBAIJ_N; 2878 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2879 break; 2880 } 2881 } 2882 B->ops->sor = MatSOR_SeqBAIJ; 2883 b->mbs = mbs; 2884 b->nbs = nbs; 2885 if (!skipallocation) { 2886 if (!b->imax) { 2887 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2888 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2889 2890 b->free_imax_ilen = PETSC_TRUE; 2891 } 2892 /* b->ilen will count nonzeros in each block row so far. */ 2893 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2894 if (!nnz) { 2895 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2896 else if (nz < 0) nz = 1; 2897 for (i=0; i<mbs; i++) b->imax[i] = nz; 2898 nz = nz*mbs; 2899 } else { 2900 nz = 0; 2901 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2902 } 2903 2904 /* allocate the matrix space */ 2905 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2906 if (B->structure_only) { 2907 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2908 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2909 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2910 } else { 2911 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2912 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2913 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2914 } 2915 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2916 2917 if (B->structure_only) { 2918 b->singlemalloc = PETSC_FALSE; 2919 b->free_a = PETSC_FALSE; 2920 } else { 2921 b->singlemalloc = PETSC_TRUE; 2922 b->free_a = PETSC_TRUE; 2923 } 2924 b->free_ij = PETSC_TRUE; 2925 2926 b->i[0] = 0; 2927 for (i=1; i<mbs+1; i++) { 2928 b->i[i] = b->i[i-1] + b->imax[i-1]; 2929 } 2930 2931 } else { 2932 b->free_a = PETSC_FALSE; 2933 b->free_ij = PETSC_FALSE; 2934 } 2935 2936 b->bs2 = bs2; 2937 b->mbs = mbs; 2938 b->nz = 0; 2939 b->maxnz = nz; 2940 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2941 B->was_assembled = PETSC_FALSE; 2942 B->assembled = PETSC_FALSE; 2943 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2944 PetscFunctionReturn(0); 2945 } 2946 2947 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2948 { 2949 PetscInt i,m,nz,nz_max=0,*nnz; 2950 PetscScalar *values=0; 2951 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2952 PetscErrorCode ierr; 2953 2954 PetscFunctionBegin; 2955 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2956 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2958 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2959 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2960 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2961 m = B->rmap->n/bs; 2962 2963 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2964 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2965 for (i=0; i<m; i++) { 2966 nz = ii[i+1]- ii[i]; 2967 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2968 nz_max = PetscMax(nz_max, nz); 2969 nnz[i] = nz; 2970 } 2971 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2972 ierr = PetscFree(nnz);CHKERRQ(ierr); 2973 2974 values = (PetscScalar*)V; 2975 if (!values) { 2976 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2977 } 2978 for (i=0; i<m; i++) { 2979 PetscInt ncols = ii[i+1] - ii[i]; 2980 const PetscInt *icols = jj + ii[i]; 2981 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2982 if (!roworiented) { 2983 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2984 } else { 2985 PetscInt j; 2986 for (j=0; j<ncols; j++) { 2987 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2988 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2989 } 2990 } 2991 } 2992 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2993 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 /*MC 3000 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3001 block sparse compressed row format. 3002 3003 Options Database Keys: 3004 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3005 3006 Level: beginner 3007 3008 .seealso: MatCreateSeqBAIJ() 3009 M*/ 3010 3011 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3012 3013 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3014 { 3015 PetscErrorCode ierr; 3016 PetscMPIInt size; 3017 Mat_SeqBAIJ *b; 3018 3019 PetscFunctionBegin; 3020 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3021 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3022 3023 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3024 B->data = (void*)b; 3025 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3026 3027 b->row = 0; 3028 b->col = 0; 3029 b->icol = 0; 3030 b->reallocs = 0; 3031 b->saved_values = 0; 3032 3033 b->roworiented = PETSC_TRUE; 3034 b->nonew = 0; 3035 b->diag = 0; 3036 B->spptr = 0; 3037 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3038 b->keepnonzeropattern = PETSC_FALSE; 3039 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3049 #if defined(PETSC_HAVE_HYPRE) 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3051 #endif 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3059 { 3060 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3061 PetscErrorCode ierr; 3062 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3063 3064 PetscFunctionBegin; 3065 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3066 3067 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3068 c->imax = a->imax; 3069 c->ilen = a->ilen; 3070 c->free_imax_ilen = PETSC_FALSE; 3071 } else { 3072 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3073 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3074 for (i=0; i<mbs; i++) { 3075 c->imax[i] = a->imax[i]; 3076 c->ilen[i] = a->ilen[i]; 3077 } 3078 c->free_imax_ilen = PETSC_TRUE; 3079 } 3080 3081 /* allocate the matrix space */ 3082 if (mallocmatspace) { 3083 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3084 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3085 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3086 3087 c->i = a->i; 3088 c->j = a->j; 3089 c->singlemalloc = PETSC_FALSE; 3090 c->free_a = PETSC_TRUE; 3091 c->free_ij = PETSC_FALSE; 3092 c->parent = A; 3093 C->preallocated = PETSC_TRUE; 3094 C->assembled = PETSC_TRUE; 3095 3096 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3097 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3098 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3099 } else { 3100 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3101 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3102 3103 c->singlemalloc = PETSC_TRUE; 3104 c->free_a = PETSC_TRUE; 3105 c->free_ij = PETSC_TRUE; 3106 3107 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3108 if (mbs > 0) { 3109 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3110 if (cpvalues == MAT_COPY_VALUES) { 3111 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3112 } else { 3113 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3114 } 3115 } 3116 C->preallocated = PETSC_TRUE; 3117 C->assembled = PETSC_TRUE; 3118 } 3119 } 3120 3121 c->roworiented = a->roworiented; 3122 c->nonew = a->nonew; 3123 3124 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3125 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3126 3127 c->bs2 = a->bs2; 3128 c->mbs = a->mbs; 3129 c->nbs = a->nbs; 3130 3131 if (a->diag) { 3132 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3133 c->diag = a->diag; 3134 c->free_diag = PETSC_FALSE; 3135 } else { 3136 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3137 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3138 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3139 c->free_diag = PETSC_TRUE; 3140 } 3141 } else c->diag = 0; 3142 3143 c->nz = a->nz; 3144 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3145 c->solve_work = NULL; 3146 c->mult_work = NULL; 3147 c->sor_workt = NULL; 3148 c->sor_work = NULL; 3149 3150 c->compressedrow.use = a->compressedrow.use; 3151 c->compressedrow.nrows = a->compressedrow.nrows; 3152 if (a->compressedrow.use) { 3153 i = a->compressedrow.nrows; 3154 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3155 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3156 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3157 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3158 } else { 3159 c->compressedrow.use = PETSC_FALSE; 3160 c->compressedrow.i = NULL; 3161 c->compressedrow.rindex = NULL; 3162 } 3163 C->nonzerostate = A->nonzerostate; 3164 3165 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3166 PetscFunctionReturn(0); 3167 } 3168 3169 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3170 { 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3175 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3176 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3177 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3178 PetscFunctionReturn(0); 3179 } 3180 3181 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3182 { 3183 Mat_SeqBAIJ *a; 3184 PetscErrorCode ierr; 3185 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3186 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3187 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3188 PetscInt *masked,nmask,tmp,bs2,ishift; 3189 PetscMPIInt size; 3190 int fd; 3191 PetscScalar *aa; 3192 MPI_Comm comm; 3193 3194 PetscFunctionBegin; 3195 /* force binary viewer to load .info file if it has not yet done so */ 3196 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3197 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3198 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3199 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3200 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3201 if (bs < 0) bs = 1; 3202 bs2 = bs*bs; 3203 3204 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3205 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3206 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3207 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3208 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3209 M = header[1]; N = header[2]; nz = header[3]; 3210 3211 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3212 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3213 3214 /* 3215 This code adds extra rows to make sure the number of rows is 3216 divisible by the blocksize 3217 */ 3218 mbs = M/bs; 3219 extra_rows = bs - M + bs*(mbs); 3220 if (extra_rows == bs) extra_rows = 0; 3221 else mbs++; 3222 if (extra_rows) { 3223 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3224 } 3225 3226 /* Set global sizes if not already set */ 3227 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3228 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3229 } else { /* Check if the matrix global sizes are correct */ 3230 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3231 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3232 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3233 } 3234 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3235 } 3236 3237 /* read in row lengths */ 3238 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3239 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3240 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3241 3242 /* read in column indices */ 3243 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3244 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3245 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3246 3247 /* loop over row lengths determining block row lengths */ 3248 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3249 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3250 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3251 rowcount = 0; 3252 nzcount = 0; 3253 for (i=0; i<mbs; i++) { 3254 nmask = 0; 3255 for (j=0; j<bs; j++) { 3256 kmax = rowlengths[rowcount]; 3257 for (k=0; k<kmax; k++) { 3258 tmp = jj[nzcount++]/bs; 3259 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3260 } 3261 rowcount++; 3262 } 3263 browlengths[i] += nmask; 3264 /* zero out the mask elements we set */ 3265 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3266 } 3267 3268 /* Do preallocation */ 3269 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3270 a = (Mat_SeqBAIJ*)newmat->data; 3271 3272 /* set matrix "i" values */ 3273 a->i[0] = 0; 3274 for (i=1; i<= mbs; i++) { 3275 a->i[i] = a->i[i-1] + browlengths[i-1]; 3276 a->ilen[i-1] = browlengths[i-1]; 3277 } 3278 a->nz = 0; 3279 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3280 3281 /* read in nonzero values */ 3282 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3283 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3284 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3285 3286 /* set "a" and "j" values into matrix */ 3287 nzcount = 0; jcount = 0; 3288 for (i=0; i<mbs; i++) { 3289 nzcountb = nzcount; 3290 nmask = 0; 3291 for (j=0; j<bs; j++) { 3292 kmax = rowlengths[i*bs+j]; 3293 for (k=0; k<kmax; k++) { 3294 tmp = jj[nzcount++]/bs; 3295 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3296 } 3297 } 3298 /* sort the masked values */ 3299 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3300 3301 /* set "j" values into matrix */ 3302 maskcount = 1; 3303 for (j=0; j<nmask; j++) { 3304 a->j[jcount++] = masked[j]; 3305 mask[masked[j]] = maskcount++; 3306 } 3307 /* set "a" values into matrix */ 3308 ishift = bs2*a->i[i]; 3309 for (j=0; j<bs; j++) { 3310 kmax = rowlengths[i*bs+j]; 3311 for (k=0; k<kmax; k++) { 3312 tmp = jj[nzcountb]/bs; 3313 block = mask[tmp] - 1; 3314 point = jj[nzcountb] - bs*tmp; 3315 idx = ishift + bs2*block + j + bs*point; 3316 a->a[idx] = (MatScalar)aa[nzcountb++]; 3317 } 3318 } 3319 /* zero out the mask elements we set */ 3320 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3321 } 3322 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3323 3324 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3325 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3326 ierr = PetscFree(aa);CHKERRQ(ierr); 3327 ierr = PetscFree(jj);CHKERRQ(ierr); 3328 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3329 3330 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3331 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3332 PetscFunctionReturn(0); 3333 } 3334 3335 /*@C 3336 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3337 compressed row) format. For good matrix assembly performance the 3338 user should preallocate the matrix storage by setting the parameter nz 3339 (or the array nnz). By setting these parameters accurately, performance 3340 during matrix assembly can be increased by more than a factor of 50. 3341 3342 Collective on MPI_Comm 3343 3344 Input Parameters: 3345 + comm - MPI communicator, set to PETSC_COMM_SELF 3346 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3347 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3348 . m - number of rows 3349 . n - number of columns 3350 . nz - number of nonzero blocks per block row (same for all rows) 3351 - nnz - array containing the number of nonzero blocks in the various block rows 3352 (possibly different for each block row) or NULL 3353 3354 Output Parameter: 3355 . A - the matrix 3356 3357 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3358 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3359 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3360 3361 Options Database Keys: 3362 . -mat_no_unroll - uses code that does not unroll the loops in the 3363 block calculations (much slower) 3364 . -mat_block_size - size of the blocks to use 3365 3366 Level: intermediate 3367 3368 Notes: 3369 The number of rows and columns must be divisible by blocksize. 3370 3371 If the nnz parameter is given then the nz parameter is ignored 3372 3373 A nonzero block is any block that as 1 or more nonzeros in it 3374 3375 The block AIJ format is fully compatible with standard Fortran 77 3376 storage. That is, the stored row and column indices can begin at 3377 either one (as in Fortran) or zero. See the users' manual for details. 3378 3379 Specify the preallocated storage with either nz or nnz (not both). 3380 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3381 allocation. See Users-Manual: ch_mat for details. 3382 matrices. 3383 3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3385 @*/ 3386 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3387 { 3388 PetscErrorCode ierr; 3389 3390 PetscFunctionBegin; 3391 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3392 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3393 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3394 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 3398 /*@C 3399 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3400 per row in the matrix. For good matrix assembly performance the 3401 user should preallocate the matrix storage by setting the parameter nz 3402 (or the array nnz). By setting these parameters accurately, performance 3403 during matrix assembly can be increased by more than a factor of 50. 3404 3405 Collective on MPI_Comm 3406 3407 Input Parameters: 3408 + B - the matrix 3409 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3410 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3411 . nz - number of block nonzeros per block row (same for all rows) 3412 - nnz - array containing the number of block nonzeros in the various block rows 3413 (possibly different for each block row) or NULL 3414 3415 Options Database Keys: 3416 . -mat_no_unroll - uses code that does not unroll the loops in the 3417 block calculations (much slower) 3418 . -mat_block_size - size of the blocks to use 3419 3420 Level: intermediate 3421 3422 Notes: 3423 If the nnz parameter is given then the nz parameter is ignored 3424 3425 You can call MatGetInfo() to get information on how effective the preallocation was; 3426 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3427 You can also run with the option -info and look for messages with the string 3428 malloc in them to see if additional memory allocation was needed. 3429 3430 The block AIJ format is fully compatible with standard Fortran 77 3431 storage. That is, the stored row and column indices can begin at 3432 either one (as in Fortran) or zero. See the users' manual for details. 3433 3434 Specify the preallocated storage with either nz or nnz (not both). 3435 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3436 allocation. See Users-Manual: ch_mat for details. 3437 3438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3439 @*/ 3440 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3441 { 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3446 PetscValidType(B,1); 3447 PetscValidLogicalCollectiveInt(B,bs,2); 3448 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3449 PetscFunctionReturn(0); 3450 } 3451 3452 /*@C 3453 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3454 (the default sequential PETSc format). 3455 3456 Collective on MPI_Comm 3457 3458 Input Parameters: 3459 + B - the matrix 3460 . i - the indices into j for the start of each local row (starts with zero) 3461 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3462 - v - optional values in the matrix 3463 3464 Level: developer 3465 3466 Notes: 3467 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3468 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3469 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3470 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3471 block column and the second index is over columns within a block. 3472 3473 .keywords: matrix, aij, compressed row, sparse 3474 3475 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3476 @*/ 3477 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3478 { 3479 PetscErrorCode ierr; 3480 3481 PetscFunctionBegin; 3482 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3483 PetscValidType(B,1); 3484 PetscValidLogicalCollectiveInt(B,bs,2); 3485 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3486 PetscFunctionReturn(0); 3487 } 3488 3489 3490 /*@ 3491 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3492 3493 Collective on MPI_Comm 3494 3495 Input Parameters: 3496 + comm - must be an MPI communicator of size 1 3497 . bs - size of block 3498 . m - number of rows 3499 . n - number of columns 3500 . i - row indices 3501 . j - column indices 3502 - a - matrix values 3503 3504 Output Parameter: 3505 . mat - the matrix 3506 3507 Level: advanced 3508 3509 Notes: 3510 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3511 once the matrix is destroyed 3512 3513 You cannot set new nonzero locations into this matrix, that will generate an error. 3514 3515 The i and j indices are 0 based 3516 3517 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3518 3519 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3520 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3521 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3522 with column-major ordering within blocks. 3523 3524 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3525 3526 @*/ 3527 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3528 { 3529 PetscErrorCode ierr; 3530 PetscInt ii; 3531 Mat_SeqBAIJ *baij; 3532 3533 PetscFunctionBegin; 3534 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3535 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3536 3537 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3538 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3539 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3540 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3541 baij = (Mat_SeqBAIJ*)(*mat)->data; 3542 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3543 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3544 3545 baij->i = i; 3546 baij->j = j; 3547 baij->a = a; 3548 3549 baij->singlemalloc = PETSC_FALSE; 3550 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3551 baij->free_a = PETSC_FALSE; 3552 baij->free_ij = PETSC_FALSE; 3553 3554 for (ii=0; ii<m; ii++) { 3555 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3556 #if defined(PETSC_USE_DEBUG) 3557 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3558 #endif 3559 } 3560 #if defined(PETSC_USE_DEBUG) 3561 for (ii=0; ii<baij->i[m]; ii++) { 3562 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3563 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3564 } 3565 #endif 3566 3567 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3568 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3569 PetscFunctionReturn(0); 3570 } 3571 3572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3573 { 3574 PetscErrorCode ierr; 3575 PetscMPIInt size; 3576 3577 PetscFunctionBegin; 3578 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3579 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3580 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3581 } else { 3582 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3583 } 3584 PetscFunctionReturn(0); 3585 } 3586