1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 913 PetscFunctionBegin; 914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 915 stepval = (n-1)*4; 916 for (k=0; k<m; k++) { /* loop over added rows */ 917 row = im[k]; 918 rp = aj + ai[row]; 919 ap = aa + 16*ai[row]; 920 nrow = ailen[row]; 921 low = 0; 922 high = nrow; 923 for (l=0; l<n; l++) { /* loop over added columns */ 924 col = in[l]; 925 if (col <= lastcol) low = 0; 926 else high = nrow; 927 lastcol = col; 928 value = v + k*(stepval+4 + l)*4; 929 while (high-low > 7) { 930 t = (low+high)/2; 931 if (rp[t] > col) high = t; 932 else low = t; 933 } 934 for (i=low; i<high; i++) { 935 if (rp[i] > col) break; 936 if (rp[i] == col) { 937 bap = ap + 16*i; 938 for (ii=0; ii<4; ii++,value+=stepval) { 939 for (jj=ii; jj<16; jj+=4) { 940 bap[jj] += *value++; 941 } 942 } 943 goto noinsert2; 944 } 945 } 946 N = nrow++ - 1; 947 high++; /* added new column index thus must search to one higher than before */ 948 /* shift up all the later entries in this row */ 949 for (ii=N; ii>=i; ii--) { 950 rp[ii+1] = rp[ii]; 951 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 952 } 953 if (N >= i) { 954 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 955 } 956 rp[i] = col; 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] = *value++; 961 } 962 } 963 noinsert2:; 964 low = i; 965 } 966 ailen[row] = nrow; 967 } 968 PetscFunctionReturnVoid(); 969 } 970 971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 972 #define matsetvalues4_ MATSETVALUES4 973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 974 #define matsetvalues4_ matsetvalues4 975 #endif 976 977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 978 { 979 Mat A = *AA; 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 981 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 982 PetscInt *ai=a->i,*ailen=a->ilen; 983 PetscInt *aj=a->j,brow,bcol; 984 PetscInt ridx,cidx,lastcol = -1; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 987 PetscFunctionBegin; 988 for (k=0; k<m; k++) { /* loop over added rows */ 989 row = im[k]; brow = row/4; 990 rp = aj + ai[brow]; 991 ap = aa + 16*ai[brow]; 992 nrow = ailen[brow]; 993 low = 0; 994 high = nrow; 995 for (l=0; l<n; l++) { /* loop over added columns */ 996 col = in[l]; bcol = col/4; 997 ridx = row % 4; cidx = col % 4; 998 value = v[l + k*n]; 999 if (col <= lastcol) low = 0; 1000 else high = nrow; 1001 lastcol = col; 1002 while (high-low > 7) { 1003 t = (low+high)/2; 1004 if (rp[t] > bcol) high = t; 1005 else low = t; 1006 } 1007 for (i=low; i<high; i++) { 1008 if (rp[i] > bcol) break; 1009 if (rp[i] == bcol) { 1010 bap = ap + 16*i + 4*cidx + ridx; 1011 *bap += value; 1012 goto noinsert1; 1013 } 1014 } 1015 N = nrow++ - 1; 1016 high++; /* added new column thus must search to one higher than before */ 1017 /* shift up all the later entries in this row */ 1018 for (ii=N; ii>=i; ii--) { 1019 rp[ii+1] = rp[ii]; 1020 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1021 } 1022 if (N>=i) { 1023 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1024 } 1025 rp[i] = bcol; 1026 ap[16*i + 4*cidx + ridx] = value; 1027 noinsert1:; 1028 low = i; 1029 } 1030 ailen[brow] = nrow; 1031 } 1032 PetscFunctionReturnVoid(); 1033 } 1034 1035 /* 1036 Checks for missing diagonals 1037 */ 1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1039 { 1040 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1041 PetscErrorCode ierr; 1042 PetscInt *diag,*ii = a->i,i; 1043 1044 PetscFunctionBegin; 1045 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1046 *missing = PETSC_FALSE; 1047 if (A->rmap->n > 0 && !ii) { 1048 *missing = PETSC_TRUE; 1049 if (d) *d = 0; 1050 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1051 } else { 1052 diag = a->diag; 1053 for (i=0; i<a->mbs; i++) { 1054 if (diag[i] >= ii[i+1]) { 1055 *missing = PETSC_TRUE; 1056 if (d) *d = i; 1057 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1058 break; 1059 } 1060 } 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1066 { 1067 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 PetscInt i,j,m = a->mbs; 1070 1071 PetscFunctionBegin; 1072 if (!a->diag) { 1073 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1074 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1075 a->free_diag = PETSC_TRUE; 1076 } 1077 for (i=0; i<m; i++) { 1078 a->diag[i] = a->i[i+1]; 1079 for (j=a->i[i]; j<a->i[i+1]; j++) { 1080 if (a->j[j] == i) { 1081 a->diag[i] = j; 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = oshift; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1174 { 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (!ia) PetscFunctionReturn(0); 1179 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1180 ierr = PetscFree(*ia);CHKERRQ(ierr); 1181 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1187 { 1188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 #if defined(PETSC_USE_LOG) 1193 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1194 #endif 1195 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1197 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1198 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1200 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1201 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1204 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1206 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1207 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1208 1209 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1210 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1211 ierr = PetscFree(A->data);CHKERRQ(ierr); 1212 1213 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1224 #if defined(PETSC_HAVE_HYPRE) 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1226 #endif 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1233 { 1234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 switch (op) { 1239 case MAT_ROW_ORIENTED: 1240 a->roworiented = flg; 1241 break; 1242 case MAT_KEEP_NONZERO_PATTERN: 1243 a->keepnonzeropattern = flg; 1244 break; 1245 case MAT_NEW_NONZERO_LOCATIONS: 1246 a->nonew = (flg ? 0 : 1); 1247 break; 1248 case MAT_NEW_NONZERO_LOCATION_ERR: 1249 a->nonew = (flg ? -1 : 0); 1250 break; 1251 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1252 a->nonew = (flg ? -2 : 0); 1253 break; 1254 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1255 a->nounused = (flg ? -1 : 0); 1256 break; 1257 case MAT_NEW_DIAGONALS: 1258 case MAT_IGNORE_OFF_PROC_ENTRIES: 1259 case MAT_USE_HASH_TABLE: 1260 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1261 break; 1262 case MAT_SPD: 1263 case MAT_SYMMETRIC: 1264 case MAT_STRUCTURALLY_SYMMETRIC: 1265 case MAT_HERMITIAN: 1266 case MAT_SYMMETRY_ETERNAL: 1267 case MAT_SUBMAT_SINGLEIS: 1268 case MAT_STRUCTURE_ONLY: 1269 /* These options are handled directly by MatSetOption() */ 1270 break; 1271 default: 1272 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1278 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1279 { 1280 PetscErrorCode ierr; 1281 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1282 MatScalar *aa_i; 1283 PetscScalar *v_i; 1284 1285 PetscFunctionBegin; 1286 bs = A->rmap->bs; 1287 bs2 = bs*bs; 1288 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1289 1290 bn = row/bs; /* Block number */ 1291 bp = row % bs; /* Block Position */ 1292 M = ai[bn+1] - ai[bn]; 1293 *nz = bs*M; 1294 1295 if (v) { 1296 *v = 0; 1297 if (*nz) { 1298 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1299 for (i=0; i<M; i++) { /* for each block in the block row */ 1300 v_i = *v + i*bs; 1301 aa_i = aa + bs2*(ai[bn] + i); 1302 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1303 } 1304 } 1305 } 1306 1307 if (idx) { 1308 *idx = 0; 1309 if (*nz) { 1310 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1311 for (i=0; i<M; i++) { /* for each block in the block row */ 1312 idx_i = *idx + i*bs; 1313 itmp = bs*aj[ai[bn] + i]; 1314 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1315 } 1316 } 1317 } 1318 PetscFunctionReturn(0); 1319 } 1320 1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1322 { 1323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1324 PetscErrorCode ierr; 1325 1326 PetscFunctionBegin; 1327 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1332 { 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1337 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1338 PetscFunctionReturn(0); 1339 } 1340 1341 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1342 { 1343 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1344 Mat C; 1345 PetscErrorCode ierr; 1346 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1347 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1348 MatScalar *ata,*aa=a->a; 1349 1350 PetscFunctionBegin; 1351 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1352 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1353 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1354 1355 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1356 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1357 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1358 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1359 1360 at = (Mat_SeqBAIJ*)C->data; 1361 ati = at->i; 1362 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1363 } else { 1364 C = *B; 1365 at = (Mat_SeqBAIJ*)C->data; 1366 ati = at->i; 1367 } 1368 1369 atj = at->j; 1370 ata = at->a; 1371 1372 /* Copy ati into atfill so we have locations of the next free space in atj */ 1373 ierr = PetscMemcpy(atfill,ati,nbs*sizeof(PetscInt));CHKERRQ(ierr); 1374 1375 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1376 for (i=0; i<mbs; i++) { 1377 anzj = ai[i+1] - ai[i]; 1378 for (j=0; j<anzj; j++) { 1379 atj[atfill[*aj]] = i; 1380 for (kr=0; kr<bs; kr++) { 1381 for (k=0; k<bs; k++) { 1382 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1383 } 1384 } 1385 atfill[*aj++] += 1; 1386 } 1387 } 1388 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 1391 /* Clean up temporary space and complete requests. */ 1392 ierr = PetscFree(atfill);CHKERRQ(ierr); 1393 1394 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1395 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1396 *B = C; 1397 } else { 1398 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1404 { 1405 PetscErrorCode ierr; 1406 Mat Btrans; 1407 1408 PetscFunctionBegin; 1409 *f = PETSC_FALSE; 1410 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1411 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1412 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1419 PetscErrorCode ierr; 1420 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1421 int fd; 1422 PetscScalar *aa; 1423 FILE *file; 1424 1425 PetscFunctionBegin; 1426 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1427 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1428 col_lens[0] = MAT_FILE_CLASSID; 1429 1430 col_lens[1] = A->rmap->N; 1431 col_lens[2] = A->cmap->n; 1432 col_lens[3] = a->nz*bs2; 1433 1434 /* store lengths of each row and write (including header) to file */ 1435 count = 0; 1436 for (i=0; i<a->mbs; i++) { 1437 for (j=0; j<bs; j++) { 1438 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1439 } 1440 } 1441 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1442 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1443 1444 /* store column indices (zero start index) */ 1445 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1446 count = 0; 1447 for (i=0; i<a->mbs; i++) { 1448 for (j=0; j<bs; j++) { 1449 for (k=a->i[i]; k<a->i[i+1]; k++) { 1450 for (l=0; l<bs; l++) { 1451 jj[count++] = bs*a->j[k] + l; 1452 } 1453 } 1454 } 1455 } 1456 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1457 ierr = PetscFree(jj);CHKERRQ(ierr); 1458 1459 /* store nonzero values */ 1460 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1461 count = 0; 1462 for (i=0; i<a->mbs; i++) { 1463 for (j=0; j<bs; j++) { 1464 for (k=a->i[i]; k<a->i[i+1]; k++) { 1465 for (l=0; l<bs; l++) { 1466 aa[count++] = a->a[bs2*k + l*bs + j]; 1467 } 1468 } 1469 } 1470 } 1471 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1472 ierr = PetscFree(aa);CHKERRQ(ierr); 1473 1474 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1475 if (file) { 1476 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1482 { 1483 PetscErrorCode ierr; 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscInt i,bs = A->rmap->bs,k; 1486 1487 PetscFunctionBegin; 1488 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1489 for (i=0; i<a->mbs; i++) { 1490 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1491 for (k=a->i[i]; k<a->i[i+1]; k++) { 1492 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1493 } 1494 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1495 } 1496 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1501 { 1502 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1503 PetscErrorCode ierr; 1504 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1505 PetscViewerFormat format; 1506 1507 PetscFunctionBegin; 1508 if (A->structure_only) { 1509 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1514 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1515 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1516 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1517 const char *matname; 1518 Mat aij; 1519 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1520 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1521 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1522 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1523 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1524 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1525 PetscFunctionReturn(0); 1526 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1527 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1528 for (i=0; i<a->mbs; i++) { 1529 for (j=0; j<bs; j++) { 1530 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1531 for (k=a->i[i]; k<a->i[i+1]; k++) { 1532 for (l=0; l<bs; l++) { 1533 #if defined(PETSC_USE_COMPLEX) 1534 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1535 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1536 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1542 } 1543 #else 1544 if (a->a[bs2*k + l*bs + j] != 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1546 } 1547 #endif 1548 } 1549 } 1550 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1551 } 1552 } 1553 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1554 } else { 1555 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1556 for (i=0; i<a->mbs; i++) { 1557 for (j=0; j<bs; j++) { 1558 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1559 for (k=a->i[i]; k<a->i[i+1]; k++) { 1560 for (l=0; l<bs; l++) { 1561 #if defined(PETSC_USE_COMPLEX) 1562 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1564 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1567 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } else { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1570 } 1571 #else 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1573 #endif 1574 } 1575 } 1576 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1577 } 1578 } 1579 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1580 } 1581 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #include <petscdraw.h> 1586 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1587 { 1588 Mat A = (Mat) Aa; 1589 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1590 PetscErrorCode ierr; 1591 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1592 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1593 MatScalar *aa; 1594 PetscViewer viewer; 1595 PetscViewerFormat format; 1596 1597 PetscFunctionBegin; 1598 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1599 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1600 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1601 1602 /* loop over matrix elements drawing boxes */ 1603 1604 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1605 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1606 /* Blue for negative, Cyan for zero and Red for positive */ 1607 color = PETSC_DRAW_BLUE; 1608 for (i=0,row=0; i<mbs; i++,row+=bs) { 1609 for (j=a->i[i]; j<a->i[i+1]; j++) { 1610 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1611 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1612 aa = a->a + j*bs2; 1613 for (k=0; k<bs; k++) { 1614 for (l=0; l<bs; l++) { 1615 if (PetscRealPart(*aa++) >= 0.) continue; 1616 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1617 } 1618 } 1619 } 1620 } 1621 color = PETSC_DRAW_CYAN; 1622 for (i=0,row=0; i<mbs; i++,row+=bs) { 1623 for (j=a->i[i]; j<a->i[i+1]; j++) { 1624 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1625 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1626 aa = a->a + j*bs2; 1627 for (k=0; k<bs; k++) { 1628 for (l=0; l<bs; l++) { 1629 if (PetscRealPart(*aa++) != 0.) continue; 1630 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1631 } 1632 } 1633 } 1634 } 1635 color = PETSC_DRAW_RED; 1636 for (i=0,row=0; i<mbs; i++,row+=bs) { 1637 for (j=a->i[i]; j<a->i[i+1]; j++) { 1638 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1639 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1640 aa = a->a + j*bs2; 1641 for (k=0; k<bs; k++) { 1642 for (l=0; l<bs; l++) { 1643 if (PetscRealPart(*aa++) <= 0.) continue; 1644 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1645 } 1646 } 1647 } 1648 } 1649 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1650 } else { 1651 /* use contour shading to indicate magnitude of values */ 1652 /* first determine max of all nonzero values */ 1653 PetscReal minv = 0.0, maxv = 0.0; 1654 PetscDraw popup; 1655 1656 for (i=0; i<a->nz*a->bs2; i++) { 1657 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1658 } 1659 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1660 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1661 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1662 1663 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1664 for (i=0,row=0; i<mbs; i++,row+=bs) { 1665 for (j=a->i[i]; j<a->i[i+1]; j++) { 1666 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1667 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1668 aa = a->a + j*bs2; 1669 for (k=0; k<bs; k++) { 1670 for (l=0; l<bs; l++) { 1671 MatScalar v = *aa++; 1672 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1673 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1674 } 1675 } 1676 } 1677 } 1678 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1679 } 1680 PetscFunctionReturn(0); 1681 } 1682 1683 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1684 { 1685 PetscErrorCode ierr; 1686 PetscReal xl,yl,xr,yr,w,h; 1687 PetscDraw draw; 1688 PetscBool isnull; 1689 1690 PetscFunctionBegin; 1691 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1692 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1693 if (isnull) PetscFunctionReturn(0); 1694 1695 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1696 xr += w; yr += h; xl = -w; yl = -h; 1697 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1698 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1699 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1700 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1701 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1706 { 1707 PetscErrorCode ierr; 1708 PetscBool iascii,isbinary,isdraw; 1709 1710 PetscFunctionBegin; 1711 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1713 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1714 if (iascii) { 1715 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1716 } else if (isbinary) { 1717 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1718 } else if (isdraw) { 1719 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1720 } else { 1721 Mat B; 1722 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1723 ierr = MatView(B,viewer);CHKERRQ(ierr); 1724 ierr = MatDestroy(&B);CHKERRQ(ierr); 1725 } 1726 PetscFunctionReturn(0); 1727 } 1728 1729 1730 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1731 { 1732 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1733 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1734 PetscInt *ai = a->i,*ailen = a->ilen; 1735 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1736 MatScalar *ap,*aa = a->a; 1737 1738 PetscFunctionBegin; 1739 for (k=0; k<m; k++) { /* loop over rows */ 1740 row = im[k]; brow = row/bs; 1741 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1742 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1743 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1744 nrow = ailen[brow]; 1745 for (l=0; l<n; l++) { /* loop over columns */ 1746 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1747 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1748 col = in[l]; 1749 bcol = col/bs; 1750 cidx = col%bs; 1751 ridx = row%bs; 1752 high = nrow; 1753 low = 0; /* assume unsorted */ 1754 while (high-low > 5) { 1755 t = (low+high)/2; 1756 if (rp[t] > bcol) high = t; 1757 else low = t; 1758 } 1759 for (i=low; i<high; i++) { 1760 if (rp[i] > bcol) break; 1761 if (rp[i] == bcol) { 1762 *v++ = ap[bs2*i+bs*cidx+ridx]; 1763 goto finished; 1764 } 1765 } 1766 *v++ = 0.0; 1767 finished:; 1768 } 1769 } 1770 PetscFunctionReturn(0); 1771 } 1772 1773 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1774 { 1775 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1776 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1777 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1778 PetscErrorCode ierr; 1779 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1780 PetscBool roworiented=a->roworiented; 1781 const PetscScalar *value = v; 1782 MatScalar *ap=NULL,*aa = a->a,*bap; 1783 1784 PetscFunctionBegin; 1785 if (roworiented) { 1786 stepval = (n-1)*bs; 1787 } else { 1788 stepval = (m-1)*bs; 1789 } 1790 for (k=0; k<m; k++) { /* loop over added rows */ 1791 row = im[k]; 1792 if (row < 0) continue; 1793 #if defined(PETSC_USE_DEBUG) 1794 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1795 #endif 1796 rp = aj + ai[row]; 1797 if (!A->structure_only) ap = aa + bs2*ai[row]; 1798 rmax = imax[row]; 1799 nrow = ailen[row]; 1800 low = 0; 1801 high = nrow; 1802 for (l=0; l<n; l++) { /* loop over added columns */ 1803 if (in[l] < 0) continue; 1804 #if defined(PETSC_USE_DEBUG) 1805 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1806 #endif 1807 col = in[l]; 1808 if (!A->structure_only) { 1809 if (roworiented) { 1810 value = v + (k*(stepval+bs) + l)*bs; 1811 } else { 1812 value = v + (l*(stepval+bs) + k)*bs; 1813 } 1814 } 1815 if (col <= lastcol) low = 0; 1816 else high = nrow; 1817 lastcol = col; 1818 while (high-low > 7) { 1819 t = (low+high)/2; 1820 if (rp[t] > col) high = t; 1821 else low = t; 1822 } 1823 for (i=low; i<high; i++) { 1824 if (rp[i] > col) break; 1825 if (rp[i] == col) { 1826 if (A->structure_only) goto noinsert2; 1827 bap = ap + bs2*i; 1828 if (roworiented) { 1829 if (is == ADD_VALUES) { 1830 for (ii=0; ii<bs; ii++,value+=stepval) { 1831 for (jj=ii; jj<bs2; jj+=bs) { 1832 bap[jj] += *value++; 1833 } 1834 } 1835 } else { 1836 for (ii=0; ii<bs; ii++,value+=stepval) { 1837 for (jj=ii; jj<bs2; jj+=bs) { 1838 bap[jj] = *value++; 1839 } 1840 } 1841 } 1842 } else { 1843 if (is == ADD_VALUES) { 1844 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1845 for (jj=0; jj<bs; jj++) { 1846 bap[jj] += value[jj]; 1847 } 1848 bap += bs; 1849 } 1850 } else { 1851 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1852 for (jj=0; jj<bs; jj++) { 1853 bap[jj] = value[jj]; 1854 } 1855 bap += bs; 1856 } 1857 } 1858 } 1859 goto noinsert2; 1860 } 1861 } 1862 if (nonew == 1) goto noinsert2; 1863 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1864 if (A->structure_only) { 1865 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1866 } else { 1867 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1868 } 1869 N = nrow++ - 1; high++; 1870 /* shift up all the later entries in this row */ 1871 for (ii=N; ii>=i; ii--) { 1872 rp[ii+1] = rp[ii]; 1873 if (!A->structure_only) { 1874 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 } 1877 if (N >= i && !A->structure_only) { 1878 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1879 } 1880 1881 rp[i] = col; 1882 if (!A->structure_only) { 1883 bap = ap + bs2*i; 1884 if (roworiented) { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=ii; jj<bs2; jj+=bs) { 1887 bap[jj] = *value++; 1888 } 1889 } 1890 } else { 1891 for (ii=0; ii<bs; ii++,value+=stepval) { 1892 for (jj=0; jj<bs; jj++) { 1893 *bap++ = *value++; 1894 } 1895 } 1896 } 1897 } 1898 noinsert2:; 1899 low = i; 1900 } 1901 ailen[row] = nrow; 1902 } 1903 PetscFunctionReturn(0); 1904 } 1905 1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1907 { 1908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1909 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1910 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1911 PetscErrorCode ierr; 1912 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1913 MatScalar *aa = a->a,*ap; 1914 PetscReal ratio=0.6; 1915 1916 PetscFunctionBegin; 1917 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1918 1919 if (m) rmax = ailen[0]; 1920 for (i=1; i<mbs; i++) { 1921 /* move each row back by the amount of empty slots (fshift) before it*/ 1922 fshift += imax[i-1] - ailen[i-1]; 1923 rmax = PetscMax(rmax,ailen[i]); 1924 if (fshift) { 1925 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1926 N = ailen[i]; 1927 for (j=0; j<N; j++) { 1928 ip[j-fshift] = ip[j]; 1929 if (!A->structure_only) { 1930 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1931 } 1932 } 1933 } 1934 ai[i] = ai[i-1] + ailen[i-1]; 1935 } 1936 if (mbs) { 1937 fshift += imax[mbs-1] - ailen[mbs-1]; 1938 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1939 } 1940 1941 /* reset ilen and imax for each row */ 1942 a->nonzerorowcnt = 0; 1943 if (A->structure_only) { 1944 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1945 } else { /* !A->structure_only */ 1946 for (i=0; i<mbs; i++) { 1947 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1948 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1949 } 1950 } 1951 a->nz = ai[mbs]; 1952 1953 /* diagonals may have moved, so kill the diagonal pointers */ 1954 a->idiagvalid = PETSC_FALSE; 1955 if (fshift && a->diag) { 1956 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1957 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1958 a->diag = 0; 1959 } 1960 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1961 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1962 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1963 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1964 1965 A->info.mallocs += a->reallocs; 1966 a->reallocs = 0; 1967 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1968 a->rmax = rmax; 1969 1970 if (!A->structure_only) { 1971 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1972 } 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /* 1977 This function returns an array of flags which indicate the locations of contiguous 1978 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1979 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1980 Assume: sizes should be long enough to hold all the values. 1981 */ 1982 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1983 { 1984 PetscInt i,j,k,row; 1985 PetscBool flg; 1986 1987 PetscFunctionBegin; 1988 for (i=0,j=0; i<n; j++) { 1989 row = idx[i]; 1990 if (row%bs!=0) { /* Not the begining of a block */ 1991 sizes[j] = 1; 1992 i++; 1993 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1994 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1995 i++; 1996 } else { /* Begining of the block, so check if the complete block exists */ 1997 flg = PETSC_TRUE; 1998 for (k=1; k<bs; k++) { 1999 if (row+k != idx[i+k]) { /* break in the block */ 2000 flg = PETSC_FALSE; 2001 break; 2002 } 2003 } 2004 if (flg) { /* No break in the bs */ 2005 sizes[j] = bs; 2006 i += bs; 2007 } else { 2008 sizes[j] = 1; 2009 i++; 2010 } 2011 } 2012 } 2013 *bs_max = j; 2014 PetscFunctionReturn(0); 2015 } 2016 2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2018 { 2019 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2020 PetscErrorCode ierr; 2021 PetscInt i,j,k,count,*rows; 2022 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2023 PetscScalar zero = 0.0; 2024 MatScalar *aa; 2025 const PetscScalar *xx; 2026 PetscScalar *bb; 2027 2028 PetscFunctionBegin; 2029 /* fix right hand side if needed */ 2030 if (x && b) { 2031 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2032 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2033 for (i=0; i<is_n; i++) { 2034 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2035 } 2036 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2037 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2038 } 2039 2040 /* Make a copy of the IS and sort it */ 2041 /* allocate memory for rows,sizes */ 2042 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2043 2044 /* copy IS values to rows, and sort them */ 2045 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2046 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2047 2048 if (baij->keepnonzeropattern) { 2049 for (i=0; i<is_n; i++) sizes[i] = 1; 2050 bs_max = is_n; 2051 } else { 2052 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2053 A->nonzerostate++; 2054 } 2055 2056 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2057 row = rows[j]; 2058 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2059 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2060 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2061 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2062 if (diag != (PetscScalar)0.0) { 2063 if (baij->ilen[row/bs] > 0) { 2064 baij->ilen[row/bs] = 1; 2065 baij->j[baij->i[row/bs]] = row/bs; 2066 2067 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2068 } 2069 /* Now insert all the diagonal values for this bs */ 2070 for (k=0; k<bs; k++) { 2071 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 } else { /* (diag == 0.0) */ 2074 baij->ilen[row/bs] = 0; 2075 } /* end (diag == 0.0) */ 2076 } else { /* (sizes[i] != bs) */ 2077 #if defined(PETSC_USE_DEBUG) 2078 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2079 #endif 2080 for (k=0; k<count; k++) { 2081 aa[0] = zero; 2082 aa += bs; 2083 } 2084 if (diag != (PetscScalar)0.0) { 2085 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2086 } 2087 } 2088 } 2089 2090 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2096 { 2097 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2098 PetscErrorCode ierr; 2099 PetscInt i,j,k,count; 2100 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2101 PetscScalar zero = 0.0; 2102 MatScalar *aa; 2103 const PetscScalar *xx; 2104 PetscScalar *bb; 2105 PetscBool *zeroed,vecs = PETSC_FALSE; 2106 2107 PetscFunctionBegin; 2108 /* fix right hand side if needed */ 2109 if (x && b) { 2110 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2111 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2112 vecs = PETSC_TRUE; 2113 } 2114 2115 /* zero the columns */ 2116 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2117 for (i=0; i<is_n; i++) { 2118 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2119 zeroed[is_idx[i]] = PETSC_TRUE; 2120 } 2121 for (i=0; i<A->rmap->N; i++) { 2122 if (!zeroed[i]) { 2123 row = i/bs; 2124 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2125 for (k=0; k<bs; k++) { 2126 col = bs*baij->j[j] + k; 2127 if (zeroed[col]) { 2128 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2129 if (vecs) bb[i] -= aa[0]*xx[col]; 2130 aa[0] = 0.0; 2131 } 2132 } 2133 } 2134 } else if (vecs) bb[i] = diag*xx[i]; 2135 } 2136 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2137 if (vecs) { 2138 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2139 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2140 } 2141 2142 /* zero the rows */ 2143 for (i=0; i<is_n; i++) { 2144 row = is_idx[i]; 2145 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2146 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2147 for (k=0; k<count; k++) { 2148 aa[0] = zero; 2149 aa += bs; 2150 } 2151 if (diag != (PetscScalar)0.0) { 2152 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2153 } 2154 } 2155 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2160 { 2161 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2162 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2163 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2164 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2165 PetscErrorCode ierr; 2166 PetscInt ridx,cidx,bs2=a->bs2; 2167 PetscBool roworiented=a->roworiented; 2168 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2169 2170 PetscFunctionBegin; 2171 for (k=0; k<m; k++) { /* loop over added rows */ 2172 row = im[k]; 2173 brow = row/bs; 2174 if (row < 0) continue; 2175 #if defined(PETSC_USE_DEBUG) 2176 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2177 #endif 2178 rp = aj + ai[brow]; 2179 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2180 rmax = imax[brow]; 2181 nrow = ailen[brow]; 2182 low = 0; 2183 high = nrow; 2184 for (l=0; l<n; l++) { /* loop over added columns */ 2185 if (in[l] < 0) continue; 2186 #if defined(PETSC_USE_DEBUG) 2187 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2188 #endif 2189 col = in[l]; bcol = col/bs; 2190 ridx = row % bs; cidx = col % bs; 2191 if (!A->structure_only) { 2192 if (roworiented) { 2193 value = v[l + k*n]; 2194 } else { 2195 value = v[k + l*m]; 2196 } 2197 } 2198 if (col <= lastcol) low = 0; else high = nrow; 2199 lastcol = col; 2200 while (high-low > 7) { 2201 t = (low+high)/2; 2202 if (rp[t] > bcol) high = t; 2203 else low = t; 2204 } 2205 for (i=low; i<high; i++) { 2206 if (rp[i] > bcol) break; 2207 if (rp[i] == bcol) { 2208 bap = ap + bs2*i + bs*cidx + ridx; 2209 if (!A->structure_only) { 2210 if (is == ADD_VALUES) *bap += value; 2211 else *bap = value; 2212 } 2213 goto noinsert1; 2214 } 2215 } 2216 if (nonew == 1) goto noinsert1; 2217 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2218 if (A->structure_only) { 2219 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2220 } else { 2221 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2222 } 2223 N = nrow++ - 1; high++; 2224 /* shift up all the later entries in this row */ 2225 for (ii=N; ii>=i; ii--) { 2226 rp[ii+1] = rp[ii]; 2227 if (!A->structure_only) { 2228 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2229 } 2230 } 2231 if (N>=i && !A->structure_only) { 2232 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2233 } 2234 rp[i] = bcol; 2235 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2236 a->nz++; 2237 A->nonzerostate++; 2238 noinsert1:; 2239 low = i; 2240 } 2241 ailen[brow] = nrow; 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2247 { 2248 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2249 Mat outA; 2250 PetscErrorCode ierr; 2251 PetscBool row_identity,col_identity; 2252 2253 PetscFunctionBegin; 2254 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2255 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2256 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2257 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2258 2259 outA = inA; 2260 inA->factortype = MAT_FACTOR_LU; 2261 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2262 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2263 2264 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2265 2266 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2267 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2268 a->row = row; 2269 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2270 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2271 a->col = col; 2272 2273 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2274 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2275 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2276 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2277 2278 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2279 if (!a->solve_work) { 2280 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2281 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2282 } 2283 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2288 { 2289 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2290 PetscInt i,nz,mbs; 2291 2292 PetscFunctionBegin; 2293 nz = baij->maxnz; 2294 mbs = baij->mbs; 2295 for (i=0; i<nz; i++) { 2296 baij->j[i] = indices[i]; 2297 } 2298 baij->nz = nz; 2299 for (i=0; i<mbs; i++) { 2300 baij->ilen[i] = baij->imax[i]; 2301 } 2302 PetscFunctionReturn(0); 2303 } 2304 2305 /*@ 2306 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2307 in the matrix. 2308 2309 Input Parameters: 2310 + mat - the SeqBAIJ matrix 2311 - indices - the column indices 2312 2313 Level: advanced 2314 2315 Notes: 2316 This can be called if you have precomputed the nonzero structure of the 2317 matrix and want to provide it to the matrix object to improve the performance 2318 of the MatSetValues() operation. 2319 2320 You MUST have set the correct numbers of nonzeros per row in the call to 2321 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2322 2323 MUST be called before any calls to MatSetValues(); 2324 2325 @*/ 2326 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2327 { 2328 PetscErrorCode ierr; 2329 2330 PetscFunctionBegin; 2331 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2332 PetscValidPointer(indices,2); 2333 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2338 { 2339 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2340 PetscErrorCode ierr; 2341 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2342 PetscReal atmp; 2343 PetscScalar *x,zero = 0.0; 2344 MatScalar *aa; 2345 PetscInt ncols,brow,krow,kcol; 2346 2347 PetscFunctionBegin; 2348 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2349 bs = A->rmap->bs; 2350 aa = a->a; 2351 ai = a->i; 2352 aj = a->j; 2353 mbs = a->mbs; 2354 2355 ierr = VecSet(v,zero);CHKERRQ(ierr); 2356 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2357 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2358 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2359 for (i=0; i<mbs; i++) { 2360 ncols = ai[1] - ai[0]; ai++; 2361 brow = bs*i; 2362 for (j=0; j<ncols; j++) { 2363 for (kcol=0; kcol<bs; kcol++) { 2364 for (krow=0; krow<bs; krow++) { 2365 atmp = PetscAbsScalar(*aa);aa++; 2366 row = brow + krow; /* row index */ 2367 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2368 } 2369 } 2370 aj++; 2371 } 2372 } 2373 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2378 { 2379 PetscErrorCode ierr; 2380 2381 PetscFunctionBegin; 2382 /* If the two matrices have the same copy implementation, use fast copy. */ 2383 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2385 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2386 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2387 2388 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2389 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2390 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2391 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2392 } else { 2393 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2394 } 2395 PetscFunctionReturn(0); 2396 } 2397 2398 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2399 { 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2408 { 2409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2410 2411 PetscFunctionBegin; 2412 *array = a->a; 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2417 { 2418 PetscFunctionBegin; 2419 PetscFunctionReturn(0); 2420 } 2421 2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2423 { 2424 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2425 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2426 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 /* Set the number of nonzeros in the new matrix */ 2431 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2436 { 2437 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2438 PetscErrorCode ierr; 2439 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2440 PetscBLASInt one=1; 2441 2442 PetscFunctionBegin; 2443 if (str == SAME_NONZERO_PATTERN) { 2444 PetscScalar alpha = a; 2445 PetscBLASInt bnz; 2446 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2447 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2448 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2449 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2450 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2451 } else { 2452 Mat B; 2453 PetscInt *nnz; 2454 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2455 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2456 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2457 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2458 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2459 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2460 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2461 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2462 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2463 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2464 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2465 ierr = PetscFree(nnz);CHKERRQ(ierr); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2471 { 2472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2473 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2474 MatScalar *aa = a->a; 2475 2476 PetscFunctionBegin; 2477 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2482 { 2483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2484 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2485 MatScalar *aa = a->a; 2486 2487 PetscFunctionBegin; 2488 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 /* 2493 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2494 */ 2495 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2496 { 2497 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2498 PetscErrorCode ierr; 2499 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2500 PetscInt nz = a->i[m],row,*jj,mr,col; 2501 2502 PetscFunctionBegin; 2503 *nn = n; 2504 if (!ia) PetscFunctionReturn(0); 2505 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2506 else { 2507 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2508 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2509 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2510 jj = a->j; 2511 for (i=0; i<nz; i++) { 2512 collengths[jj[i]]++; 2513 } 2514 cia[0] = oshift; 2515 for (i=0; i<n; i++) { 2516 cia[i+1] = cia[i] + collengths[i]; 2517 } 2518 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2519 jj = a->j; 2520 for (row=0; row<m; row++) { 2521 mr = a->i[row+1] - a->i[row]; 2522 for (i=0; i<mr; i++) { 2523 col = *jj++; 2524 2525 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2526 } 2527 } 2528 ierr = PetscFree(collengths);CHKERRQ(ierr); 2529 *ia = cia; *ja = cja; 2530 } 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2535 { 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 if (!ia) PetscFunctionReturn(0); 2540 ierr = PetscFree(*ia);CHKERRQ(ierr); 2541 ierr = PetscFree(*ja);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 /* 2546 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2547 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2548 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2549 */ 2550 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2551 { 2552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2553 PetscErrorCode ierr; 2554 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2555 PetscInt nz = a->i[m],row,*jj,mr,col; 2556 PetscInt *cspidx; 2557 2558 PetscFunctionBegin; 2559 *nn = n; 2560 if (!ia) PetscFunctionReturn(0); 2561 2562 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2563 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2564 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2565 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2566 jj = a->j; 2567 for (i=0; i<nz; i++) { 2568 collengths[jj[i]]++; 2569 } 2570 cia[0] = oshift; 2571 for (i=0; i<n; i++) { 2572 cia[i+1] = cia[i] + collengths[i]; 2573 } 2574 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2575 jj = a->j; 2576 for (row=0; row<m; row++) { 2577 mr = a->i[row+1] - a->i[row]; 2578 for (i=0; i<mr; i++) { 2579 col = *jj++; 2580 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2581 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2582 } 2583 } 2584 ierr = PetscFree(collengths);CHKERRQ(ierr); 2585 *ia = cia; *ja = cja; 2586 *spidx = cspidx; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2591 { 2592 PetscErrorCode ierr; 2593 2594 PetscFunctionBegin; 2595 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2596 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2601 { 2602 PetscErrorCode ierr; 2603 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2604 2605 PetscFunctionBegin; 2606 if (!Y->preallocated || !aij->nz) { 2607 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2608 } 2609 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 /* -------------------------------------------------------------------*/ 2614 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2615 MatGetRow_SeqBAIJ, 2616 MatRestoreRow_SeqBAIJ, 2617 MatMult_SeqBAIJ_N, 2618 /* 4*/ MatMultAdd_SeqBAIJ_N, 2619 MatMultTranspose_SeqBAIJ, 2620 MatMultTransposeAdd_SeqBAIJ, 2621 0, 2622 0, 2623 0, 2624 /* 10*/ 0, 2625 MatLUFactor_SeqBAIJ, 2626 0, 2627 0, 2628 MatTranspose_SeqBAIJ, 2629 /* 15*/ MatGetInfo_SeqBAIJ, 2630 MatEqual_SeqBAIJ, 2631 MatGetDiagonal_SeqBAIJ, 2632 MatDiagonalScale_SeqBAIJ, 2633 MatNorm_SeqBAIJ, 2634 /* 20*/ 0, 2635 MatAssemblyEnd_SeqBAIJ, 2636 MatSetOption_SeqBAIJ, 2637 MatZeroEntries_SeqBAIJ, 2638 /* 24*/ MatZeroRows_SeqBAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /* 29*/ MatSetUp_SeqBAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /* 34*/ MatDuplicate_SeqBAIJ, 2649 0, 2650 0, 2651 MatILUFactor_SeqBAIJ, 2652 0, 2653 /* 39*/ MatAXPY_SeqBAIJ, 2654 MatCreateSubMatrices_SeqBAIJ, 2655 MatIncreaseOverlap_SeqBAIJ, 2656 MatGetValues_SeqBAIJ, 2657 MatCopy_SeqBAIJ, 2658 /* 44*/ 0, 2659 MatScale_SeqBAIJ, 2660 MatShift_SeqBAIJ, 2661 0, 2662 MatZeroRowsColumns_SeqBAIJ, 2663 /* 49*/ 0, 2664 MatGetRowIJ_SeqBAIJ, 2665 MatRestoreRowIJ_SeqBAIJ, 2666 MatGetColumnIJ_SeqBAIJ, 2667 MatRestoreColumnIJ_SeqBAIJ, 2668 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2669 0, 2670 0, 2671 0, 2672 MatSetValuesBlocked_SeqBAIJ, 2673 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2674 MatDestroy_SeqBAIJ, 2675 MatView_SeqBAIJ, 2676 0, 2677 0, 2678 /* 64*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2684 0, 2685 MatConvert_Basic, 2686 0, 2687 0, 2688 /* 74*/ 0, 2689 MatFDColoringApply_BAIJ, 2690 0, 2691 0, 2692 0, 2693 /* 79*/ 0, 2694 0, 2695 0, 2696 0, 2697 MatLoad_SeqBAIJ, 2698 /* 84*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /* 89*/ 0, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /* 94*/ 0, 2709 0, 2710 0, 2711 0, 2712 0, 2713 /* 99*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*104*/ 0, 2719 MatRealPart_SeqBAIJ, 2720 MatImaginaryPart_SeqBAIJ, 2721 0, 2722 0, 2723 /*109*/ 0, 2724 0, 2725 0, 2726 0, 2727 MatMissingDiagonal_SeqBAIJ, 2728 /*114*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*119*/ 0, 2734 0, 2735 MatMultHermitianTranspose_SeqBAIJ, 2736 MatMultHermitianTransposeAdd_SeqBAIJ, 2737 0, 2738 /*124*/ 0, 2739 0, 2740 MatInvertBlockDiagonal_SeqBAIJ, 2741 0, 2742 0, 2743 /*129*/ 0, 2744 0, 2745 0, 2746 0, 2747 0, 2748 /*134*/ 0, 2749 0, 2750 0, 2751 0, 2752 0, 2753 /*139*/ MatSetBlockSizes_Default, 2754 0, 2755 0, 2756 MatFDColoringSetUp_SeqXAIJ, 2757 0, 2758 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2759 MatDestroySubMatrices_SeqBAIJ 2760 }; 2761 2762 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2763 { 2764 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2765 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2766 PetscErrorCode ierr; 2767 2768 PetscFunctionBegin; 2769 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2770 2771 /* allocate space for values if not already there */ 2772 if (!aij->saved_values) { 2773 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2774 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2775 } 2776 2777 /* copy values over */ 2778 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2783 { 2784 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2785 PetscErrorCode ierr; 2786 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2787 2788 PetscFunctionBegin; 2789 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2790 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2791 2792 /* copy values over */ 2793 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2798 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2799 2800 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2801 { 2802 Mat_SeqBAIJ *b; 2803 PetscErrorCode ierr; 2804 PetscInt i,mbs,nbs,bs2; 2805 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2806 2807 PetscFunctionBegin; 2808 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2809 if (nz == MAT_SKIP_ALLOCATION) { 2810 skipallocation = PETSC_TRUE; 2811 nz = 0; 2812 } 2813 2814 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2815 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2817 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2818 2819 B->preallocated = PETSC_TRUE; 2820 2821 mbs = B->rmap->n/bs; 2822 nbs = B->cmap->n/bs; 2823 bs2 = bs*bs; 2824 2825 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2826 2827 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2828 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2829 if (nnz) { 2830 for (i=0; i<mbs; i++) { 2831 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2832 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2833 } 2834 } 2835 2836 b = (Mat_SeqBAIJ*)B->data; 2837 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2838 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2839 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2840 2841 if (!flg) { 2842 switch (bs) { 2843 case 1: 2844 B->ops->mult = MatMult_SeqBAIJ_1; 2845 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2846 break; 2847 case 2: 2848 B->ops->mult = MatMult_SeqBAIJ_2; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2850 break; 2851 case 3: 2852 B->ops->mult = MatMult_SeqBAIJ_3; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2854 break; 2855 case 4: 2856 B->ops->mult = MatMult_SeqBAIJ_4; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2858 break; 2859 case 5: 2860 B->ops->mult = MatMult_SeqBAIJ_5; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2862 break; 2863 case 6: 2864 B->ops->mult = MatMult_SeqBAIJ_6; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2866 break; 2867 case 7: 2868 B->ops->mult = MatMult_SeqBAIJ_7; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2870 break; 2871 case 9: 2872 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2873 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2875 #else 2876 B->ops->mult = MatMult_SeqBAIJ_N; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2878 #endif 2879 break; 2880 case 11: 2881 B->ops->mult = MatMult_SeqBAIJ_11; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2883 break; 2884 case 15: 2885 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2886 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2887 break; 2888 default: 2889 B->ops->mult = MatMult_SeqBAIJ_N; 2890 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2891 break; 2892 } 2893 } 2894 B->ops->sor = MatSOR_SeqBAIJ; 2895 b->mbs = mbs; 2896 b->nbs = nbs; 2897 if (!skipallocation) { 2898 if (!b->imax) { 2899 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2900 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2901 2902 b->free_imax_ilen = PETSC_TRUE; 2903 } 2904 /* b->ilen will count nonzeros in each block row so far. */ 2905 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2906 if (!nnz) { 2907 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2908 else if (nz < 0) nz = 1; 2909 nz = PetscMin(nz,nbs); 2910 for (i=0; i<mbs; i++) b->imax[i] = nz; 2911 nz = nz*mbs; 2912 } else { 2913 nz = 0; 2914 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2915 } 2916 2917 /* allocate the matrix space */ 2918 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2919 if (B->structure_only) { 2920 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2921 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2922 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2923 } else { 2924 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2925 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2926 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2927 } 2928 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2929 2930 if (B->structure_only) { 2931 b->singlemalloc = PETSC_FALSE; 2932 b->free_a = PETSC_FALSE; 2933 } else { 2934 b->singlemalloc = PETSC_TRUE; 2935 b->free_a = PETSC_TRUE; 2936 } 2937 b->free_ij = PETSC_TRUE; 2938 2939 b->i[0] = 0; 2940 for (i=1; i<mbs+1; i++) { 2941 b->i[i] = b->i[i-1] + b->imax[i-1]; 2942 } 2943 2944 } else { 2945 b->free_a = PETSC_FALSE; 2946 b->free_ij = PETSC_FALSE; 2947 } 2948 2949 b->bs2 = bs2; 2950 b->mbs = mbs; 2951 b->nz = 0; 2952 b->maxnz = nz; 2953 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2954 B->was_assembled = PETSC_FALSE; 2955 B->assembled = PETSC_FALSE; 2956 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2957 PetscFunctionReturn(0); 2958 } 2959 2960 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2961 { 2962 PetscInt i,m,nz,nz_max=0,*nnz; 2963 PetscScalar *values=0; 2964 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2965 PetscErrorCode ierr; 2966 2967 PetscFunctionBegin; 2968 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2969 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2970 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2971 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2972 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2973 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2974 m = B->rmap->n/bs; 2975 2976 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2977 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2978 for (i=0; i<m; i++) { 2979 nz = ii[i+1]- ii[i]; 2980 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2981 nz_max = PetscMax(nz_max, nz); 2982 nnz[i] = nz; 2983 } 2984 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2985 ierr = PetscFree(nnz);CHKERRQ(ierr); 2986 2987 values = (PetscScalar*)V; 2988 if (!values) { 2989 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2990 } 2991 for (i=0; i<m; i++) { 2992 PetscInt ncols = ii[i+1] - ii[i]; 2993 const PetscInt *icols = jj + ii[i]; 2994 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2995 if (!roworiented) { 2996 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2997 } else { 2998 PetscInt j; 2999 for (j=0; j<ncols; j++) { 3000 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3001 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3002 } 3003 } 3004 } 3005 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3006 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3007 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3008 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 /*MC 3013 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3014 block sparse compressed row format. 3015 3016 Options Database Keys: 3017 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3018 3019 Level: beginner 3020 3021 .seealso: MatCreateSeqBAIJ() 3022 M*/ 3023 3024 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3025 3026 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3027 { 3028 PetscErrorCode ierr; 3029 PetscMPIInt size; 3030 Mat_SeqBAIJ *b; 3031 3032 PetscFunctionBegin; 3033 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3034 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3035 3036 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3037 B->data = (void*)b; 3038 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3039 3040 b->row = 0; 3041 b->col = 0; 3042 b->icol = 0; 3043 b->reallocs = 0; 3044 b->saved_values = 0; 3045 3046 b->roworiented = PETSC_TRUE; 3047 b->nonew = 0; 3048 b->diag = 0; 3049 B->spptr = 0; 3050 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3051 b->keepnonzeropattern = PETSC_FALSE; 3052 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3062 #if defined(PETSC_HAVE_HYPRE) 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3064 #endif 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3072 { 3073 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3074 PetscErrorCode ierr; 3075 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3076 3077 PetscFunctionBegin; 3078 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3079 3080 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3081 c->imax = a->imax; 3082 c->ilen = a->ilen; 3083 c->free_imax_ilen = PETSC_FALSE; 3084 } else { 3085 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3086 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3087 for (i=0; i<mbs; i++) { 3088 c->imax[i] = a->imax[i]; 3089 c->ilen[i] = a->ilen[i]; 3090 } 3091 c->free_imax_ilen = PETSC_TRUE; 3092 } 3093 3094 /* allocate the matrix space */ 3095 if (mallocmatspace) { 3096 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3097 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3098 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3099 3100 c->i = a->i; 3101 c->j = a->j; 3102 c->singlemalloc = PETSC_FALSE; 3103 c->free_a = PETSC_TRUE; 3104 c->free_ij = PETSC_FALSE; 3105 c->parent = A; 3106 C->preallocated = PETSC_TRUE; 3107 C->assembled = PETSC_TRUE; 3108 3109 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3110 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3111 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3112 } else { 3113 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3114 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3115 3116 c->singlemalloc = PETSC_TRUE; 3117 c->free_a = PETSC_TRUE; 3118 c->free_ij = PETSC_TRUE; 3119 3120 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3121 if (mbs > 0) { 3122 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3123 if (cpvalues == MAT_COPY_VALUES) { 3124 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3125 } else { 3126 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3127 } 3128 } 3129 C->preallocated = PETSC_TRUE; 3130 C->assembled = PETSC_TRUE; 3131 } 3132 } 3133 3134 c->roworiented = a->roworiented; 3135 c->nonew = a->nonew; 3136 3137 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3138 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3139 3140 c->bs2 = a->bs2; 3141 c->mbs = a->mbs; 3142 c->nbs = a->nbs; 3143 3144 if (a->diag) { 3145 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3146 c->diag = a->diag; 3147 c->free_diag = PETSC_FALSE; 3148 } else { 3149 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3150 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3151 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3152 c->free_diag = PETSC_TRUE; 3153 } 3154 } else c->diag = 0; 3155 3156 c->nz = a->nz; 3157 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3158 c->solve_work = NULL; 3159 c->mult_work = NULL; 3160 c->sor_workt = NULL; 3161 c->sor_work = NULL; 3162 3163 c->compressedrow.use = a->compressedrow.use; 3164 c->compressedrow.nrows = a->compressedrow.nrows; 3165 if (a->compressedrow.use) { 3166 i = a->compressedrow.nrows; 3167 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3168 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3169 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3170 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3171 } else { 3172 c->compressedrow.use = PETSC_FALSE; 3173 c->compressedrow.i = NULL; 3174 c->compressedrow.rindex = NULL; 3175 } 3176 C->nonzerostate = A->nonzerostate; 3177 3178 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3179 PetscFunctionReturn(0); 3180 } 3181 3182 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3183 { 3184 PetscErrorCode ierr; 3185 3186 PetscFunctionBegin; 3187 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3188 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3189 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3190 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3191 PetscFunctionReturn(0); 3192 } 3193 3194 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3195 { 3196 Mat_SeqBAIJ *a; 3197 PetscErrorCode ierr; 3198 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3199 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3200 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3201 PetscInt *masked,nmask,tmp,bs2,ishift; 3202 PetscMPIInt size; 3203 int fd; 3204 PetscScalar *aa; 3205 MPI_Comm comm; 3206 PetscBool isbinary; 3207 3208 PetscFunctionBegin; 3209 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3210 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 3211 3212 /* force binary viewer to load .info file if it has not yet done so */ 3213 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3214 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3215 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3216 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3217 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3218 if (bs < 0) bs = 1; 3219 bs2 = bs*bs; 3220 3221 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3222 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3223 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3224 ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3225 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3226 M = header[1]; N = header[2]; nz = header[3]; 3227 3228 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3229 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3230 3231 /* 3232 This code adds extra rows to make sure the number of rows is 3233 divisible by the blocksize 3234 */ 3235 mbs = M/bs; 3236 extra_rows = bs - M + bs*(mbs); 3237 if (extra_rows == bs) extra_rows = 0; 3238 else mbs++; 3239 if (extra_rows) { 3240 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3241 } 3242 3243 /* Set global sizes if not already set */ 3244 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3245 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3246 } else { /* Check if the matrix global sizes are correct */ 3247 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3248 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3249 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3250 } 3251 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3252 } 3253 3254 /* read in row lengths */ 3255 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3256 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 3257 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3258 3259 /* read in column indices */ 3260 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3261 ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3262 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3263 3264 /* loop over row lengths determining block row lengths */ 3265 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3266 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3267 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3268 rowcount = 0; 3269 nzcount = 0; 3270 for (i=0; i<mbs; i++) { 3271 nmask = 0; 3272 for (j=0; j<bs; j++) { 3273 kmax = rowlengths[rowcount]; 3274 for (k=0; k<kmax; k++) { 3275 tmp = jj[nzcount++]/bs; 3276 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3277 } 3278 rowcount++; 3279 } 3280 browlengths[i] += nmask; 3281 /* zero out the mask elements we set */ 3282 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3283 } 3284 3285 /* Do preallocation */ 3286 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3287 a = (Mat_SeqBAIJ*)newmat->data; 3288 3289 /* set matrix "i" values */ 3290 a->i[0] = 0; 3291 for (i=1; i<= mbs; i++) { 3292 a->i[i] = a->i[i-1] + browlengths[i-1]; 3293 a->ilen[i-1] = browlengths[i-1]; 3294 } 3295 a->nz = 0; 3296 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3297 3298 /* read in nonzero values */ 3299 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3300 ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3301 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3302 3303 /* set "a" and "j" values into matrix */ 3304 nzcount = 0; jcount = 0; 3305 for (i=0; i<mbs; i++) { 3306 nzcountb = nzcount; 3307 nmask = 0; 3308 for (j=0; j<bs; j++) { 3309 kmax = rowlengths[i*bs+j]; 3310 for (k=0; k<kmax; k++) { 3311 tmp = jj[nzcount++]/bs; 3312 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3313 } 3314 } 3315 /* sort the masked values */ 3316 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3317 3318 /* set "j" values into matrix */ 3319 maskcount = 1; 3320 for (j=0; j<nmask; j++) { 3321 a->j[jcount++] = masked[j]; 3322 mask[masked[j]] = maskcount++; 3323 } 3324 /* set "a" values into matrix */ 3325 ishift = bs2*a->i[i]; 3326 for (j=0; j<bs; j++) { 3327 kmax = rowlengths[i*bs+j]; 3328 for (k=0; k<kmax; k++) { 3329 tmp = jj[nzcountb]/bs; 3330 block = mask[tmp] - 1; 3331 point = jj[nzcountb] - bs*tmp; 3332 idx = ishift + bs2*block + j + bs*point; 3333 a->a[idx] = (MatScalar)aa[nzcountb++]; 3334 } 3335 } 3336 /* zero out the mask elements we set */ 3337 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3338 } 3339 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3340 3341 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3342 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3343 ierr = PetscFree(aa);CHKERRQ(ierr); 3344 ierr = PetscFree(jj);CHKERRQ(ierr); 3345 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3346 3347 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3348 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3349 PetscFunctionReturn(0); 3350 } 3351 3352 /*@C 3353 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3354 compressed row) format. For good matrix assembly performance the 3355 user should preallocate the matrix storage by setting the parameter nz 3356 (or the array nnz). By setting these parameters accurately, performance 3357 during matrix assembly can be increased by more than a factor of 50. 3358 3359 Collective on MPI_Comm 3360 3361 Input Parameters: 3362 + comm - MPI communicator, set to PETSC_COMM_SELF 3363 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3364 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3365 . m - number of rows 3366 . n - number of columns 3367 . nz - number of nonzero blocks per block row (same for all rows) 3368 - nnz - array containing the number of nonzero blocks in the various block rows 3369 (possibly different for each block row) or NULL 3370 3371 Output Parameter: 3372 . A - the matrix 3373 3374 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3375 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3376 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3377 3378 Options Database Keys: 3379 . -mat_no_unroll - uses code that does not unroll the loops in the 3380 block calculations (much slower) 3381 . -mat_block_size - size of the blocks to use 3382 3383 Level: intermediate 3384 3385 Notes: 3386 The number of rows and columns must be divisible by blocksize. 3387 3388 If the nnz parameter is given then the nz parameter is ignored 3389 3390 A nonzero block is any block that as 1 or more nonzeros in it 3391 3392 The block AIJ format is fully compatible with standard Fortran 77 3393 storage. That is, the stored row and column indices can begin at 3394 either one (as in Fortran) or zero. See the users' manual for details. 3395 3396 Specify the preallocated storage with either nz or nnz (not both). 3397 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3398 allocation. See Users-Manual: ch_mat for details. 3399 matrices. 3400 3401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3402 @*/ 3403 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3404 { 3405 PetscErrorCode ierr; 3406 3407 PetscFunctionBegin; 3408 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3409 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3410 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3411 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3412 PetscFunctionReturn(0); 3413 } 3414 3415 /*@C 3416 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3417 per row in the matrix. For good matrix assembly performance the 3418 user should preallocate the matrix storage by setting the parameter nz 3419 (or the array nnz). By setting these parameters accurately, performance 3420 during matrix assembly can be increased by more than a factor of 50. 3421 3422 Collective on MPI_Comm 3423 3424 Input Parameters: 3425 + B - the matrix 3426 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3427 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3428 . nz - number of block nonzeros per block row (same for all rows) 3429 - nnz - array containing the number of block nonzeros in the various block rows 3430 (possibly different for each block row) or NULL 3431 3432 Options Database Keys: 3433 . -mat_no_unroll - uses code that does not unroll the loops in the 3434 block calculations (much slower) 3435 . -mat_block_size - size of the blocks to use 3436 3437 Level: intermediate 3438 3439 Notes: 3440 If the nnz parameter is given then the nz parameter is ignored 3441 3442 You can call MatGetInfo() to get information on how effective the preallocation was; 3443 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3444 You can also run with the option -info and look for messages with the string 3445 malloc in them to see if additional memory allocation was needed. 3446 3447 The block AIJ format is fully compatible with standard Fortran 77 3448 storage. That is, the stored row and column indices can begin at 3449 either one (as in Fortran) or zero. See the users' manual for details. 3450 3451 Specify the preallocated storage with either nz or nnz (not both). 3452 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3453 allocation. See Users-Manual: ch_mat for details. 3454 3455 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3456 @*/ 3457 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3458 { 3459 PetscErrorCode ierr; 3460 3461 PetscFunctionBegin; 3462 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3463 PetscValidType(B,1); 3464 PetscValidLogicalCollectiveInt(B,bs,2); 3465 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3466 PetscFunctionReturn(0); 3467 } 3468 3469 /*@C 3470 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3471 (the default sequential PETSc format). 3472 3473 Collective on MPI_Comm 3474 3475 Input Parameters: 3476 + B - the matrix 3477 . i - the indices into j for the start of each local row (starts with zero) 3478 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3479 - v - optional values in the matrix 3480 3481 Level: developer 3482 3483 Notes: 3484 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3485 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3486 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3487 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3488 block column and the second index is over columns within a block. 3489 3490 .keywords: matrix, aij, compressed row, sparse 3491 3492 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3493 @*/ 3494 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3495 { 3496 PetscErrorCode ierr; 3497 3498 PetscFunctionBegin; 3499 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3500 PetscValidType(B,1); 3501 PetscValidLogicalCollectiveInt(B,bs,2); 3502 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3503 PetscFunctionReturn(0); 3504 } 3505 3506 3507 /*@ 3508 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3509 3510 Collective on MPI_Comm 3511 3512 Input Parameters: 3513 + comm - must be an MPI communicator of size 1 3514 . bs - size of block 3515 . m - number of rows 3516 . n - number of columns 3517 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3518 . j - column indices 3519 - a - matrix values 3520 3521 Output Parameter: 3522 . mat - the matrix 3523 3524 Level: advanced 3525 3526 Notes: 3527 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3528 once the matrix is destroyed 3529 3530 You cannot set new nonzero locations into this matrix, that will generate an error. 3531 3532 The i and j indices are 0 based 3533 3534 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3535 3536 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3537 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3538 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3539 with column-major ordering within blocks. 3540 3541 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3542 3543 @*/ 3544 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3545 { 3546 PetscErrorCode ierr; 3547 PetscInt ii; 3548 Mat_SeqBAIJ *baij; 3549 3550 PetscFunctionBegin; 3551 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3552 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3553 3554 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3555 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3556 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3557 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3558 baij = (Mat_SeqBAIJ*)(*mat)->data; 3559 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3560 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3561 3562 baij->i = i; 3563 baij->j = j; 3564 baij->a = a; 3565 3566 baij->singlemalloc = PETSC_FALSE; 3567 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3568 baij->free_a = PETSC_FALSE; 3569 baij->free_ij = PETSC_FALSE; 3570 3571 for (ii=0; ii<m; ii++) { 3572 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3573 #if defined(PETSC_USE_DEBUG) 3574 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3575 #endif 3576 } 3577 #if defined(PETSC_USE_DEBUG) 3578 for (ii=0; ii<baij->i[m]; ii++) { 3579 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3580 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3581 } 3582 #endif 3583 3584 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3585 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3586 PetscFunctionReturn(0); 3587 } 3588 3589 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3590 { 3591 PetscErrorCode ierr; 3592 PetscMPIInt size; 3593 3594 PetscFunctionBegin; 3595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3596 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3597 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3598 } else { 3599 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3600 } 3601 PetscFunctionReturn(0); 3602 } 3603