1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 19 20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 21 { 22 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 23 PetscErrorCode ierr; 24 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 25 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 26 PetscReal shift = 0.0; 27 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 28 29 PetscFunctionBegin; 30 allowzeropivot = PetscNot(A->erroriffailure); 31 32 if (a->idiagvalid) { 33 if (values) *values = a->idiag; 34 PetscFunctionReturn(0); 35 } 36 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 37 diag_offset = a->diag; 38 if (!a->idiag) { 39 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 40 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 41 } 42 diag = a->idiag; 43 if (values) *values = a->idiag; 44 /* factor and invert each block */ 45 switch (bs) { 46 case 1: 47 for (i=0; i<mbs; i++) { 48 odiag = v + 1*diag_offset[i]; 49 diag[0] = odiag[0]; 50 51 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 52 if (allowzeropivot) { 53 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 54 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 55 A->factorerror_zeropivot_row = i; 56 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 57 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 58 } 59 60 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 61 diag += 1; 62 } 63 break; 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 69 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 70 diag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 80 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 81 diag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 88 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 89 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 90 diag += 16; 91 } 92 break; 93 case 5: 94 for (i=0; i<mbs; i++) { 95 odiag = v + 25*diag_offset[i]; 96 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 97 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 98 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 99 diag += 25; 100 } 101 break; 102 case 6: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 36*diag_offset[i]; 105 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 107 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108 diag += 36; 109 } 110 break; 111 case 7: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 49*diag_offset[i]; 114 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 49; 118 } 119 break; 120 default: 121 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 122 for (i=0; i<mbs; i++) { 123 odiag = v + bs2*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 126 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 diag += bs2; 128 } 129 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 PetscScalar *x,*work,*w,*workt,*t; 139 const MatScalar *v,*aa = a->a, *idiag; 140 const PetscScalar *b,*xb; 141 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 142 PetscErrorCode ierr; 143 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 144 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 145 146 PetscFunctionBegin; 147 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 913 PetscFunctionBegin; 914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 915 stepval = (n-1)*4; 916 for (k=0; k<m; k++) { /* loop over added rows */ 917 row = im[k]; 918 rp = aj + ai[row]; 919 ap = aa + 16*ai[row]; 920 nrow = ailen[row]; 921 low = 0; 922 high = nrow; 923 for (l=0; l<n; l++) { /* loop over added columns */ 924 col = in[l]; 925 if (col <= lastcol) low = 0; 926 else high = nrow; 927 lastcol = col; 928 value = v + k*(stepval+4 + l)*4; 929 while (high-low > 7) { 930 t = (low+high)/2; 931 if (rp[t] > col) high = t; 932 else low = t; 933 } 934 for (i=low; i<high; i++) { 935 if (rp[i] > col) break; 936 if (rp[i] == col) { 937 bap = ap + 16*i; 938 for (ii=0; ii<4; ii++,value+=stepval) { 939 for (jj=ii; jj<16; jj+=4) { 940 bap[jj] += *value++; 941 } 942 } 943 goto noinsert2; 944 } 945 } 946 N = nrow++ - 1; 947 high++; /* added new column index thus must search to one higher than before */ 948 /* shift up all the later entries in this row */ 949 for (ii=N; ii>=i; ii--) { 950 rp[ii+1] = rp[ii]; 951 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 952 } 953 if (N >= i) { 954 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 955 } 956 rp[i] = col; 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] = *value++; 961 } 962 } 963 noinsert2:; 964 low = i; 965 } 966 ailen[row] = nrow; 967 } 968 PetscFunctionReturnVoid(); 969 } 970 971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 972 #define matsetvalues4_ MATSETVALUES4 973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 974 #define matsetvalues4_ matsetvalues4 975 #endif 976 977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 978 { 979 Mat A = *AA; 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 981 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 982 PetscInt *ai=a->i,*ailen=a->ilen; 983 PetscInt *aj=a->j,brow,bcol; 984 PetscInt ridx,cidx,lastcol = -1; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 987 PetscFunctionBegin; 988 for (k=0; k<m; k++) { /* loop over added rows */ 989 row = im[k]; brow = row/4; 990 rp = aj + ai[brow]; 991 ap = aa + 16*ai[brow]; 992 nrow = ailen[brow]; 993 low = 0; 994 high = nrow; 995 for (l=0; l<n; l++) { /* loop over added columns */ 996 col = in[l]; bcol = col/4; 997 ridx = row % 4; cidx = col % 4; 998 value = v[l + k*n]; 999 if (col <= lastcol) low = 0; 1000 else high = nrow; 1001 lastcol = col; 1002 while (high-low > 7) { 1003 t = (low+high)/2; 1004 if (rp[t] > bcol) high = t; 1005 else low = t; 1006 } 1007 for (i=low; i<high; i++) { 1008 if (rp[i] > bcol) break; 1009 if (rp[i] == bcol) { 1010 bap = ap + 16*i + 4*cidx + ridx; 1011 *bap += value; 1012 goto noinsert1; 1013 } 1014 } 1015 N = nrow++ - 1; 1016 high++; /* added new column thus must search to one higher than before */ 1017 /* shift up all the later entries in this row */ 1018 for (ii=N; ii>=i; ii--) { 1019 rp[ii+1] = rp[ii]; 1020 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1021 } 1022 if (N>=i) { 1023 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1024 } 1025 rp[i] = bcol; 1026 ap[16*i + 4*cidx + ridx] = value; 1027 noinsert1:; 1028 low = i; 1029 } 1030 ailen[brow] = nrow; 1031 } 1032 PetscFunctionReturnVoid(); 1033 } 1034 1035 /* 1036 Checks for missing diagonals 1037 */ 1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1039 { 1040 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1041 PetscErrorCode ierr; 1042 PetscInt *diag,*ii = a->i,i; 1043 1044 PetscFunctionBegin; 1045 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1046 *missing = PETSC_FALSE; 1047 if (A->rmap->n > 0 && !ii) { 1048 *missing = PETSC_TRUE; 1049 if (d) *d = 0; 1050 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1051 } else { 1052 diag = a->diag; 1053 for (i=0; i<a->mbs; i++) { 1054 if (diag[i] >= ii[i+1]) { 1055 *missing = PETSC_TRUE; 1056 if (d) *d = i; 1057 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1058 break; 1059 } 1060 } 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1066 { 1067 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 PetscInt i,j,m = a->mbs; 1070 1071 PetscFunctionBegin; 1072 if (!a->diag) { 1073 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1074 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1075 a->free_diag = PETSC_TRUE; 1076 } 1077 for (i=0; i<m; i++) { 1078 a->diag[i] = a->i[i+1]; 1079 for (j=a->i[i]; j<a->i[i+1]; j++) { 1080 if (a->j[j] == i) { 1081 a->diag[i] = j; 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = oshift; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1174 { 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (!ia) PetscFunctionReturn(0); 1179 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1180 ierr = PetscFree(*ia);CHKERRQ(ierr); 1181 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1187 { 1188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 #if defined(PETSC_USE_LOG) 1193 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1194 #endif 1195 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1197 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1198 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1200 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1201 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1204 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1206 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1207 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1208 1209 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1210 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1211 ierr = PetscFree(A->data);CHKERRQ(ierr); 1212 1213 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1224 #if defined(PETSC_HAVE_HYPRE) 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1226 #endif 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1232 { 1233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 1236 PetscFunctionBegin; 1237 switch (op) { 1238 case MAT_ROW_ORIENTED: 1239 a->roworiented = flg; 1240 break; 1241 case MAT_KEEP_NONZERO_PATTERN: 1242 a->keepnonzeropattern = flg; 1243 break; 1244 case MAT_NEW_NONZERO_LOCATIONS: 1245 a->nonew = (flg ? 0 : 1); 1246 break; 1247 case MAT_NEW_NONZERO_LOCATION_ERR: 1248 a->nonew = (flg ? -1 : 0); 1249 break; 1250 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1251 a->nonew = (flg ? -2 : 0); 1252 break; 1253 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1254 a->nounused = (flg ? -1 : 0); 1255 break; 1256 case MAT_NEW_DIAGONALS: 1257 case MAT_IGNORE_OFF_PROC_ENTRIES: 1258 case MAT_USE_HASH_TABLE: 1259 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1260 break; 1261 case MAT_SPD: 1262 case MAT_SYMMETRIC: 1263 case MAT_STRUCTURALLY_SYMMETRIC: 1264 case MAT_HERMITIAN: 1265 case MAT_SYMMETRY_ETERNAL: 1266 case MAT_SUBMAT_SINGLEIS: 1267 case MAT_STRUCTURE_ONLY: 1268 /* These options are handled directly by MatSetOption() */ 1269 break; 1270 default: 1271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1277 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1278 { 1279 PetscErrorCode ierr; 1280 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1281 MatScalar *aa_i; 1282 PetscScalar *v_i; 1283 1284 PetscFunctionBegin; 1285 bs = A->rmap->bs; 1286 bs2 = bs*bs; 1287 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1288 1289 bn = row/bs; /* Block number */ 1290 bp = row % bs; /* Block Position */ 1291 M = ai[bn+1] - ai[bn]; 1292 *nz = bs*M; 1293 1294 if (v) { 1295 *v = 0; 1296 if (*nz) { 1297 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1298 for (i=0; i<M; i++) { /* for each block in the block row */ 1299 v_i = *v + i*bs; 1300 aa_i = aa + bs2*(ai[bn] + i); 1301 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1302 } 1303 } 1304 } 1305 1306 if (idx) { 1307 *idx = 0; 1308 if (*nz) { 1309 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1310 for (i=0; i<M; i++) { /* for each block in the block row */ 1311 idx_i = *idx + i*bs; 1312 itmp = bs*aj[ai[bn] + i]; 1313 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1314 } 1315 } 1316 } 1317 PetscFunctionReturn(0); 1318 } 1319 1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1321 { 1322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1331 { 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1336 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1337 PetscFunctionReturn(0); 1338 } 1339 1340 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1341 { 1342 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1343 Mat C; 1344 PetscErrorCode ierr; 1345 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1346 PetscInt *rows,*cols,bs2=a->bs2; 1347 MatScalar *array; 1348 1349 PetscFunctionBegin; 1350 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1351 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1352 1353 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1354 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1355 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1356 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1357 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1358 ierr = PetscFree(col);CHKERRQ(ierr); 1359 } else { 1360 C = *B; 1361 } 1362 1363 array = a->a; 1364 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1365 for (i=0; i<mbs; i++) { 1366 cols[0] = i*bs; 1367 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1368 len = ai[i+1] - ai[i]; 1369 for (j=0; j<len; j++) { 1370 rows[0] = (*aj++)*bs; 1371 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1372 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1373 array += bs2; 1374 } 1375 } 1376 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1377 1378 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1380 1381 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1382 *B = C; 1383 } else { 1384 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1390 { 1391 PetscErrorCode ierr; 1392 Mat Btrans; 1393 1394 PetscFunctionBegin; 1395 *f = PETSC_FALSE; 1396 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1397 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1398 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1403 { 1404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1405 PetscErrorCode ierr; 1406 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1407 int fd; 1408 PetscScalar *aa; 1409 FILE *file; 1410 1411 PetscFunctionBegin; 1412 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1413 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1414 col_lens[0] = MAT_FILE_CLASSID; 1415 1416 col_lens[1] = A->rmap->N; 1417 col_lens[2] = A->cmap->n; 1418 col_lens[3] = a->nz*bs2; 1419 1420 /* store lengths of each row and write (including header) to file */ 1421 count = 0; 1422 for (i=0; i<a->mbs; i++) { 1423 for (j=0; j<bs; j++) { 1424 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1425 } 1426 } 1427 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1428 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1429 1430 /* store column indices (zero start index) */ 1431 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1432 count = 0; 1433 for (i=0; i<a->mbs; i++) { 1434 for (j=0; j<bs; j++) { 1435 for (k=a->i[i]; k<a->i[i+1]; k++) { 1436 for (l=0; l<bs; l++) { 1437 jj[count++] = bs*a->j[k] + l; 1438 } 1439 } 1440 } 1441 } 1442 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1443 ierr = PetscFree(jj);CHKERRQ(ierr); 1444 1445 /* store nonzero values */ 1446 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1447 count = 0; 1448 for (i=0; i<a->mbs; i++) { 1449 for (j=0; j<bs; j++) { 1450 for (k=a->i[i]; k<a->i[i+1]; k++) { 1451 for (l=0; l<bs; l++) { 1452 aa[count++] = a->a[bs2*k + l*bs + j]; 1453 } 1454 } 1455 } 1456 } 1457 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1458 ierr = PetscFree(aa);CHKERRQ(ierr); 1459 1460 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1461 if (file) { 1462 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1468 { 1469 PetscErrorCode ierr; 1470 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1471 PetscInt i,bs = A->rmap->bs,k; 1472 1473 PetscFunctionBegin; 1474 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1475 for (i=0; i<a->mbs; i++) { 1476 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1477 for (k=a->i[i]; k<a->i[i+1]; k++) { 1478 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1479 } 1480 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1481 } 1482 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1487 { 1488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1489 PetscErrorCode ierr; 1490 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1491 PetscViewerFormat format; 1492 1493 PetscFunctionBegin; 1494 if (A->structure_only) { 1495 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1500 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1501 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1502 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1503 const char *matname; 1504 Mat aij; 1505 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1506 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1507 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1508 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1509 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1510 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1511 PetscFunctionReturn(0); 1512 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1513 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1514 for (i=0; i<a->mbs; i++) { 1515 for (j=0; j<bs; j++) { 1516 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1517 for (k=a->i[i]; k<a->i[i+1]; k++) { 1518 for (l=0; l<bs; l++) { 1519 #if defined(PETSC_USE_COMPLEX) 1520 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1521 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1522 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1523 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1524 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1525 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1526 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1527 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1528 } 1529 #else 1530 if (a->a[bs2*k + l*bs + j] != 0.0) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1532 } 1533 #endif 1534 } 1535 } 1536 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1537 } 1538 } 1539 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1540 } else { 1541 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1542 for (i=0; i<a->mbs; i++) { 1543 for (j=0; j<bs; j++) { 1544 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1545 for (k=a->i[i]; k<a->i[i+1]; k++) { 1546 for (l=0; l<bs; l++) { 1547 #if defined(PETSC_USE_COMPLEX) 1548 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1550 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1551 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1552 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1553 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1554 } else { 1555 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1556 } 1557 #else 1558 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1559 #endif 1560 } 1561 } 1562 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1563 } 1564 } 1565 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1566 } 1567 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #include <petscdraw.h> 1572 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1573 { 1574 Mat A = (Mat) Aa; 1575 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1576 PetscErrorCode ierr; 1577 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1578 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1579 MatScalar *aa; 1580 PetscViewer viewer; 1581 PetscViewerFormat format; 1582 1583 PetscFunctionBegin; 1584 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1585 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1586 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1587 1588 /* loop over matrix elements drawing boxes */ 1589 1590 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1591 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1592 /* Blue for negative, Cyan for zero and Red for positive */ 1593 color = PETSC_DRAW_BLUE; 1594 for (i=0,row=0; i<mbs; i++,row+=bs) { 1595 for (j=a->i[i]; j<a->i[i+1]; j++) { 1596 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1597 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1598 aa = a->a + j*bs2; 1599 for (k=0; k<bs; k++) { 1600 for (l=0; l<bs; l++) { 1601 if (PetscRealPart(*aa++) >= 0.) continue; 1602 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1603 } 1604 } 1605 } 1606 } 1607 color = PETSC_DRAW_CYAN; 1608 for (i=0,row=0; i<mbs; i++,row+=bs) { 1609 for (j=a->i[i]; j<a->i[i+1]; j++) { 1610 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1611 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1612 aa = a->a + j*bs2; 1613 for (k=0; k<bs; k++) { 1614 for (l=0; l<bs; l++) { 1615 if (PetscRealPart(*aa++) != 0.) continue; 1616 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1617 } 1618 } 1619 } 1620 } 1621 color = PETSC_DRAW_RED; 1622 for (i=0,row=0; i<mbs; i++,row+=bs) { 1623 for (j=a->i[i]; j<a->i[i+1]; j++) { 1624 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1625 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1626 aa = a->a + j*bs2; 1627 for (k=0; k<bs; k++) { 1628 for (l=0; l<bs; l++) { 1629 if (PetscRealPart(*aa++) <= 0.) continue; 1630 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1631 } 1632 } 1633 } 1634 } 1635 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1636 } else { 1637 /* use contour shading to indicate magnitude of values */ 1638 /* first determine max of all nonzero values */ 1639 PetscReal minv = 0.0, maxv = 0.0; 1640 PetscDraw popup; 1641 1642 for (i=0; i<a->nz*a->bs2; i++) { 1643 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1644 } 1645 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1646 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1647 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1648 1649 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1650 for (i=0,row=0; i<mbs; i++,row+=bs) { 1651 for (j=a->i[i]; j<a->i[i+1]; j++) { 1652 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1653 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1654 aa = a->a + j*bs2; 1655 for (k=0; k<bs; k++) { 1656 for (l=0; l<bs; l++) { 1657 MatScalar v = *aa++; 1658 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1659 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1660 } 1661 } 1662 } 1663 } 1664 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1665 } 1666 PetscFunctionReturn(0); 1667 } 1668 1669 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1670 { 1671 PetscErrorCode ierr; 1672 PetscReal xl,yl,xr,yr,w,h; 1673 PetscDraw draw; 1674 PetscBool isnull; 1675 1676 PetscFunctionBegin; 1677 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1678 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1679 if (isnull) PetscFunctionReturn(0); 1680 1681 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1682 xr += w; yr += h; xl = -w; yl = -h; 1683 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1684 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1685 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1686 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1687 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1692 { 1693 PetscErrorCode ierr; 1694 PetscBool iascii,isbinary,isdraw; 1695 1696 PetscFunctionBegin; 1697 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1698 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1699 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1700 if (iascii) { 1701 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1702 } else if (isbinary) { 1703 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1704 } else if (isdraw) { 1705 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1706 } else { 1707 Mat B; 1708 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1709 ierr = MatView(B,viewer);CHKERRQ(ierr); 1710 ierr = MatDestroy(&B);CHKERRQ(ierr); 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 1716 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1717 { 1718 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1719 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1720 PetscInt *ai = a->i,*ailen = a->ilen; 1721 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1722 MatScalar *ap,*aa = a->a; 1723 1724 PetscFunctionBegin; 1725 for (k=0; k<m; k++) { /* loop over rows */ 1726 row = im[k]; brow = row/bs; 1727 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1728 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1729 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1730 nrow = ailen[brow]; 1731 for (l=0; l<n; l++) { /* loop over columns */ 1732 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1733 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1734 col = in[l]; 1735 bcol = col/bs; 1736 cidx = col%bs; 1737 ridx = row%bs; 1738 high = nrow; 1739 low = 0; /* assume unsorted */ 1740 while (high-low > 5) { 1741 t = (low+high)/2; 1742 if (rp[t] > bcol) high = t; 1743 else low = t; 1744 } 1745 for (i=low; i<high; i++) { 1746 if (rp[i] > bcol) break; 1747 if (rp[i] == bcol) { 1748 *v++ = ap[bs2*i+bs*cidx+ridx]; 1749 goto finished; 1750 } 1751 } 1752 *v++ = 0.0; 1753 finished:; 1754 } 1755 } 1756 PetscFunctionReturn(0); 1757 } 1758 1759 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1760 { 1761 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1762 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1763 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1764 PetscErrorCode ierr; 1765 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1766 PetscBool roworiented=a->roworiented; 1767 const PetscScalar *value = v; 1768 MatScalar *ap=NULL,*aa = a->a,*bap; 1769 1770 PetscFunctionBegin; 1771 if (roworiented) { 1772 stepval = (n-1)*bs; 1773 } else { 1774 stepval = (m-1)*bs; 1775 } 1776 for (k=0; k<m; k++) { /* loop over added rows */ 1777 row = im[k]; 1778 if (row < 0) continue; 1779 #if defined(PETSC_USE_DEBUG) 1780 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1781 #endif 1782 rp = aj + ai[row]; 1783 if (!A->structure_only) ap = aa + bs2*ai[row]; 1784 rmax = imax[row]; 1785 nrow = ailen[row]; 1786 low = 0; 1787 high = nrow; 1788 for (l=0; l<n; l++) { /* loop over added columns */ 1789 if (in[l] < 0) continue; 1790 #if defined(PETSC_USE_DEBUG) 1791 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1792 #endif 1793 col = in[l]; 1794 if (!A->structure_only) { 1795 if (roworiented) { 1796 value = v + (k*(stepval+bs) + l)*bs; 1797 } else { 1798 value = v + (l*(stepval+bs) + k)*bs; 1799 } 1800 } 1801 if (col <= lastcol) low = 0; 1802 else high = nrow; 1803 lastcol = col; 1804 while (high-low > 7) { 1805 t = (low+high)/2; 1806 if (rp[t] > col) high = t; 1807 else low = t; 1808 } 1809 for (i=low; i<high; i++) { 1810 if (rp[i] > col) break; 1811 if (rp[i] == col) { 1812 if (A->structure_only) goto noinsert2; 1813 bap = ap + bs2*i; 1814 if (roworiented) { 1815 if (is == ADD_VALUES) { 1816 for (ii=0; ii<bs; ii++,value+=stepval) { 1817 for (jj=ii; jj<bs2; jj+=bs) { 1818 bap[jj] += *value++; 1819 } 1820 } 1821 } else { 1822 for (ii=0; ii<bs; ii++,value+=stepval) { 1823 for (jj=ii; jj<bs2; jj+=bs) { 1824 bap[jj] = *value++; 1825 } 1826 } 1827 } 1828 } else { 1829 if (is == ADD_VALUES) { 1830 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1831 for (jj=0; jj<bs; jj++) { 1832 bap[jj] += value[jj]; 1833 } 1834 bap += bs; 1835 } 1836 } else { 1837 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1838 for (jj=0; jj<bs; jj++) { 1839 bap[jj] = value[jj]; 1840 } 1841 bap += bs; 1842 } 1843 } 1844 } 1845 goto noinsert2; 1846 } 1847 } 1848 if (nonew == 1) goto noinsert2; 1849 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1850 if (A->structure_only) { 1851 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1852 } else { 1853 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1854 } 1855 N = nrow++ - 1; high++; 1856 /* shift up all the later entries in this row */ 1857 for (ii=N; ii>=i; ii--) { 1858 rp[ii+1] = rp[ii]; 1859 if (!A->structure_only) { 1860 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1861 } 1862 } 1863 if (N >= i && !A->structure_only) { 1864 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1865 } 1866 1867 rp[i] = col; 1868 if (!A->structure_only) { 1869 bap = ap + bs2*i; 1870 if (roworiented) { 1871 for (ii=0; ii<bs; ii++,value+=stepval) { 1872 for (jj=ii; jj<bs2; jj+=bs) { 1873 bap[jj] = *value++; 1874 } 1875 } 1876 } else { 1877 for (ii=0; ii<bs; ii++,value+=stepval) { 1878 for (jj=0; jj<bs; jj++) { 1879 *bap++ = *value++; 1880 } 1881 } 1882 } 1883 } 1884 noinsert2:; 1885 low = i; 1886 } 1887 ailen[row] = nrow; 1888 } 1889 PetscFunctionReturn(0); 1890 } 1891 1892 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1893 { 1894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1895 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1896 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1897 PetscErrorCode ierr; 1898 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1899 MatScalar *aa = a->a,*ap; 1900 PetscReal ratio=0.6; 1901 1902 PetscFunctionBegin; 1903 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1904 1905 if (m) rmax = ailen[0]; 1906 for (i=1; i<mbs; i++) { 1907 /* move each row back by the amount of empty slots (fshift) before it*/ 1908 fshift += imax[i-1] - ailen[i-1]; 1909 rmax = PetscMax(rmax,ailen[i]); 1910 if (fshift) { 1911 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1912 N = ailen[i]; 1913 for (j=0; j<N; j++) { 1914 ip[j-fshift] = ip[j]; 1915 if (!A->structure_only) { 1916 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1917 } 1918 } 1919 } 1920 ai[i] = ai[i-1] + ailen[i-1]; 1921 } 1922 if (mbs) { 1923 fshift += imax[mbs-1] - ailen[mbs-1]; 1924 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1925 } 1926 1927 /* reset ilen and imax for each row */ 1928 a->nonzerorowcnt = 0; 1929 if (A->structure_only) { 1930 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1931 } else { /* !A->structure_only */ 1932 for (i=0; i<mbs; i++) { 1933 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1934 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1935 } 1936 } 1937 a->nz = ai[mbs]; 1938 1939 /* diagonals may have moved, so kill the diagonal pointers */ 1940 a->idiagvalid = PETSC_FALSE; 1941 if (fshift && a->diag) { 1942 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1944 a->diag = 0; 1945 } 1946 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1947 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1948 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1950 1951 A->info.mallocs += a->reallocs; 1952 a->reallocs = 0; 1953 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1954 a->rmax = rmax; 1955 1956 if (!A->structure_only) { 1957 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1958 } 1959 PetscFunctionReturn(0); 1960 } 1961 1962 /* 1963 This function returns an array of flags which indicate the locations of contiguous 1964 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1965 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1966 Assume: sizes should be long enough to hold all the values. 1967 */ 1968 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1969 { 1970 PetscInt i,j,k,row; 1971 PetscBool flg; 1972 1973 PetscFunctionBegin; 1974 for (i=0,j=0; i<n; j++) { 1975 row = idx[i]; 1976 if (row%bs!=0) { /* Not the begining of a block */ 1977 sizes[j] = 1; 1978 i++; 1979 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1980 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1981 i++; 1982 } else { /* Begining of the block, so check if the complete block exists */ 1983 flg = PETSC_TRUE; 1984 for (k=1; k<bs; k++) { 1985 if (row+k != idx[i+k]) { /* break in the block */ 1986 flg = PETSC_FALSE; 1987 break; 1988 } 1989 } 1990 if (flg) { /* No break in the bs */ 1991 sizes[j] = bs; 1992 i += bs; 1993 } else { 1994 sizes[j] = 1; 1995 i++; 1996 } 1997 } 1998 } 1999 *bs_max = j; 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2004 { 2005 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2006 PetscErrorCode ierr; 2007 PetscInt i,j,k,count,*rows; 2008 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2009 PetscScalar zero = 0.0; 2010 MatScalar *aa; 2011 const PetscScalar *xx; 2012 PetscScalar *bb; 2013 2014 PetscFunctionBegin; 2015 /* fix right hand side if needed */ 2016 if (x && b) { 2017 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2018 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2019 for (i=0; i<is_n; i++) { 2020 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2021 } 2022 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2023 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2024 } 2025 2026 /* Make a copy of the IS and sort it */ 2027 /* allocate memory for rows,sizes */ 2028 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2029 2030 /* copy IS values to rows, and sort them */ 2031 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2032 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2033 2034 if (baij->keepnonzeropattern) { 2035 for (i=0; i<is_n; i++) sizes[i] = 1; 2036 bs_max = is_n; 2037 } else { 2038 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2039 A->nonzerostate++; 2040 } 2041 2042 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2043 row = rows[j]; 2044 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2045 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2046 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2047 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2048 if (diag != (PetscScalar)0.0) { 2049 if (baij->ilen[row/bs] > 0) { 2050 baij->ilen[row/bs] = 1; 2051 baij->j[baij->i[row/bs]] = row/bs; 2052 2053 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2054 } 2055 /* Now insert all the diagonal values for this bs */ 2056 for (k=0; k<bs; k++) { 2057 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2058 } 2059 } else { /* (diag == 0.0) */ 2060 baij->ilen[row/bs] = 0; 2061 } /* end (diag == 0.0) */ 2062 } else { /* (sizes[i] != bs) */ 2063 #if defined(PETSC_USE_DEBUG) 2064 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2065 #endif 2066 for (k=0; k<count; k++) { 2067 aa[0] = zero; 2068 aa += bs; 2069 } 2070 if (diag != (PetscScalar)0.0) { 2071 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 } 2074 } 2075 2076 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2077 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 PetscFunctionReturn(0); 2079 } 2080 2081 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2082 { 2083 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2084 PetscErrorCode ierr; 2085 PetscInt i,j,k,count; 2086 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2087 PetscScalar zero = 0.0; 2088 MatScalar *aa; 2089 const PetscScalar *xx; 2090 PetscScalar *bb; 2091 PetscBool *zeroed,vecs = PETSC_FALSE; 2092 2093 PetscFunctionBegin; 2094 /* fix right hand side if needed */ 2095 if (x && b) { 2096 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2097 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2098 vecs = PETSC_TRUE; 2099 } 2100 2101 /* zero the columns */ 2102 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2103 for (i=0; i<is_n; i++) { 2104 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2105 zeroed[is_idx[i]] = PETSC_TRUE; 2106 } 2107 for (i=0; i<A->rmap->N; i++) { 2108 if (!zeroed[i]) { 2109 row = i/bs; 2110 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2111 for (k=0; k<bs; k++) { 2112 col = bs*baij->j[j] + k; 2113 if (zeroed[col]) { 2114 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2115 if (vecs) bb[i] -= aa[0]*xx[col]; 2116 aa[0] = 0.0; 2117 } 2118 } 2119 } 2120 } else if (vecs) bb[i] = diag*xx[i]; 2121 } 2122 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2123 if (vecs) { 2124 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2125 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2126 } 2127 2128 /* zero the rows */ 2129 for (i=0; i<is_n; i++) { 2130 row = is_idx[i]; 2131 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2132 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2133 for (k=0; k<count; k++) { 2134 aa[0] = zero; 2135 aa += bs; 2136 } 2137 if (diag != (PetscScalar)0.0) { 2138 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2139 } 2140 } 2141 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2146 { 2147 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2148 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2149 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2150 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2151 PetscErrorCode ierr; 2152 PetscInt ridx,cidx,bs2=a->bs2; 2153 PetscBool roworiented=a->roworiented; 2154 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2155 2156 PetscFunctionBegin; 2157 for (k=0; k<m; k++) { /* loop over added rows */ 2158 row = im[k]; 2159 brow = row/bs; 2160 if (row < 0) continue; 2161 #if defined(PETSC_USE_DEBUG) 2162 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2163 #endif 2164 rp = aj + ai[brow]; 2165 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2166 rmax = imax[brow]; 2167 nrow = ailen[brow]; 2168 low = 0; 2169 high = nrow; 2170 for (l=0; l<n; l++) { /* loop over added columns */ 2171 if (in[l] < 0) continue; 2172 #if defined(PETSC_USE_DEBUG) 2173 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2174 #endif 2175 col = in[l]; bcol = col/bs; 2176 ridx = row % bs; cidx = col % bs; 2177 if (!A->structure_only) { 2178 if (roworiented) { 2179 value = v[l + k*n]; 2180 } else { 2181 value = v[k + l*m]; 2182 } 2183 } 2184 if (col <= lastcol) low = 0; else high = nrow; 2185 lastcol = col; 2186 while (high-low > 7) { 2187 t = (low+high)/2; 2188 if (rp[t] > bcol) high = t; 2189 else low = t; 2190 } 2191 for (i=low; i<high; i++) { 2192 if (rp[i] > bcol) break; 2193 if (rp[i] == bcol) { 2194 bap = ap + bs2*i + bs*cidx + ridx; 2195 if (!A->structure_only) { 2196 if (is == ADD_VALUES) *bap += value; 2197 else *bap = value; 2198 } 2199 goto noinsert1; 2200 } 2201 } 2202 if (nonew == 1) goto noinsert1; 2203 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2204 if (A->structure_only) { 2205 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2206 } else { 2207 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2208 } 2209 N = nrow++ - 1; high++; 2210 /* shift up all the later entries in this row */ 2211 for (ii=N; ii>=i; ii--) { 2212 rp[ii+1] = rp[ii]; 2213 if (!A->structure_only) { 2214 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2215 } 2216 } 2217 if (N>=i && !A->structure_only) { 2218 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2219 } 2220 rp[i] = bcol; 2221 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2222 a->nz++; 2223 A->nonzerostate++; 2224 noinsert1:; 2225 low = i; 2226 } 2227 ailen[brow] = nrow; 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2233 { 2234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2235 Mat outA; 2236 PetscErrorCode ierr; 2237 PetscBool row_identity,col_identity; 2238 2239 PetscFunctionBegin; 2240 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2241 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2242 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2243 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2244 2245 outA = inA; 2246 inA->factortype = MAT_FACTOR_LU; 2247 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2248 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2249 2250 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2251 2252 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2253 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2254 a->row = row; 2255 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2256 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2257 a->col = col; 2258 2259 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2260 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2261 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2262 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2263 2264 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2265 if (!a->solve_work) { 2266 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2267 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2268 } 2269 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2274 { 2275 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2276 PetscInt i,nz,mbs; 2277 2278 PetscFunctionBegin; 2279 nz = baij->maxnz; 2280 mbs = baij->mbs; 2281 for (i=0; i<nz; i++) { 2282 baij->j[i] = indices[i]; 2283 } 2284 baij->nz = nz; 2285 for (i=0; i<mbs; i++) { 2286 baij->ilen[i] = baij->imax[i]; 2287 } 2288 PetscFunctionReturn(0); 2289 } 2290 2291 /*@ 2292 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2293 in the matrix. 2294 2295 Input Parameters: 2296 + mat - the SeqBAIJ matrix 2297 - indices - the column indices 2298 2299 Level: advanced 2300 2301 Notes: 2302 This can be called if you have precomputed the nonzero structure of the 2303 matrix and want to provide it to the matrix object to improve the performance 2304 of the MatSetValues() operation. 2305 2306 You MUST have set the correct numbers of nonzeros per row in the call to 2307 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2308 2309 MUST be called before any calls to MatSetValues(); 2310 2311 @*/ 2312 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2313 { 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2318 PetscValidPointer(indices,2); 2319 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2324 { 2325 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2326 PetscErrorCode ierr; 2327 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2328 PetscReal atmp; 2329 PetscScalar *x,zero = 0.0; 2330 MatScalar *aa; 2331 PetscInt ncols,brow,krow,kcol; 2332 2333 PetscFunctionBegin; 2334 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2335 bs = A->rmap->bs; 2336 aa = a->a; 2337 ai = a->i; 2338 aj = a->j; 2339 mbs = a->mbs; 2340 2341 ierr = VecSet(v,zero);CHKERRQ(ierr); 2342 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2343 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2344 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2345 for (i=0; i<mbs; i++) { 2346 ncols = ai[1] - ai[0]; ai++; 2347 brow = bs*i; 2348 for (j=0; j<ncols; j++) { 2349 for (kcol=0; kcol<bs; kcol++) { 2350 for (krow=0; krow<bs; krow++) { 2351 atmp = PetscAbsScalar(*aa);aa++; 2352 row = brow + krow; /* row index */ 2353 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2354 } 2355 } 2356 aj++; 2357 } 2358 } 2359 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 2363 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2364 { 2365 PetscErrorCode ierr; 2366 2367 PetscFunctionBegin; 2368 /* If the two matrices have the same copy implementation, use fast copy. */ 2369 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2371 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2372 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2373 2374 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2375 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2376 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2377 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2378 } else { 2379 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2380 } 2381 PetscFunctionReturn(0); 2382 } 2383 2384 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2385 { 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2394 { 2395 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2396 2397 PetscFunctionBegin; 2398 *array = a->a; 2399 PetscFunctionReturn(0); 2400 } 2401 2402 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2403 { 2404 PetscFunctionBegin; 2405 PetscFunctionReturn(0); 2406 } 2407 2408 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2409 { 2410 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2411 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2412 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 /* Set the number of nonzeros in the new matrix */ 2417 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2422 { 2423 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2424 PetscErrorCode ierr; 2425 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2426 PetscBLASInt one=1; 2427 2428 PetscFunctionBegin; 2429 if (str == SAME_NONZERO_PATTERN) { 2430 PetscScalar alpha = a; 2431 PetscBLASInt bnz; 2432 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2433 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2434 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2435 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2436 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2437 } else { 2438 Mat B; 2439 PetscInt *nnz; 2440 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2441 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2442 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2443 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2444 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2445 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2446 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2447 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2448 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2449 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2450 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2451 ierr = PetscFree(nnz);CHKERRQ(ierr); 2452 } 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2457 { 2458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2459 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2460 MatScalar *aa = a->a; 2461 2462 PetscFunctionBegin; 2463 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2468 { 2469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2470 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2471 MatScalar *aa = a->a; 2472 2473 PetscFunctionBegin; 2474 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 /* 2479 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2480 */ 2481 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2482 { 2483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2484 PetscErrorCode ierr; 2485 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2486 PetscInt nz = a->i[m],row,*jj,mr,col; 2487 2488 PetscFunctionBegin; 2489 *nn = n; 2490 if (!ia) PetscFunctionReturn(0); 2491 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2492 else { 2493 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2495 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2496 jj = a->j; 2497 for (i=0; i<nz; i++) { 2498 collengths[jj[i]]++; 2499 } 2500 cia[0] = oshift; 2501 for (i=0; i<n; i++) { 2502 cia[i+1] = cia[i] + collengths[i]; 2503 } 2504 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2505 jj = a->j; 2506 for (row=0; row<m; row++) { 2507 mr = a->i[row+1] - a->i[row]; 2508 for (i=0; i<mr; i++) { 2509 col = *jj++; 2510 2511 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2512 } 2513 } 2514 ierr = PetscFree(collengths);CHKERRQ(ierr); 2515 *ia = cia; *ja = cja; 2516 } 2517 PetscFunctionReturn(0); 2518 } 2519 2520 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2521 { 2522 PetscErrorCode ierr; 2523 2524 PetscFunctionBegin; 2525 if (!ia) PetscFunctionReturn(0); 2526 ierr = PetscFree(*ia);CHKERRQ(ierr); 2527 ierr = PetscFree(*ja);CHKERRQ(ierr); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 /* 2532 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2533 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2534 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2535 */ 2536 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2537 { 2538 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2539 PetscErrorCode ierr; 2540 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2541 PetscInt nz = a->i[m],row,*jj,mr,col; 2542 PetscInt *cspidx; 2543 2544 PetscFunctionBegin; 2545 *nn = n; 2546 if (!ia) PetscFunctionReturn(0); 2547 2548 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2550 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2551 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2552 jj = a->j; 2553 for (i=0; i<nz; i++) { 2554 collengths[jj[i]]++; 2555 } 2556 cia[0] = oshift; 2557 for (i=0; i<n; i++) { 2558 cia[i+1] = cia[i] + collengths[i]; 2559 } 2560 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2561 jj = a->j; 2562 for (row=0; row<m; row++) { 2563 mr = a->i[row+1] - a->i[row]; 2564 for (i=0; i<mr; i++) { 2565 col = *jj++; 2566 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2567 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2568 } 2569 } 2570 ierr = PetscFree(collengths);CHKERRQ(ierr); 2571 *ia = cia; *ja = cja; 2572 *spidx = cspidx; 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2577 { 2578 PetscErrorCode ierr; 2579 2580 PetscFunctionBegin; 2581 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2582 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2587 { 2588 PetscErrorCode ierr; 2589 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2590 2591 PetscFunctionBegin; 2592 if (!Y->preallocated || !aij->nz) { 2593 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2594 } 2595 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 /* -------------------------------------------------------------------*/ 2600 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2601 MatGetRow_SeqBAIJ, 2602 MatRestoreRow_SeqBAIJ, 2603 MatMult_SeqBAIJ_N, 2604 /* 4*/ MatMultAdd_SeqBAIJ_N, 2605 MatMultTranspose_SeqBAIJ, 2606 MatMultTransposeAdd_SeqBAIJ, 2607 0, 2608 0, 2609 0, 2610 /* 10*/ 0, 2611 MatLUFactor_SeqBAIJ, 2612 0, 2613 0, 2614 MatTranspose_SeqBAIJ, 2615 /* 15*/ MatGetInfo_SeqBAIJ, 2616 MatEqual_SeqBAIJ, 2617 MatGetDiagonal_SeqBAIJ, 2618 MatDiagonalScale_SeqBAIJ, 2619 MatNorm_SeqBAIJ, 2620 /* 20*/ 0, 2621 MatAssemblyEnd_SeqBAIJ, 2622 MatSetOption_SeqBAIJ, 2623 MatZeroEntries_SeqBAIJ, 2624 /* 24*/ MatZeroRows_SeqBAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /* 29*/ MatSetUp_SeqBAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /* 34*/ MatDuplicate_SeqBAIJ, 2635 0, 2636 0, 2637 MatILUFactor_SeqBAIJ, 2638 0, 2639 /* 39*/ MatAXPY_SeqBAIJ, 2640 MatCreateSubMatrices_SeqBAIJ, 2641 MatIncreaseOverlap_SeqBAIJ, 2642 MatGetValues_SeqBAIJ, 2643 MatCopy_SeqBAIJ, 2644 /* 44*/ 0, 2645 MatScale_SeqBAIJ, 2646 MatShift_SeqBAIJ, 2647 0, 2648 MatZeroRowsColumns_SeqBAIJ, 2649 /* 49*/ 0, 2650 MatGetRowIJ_SeqBAIJ, 2651 MatRestoreRowIJ_SeqBAIJ, 2652 MatGetColumnIJ_SeqBAIJ, 2653 MatRestoreColumnIJ_SeqBAIJ, 2654 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2655 0, 2656 0, 2657 0, 2658 MatSetValuesBlocked_SeqBAIJ, 2659 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2660 MatDestroy_SeqBAIJ, 2661 MatView_SeqBAIJ, 2662 0, 2663 0, 2664 /* 64*/ 0, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2670 0, 2671 MatConvert_Basic, 2672 0, 2673 0, 2674 /* 74*/ 0, 2675 MatFDColoringApply_BAIJ, 2676 0, 2677 0, 2678 0, 2679 /* 79*/ 0, 2680 0, 2681 0, 2682 0, 2683 MatLoad_SeqBAIJ, 2684 /* 84*/ 0, 2685 0, 2686 0, 2687 0, 2688 0, 2689 /* 89*/ 0, 2690 0, 2691 0, 2692 0, 2693 0, 2694 /* 94*/ 0, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /* 99*/ 0, 2700 0, 2701 0, 2702 0, 2703 0, 2704 /*104*/ 0, 2705 MatRealPart_SeqBAIJ, 2706 MatImaginaryPart_SeqBAIJ, 2707 0, 2708 0, 2709 /*109*/ 0, 2710 0, 2711 0, 2712 0, 2713 MatMissingDiagonal_SeqBAIJ, 2714 /*114*/ 0, 2715 0, 2716 0, 2717 0, 2718 0, 2719 /*119*/ 0, 2720 0, 2721 MatMultHermitianTranspose_SeqBAIJ, 2722 MatMultHermitianTransposeAdd_SeqBAIJ, 2723 0, 2724 /*124*/ 0, 2725 0, 2726 MatInvertBlockDiagonal_SeqBAIJ, 2727 0, 2728 0, 2729 /*129*/ 0, 2730 0, 2731 0, 2732 0, 2733 0, 2734 /*134*/ 0, 2735 0, 2736 0, 2737 0, 2738 0, 2739 /*139*/ MatSetBlockSizes_Default, 2740 0, 2741 0, 2742 MatFDColoringSetUp_SeqXAIJ, 2743 0, 2744 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2745 MatDestroySubMatrices_SeqBAIJ 2746 }; 2747 2748 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2749 { 2750 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2751 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2756 2757 /* allocate space for values if not already there */ 2758 if (!aij->saved_values) { 2759 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2760 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2761 } 2762 2763 /* copy values over */ 2764 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2769 { 2770 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2771 PetscErrorCode ierr; 2772 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2773 2774 PetscFunctionBegin; 2775 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2776 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2777 2778 /* copy values over */ 2779 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2780 PetscFunctionReturn(0); 2781 } 2782 2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2785 2786 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2787 { 2788 Mat_SeqBAIJ *b; 2789 PetscErrorCode ierr; 2790 PetscInt i,mbs,nbs,bs2; 2791 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2792 2793 PetscFunctionBegin; 2794 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2795 if (nz == MAT_SKIP_ALLOCATION) { 2796 skipallocation = PETSC_TRUE; 2797 nz = 0; 2798 } 2799 2800 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2803 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2804 2805 B->preallocated = PETSC_TRUE; 2806 2807 mbs = B->rmap->n/bs; 2808 nbs = B->cmap->n/bs; 2809 bs2 = bs*bs; 2810 2811 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2812 2813 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2814 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2815 if (nnz) { 2816 for (i=0; i<mbs; i++) { 2817 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2818 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2819 } 2820 } 2821 2822 b = (Mat_SeqBAIJ*)B->data; 2823 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2824 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2825 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2826 2827 if (!flg) { 2828 switch (bs) { 2829 case 1: 2830 B->ops->mult = MatMult_SeqBAIJ_1; 2831 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2832 break; 2833 case 2: 2834 B->ops->mult = MatMult_SeqBAIJ_2; 2835 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2836 break; 2837 case 3: 2838 B->ops->mult = MatMult_SeqBAIJ_3; 2839 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2840 break; 2841 case 4: 2842 B->ops->mult = MatMult_SeqBAIJ_4; 2843 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2844 break; 2845 case 5: 2846 B->ops->mult = MatMult_SeqBAIJ_5; 2847 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2848 break; 2849 case 6: 2850 B->ops->mult = MatMult_SeqBAIJ_6; 2851 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2852 break; 2853 case 7: 2854 B->ops->mult = MatMult_SeqBAIJ_7; 2855 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2856 break; 2857 case 9: 2858 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2859 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2860 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2861 #else 2862 B->ops->mult = MatMult_SeqBAIJ_N; 2863 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2864 #endif 2865 break; 2866 case 11: 2867 B->ops->mult = MatMult_SeqBAIJ_11; 2868 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2869 break; 2870 case 15: 2871 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2872 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2873 break; 2874 default: 2875 B->ops->mult = MatMult_SeqBAIJ_N; 2876 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2877 break; 2878 } 2879 } 2880 B->ops->sor = MatSOR_SeqBAIJ; 2881 b->mbs = mbs; 2882 b->nbs = nbs; 2883 if (!skipallocation) { 2884 if (!b->imax) { 2885 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2886 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2887 2888 b->free_imax_ilen = PETSC_TRUE; 2889 } 2890 /* b->ilen will count nonzeros in each block row so far. */ 2891 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2892 if (!nnz) { 2893 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2894 else if (nz < 0) nz = 1; 2895 for (i=0; i<mbs; i++) b->imax[i] = nz; 2896 nz = nz*mbs; 2897 } else { 2898 nz = 0; 2899 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2900 } 2901 2902 /* allocate the matrix space */ 2903 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2904 if (B->structure_only) { 2905 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2906 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2907 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2908 } else { 2909 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2910 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2911 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2912 } 2913 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2914 2915 if (B->structure_only) { 2916 b->singlemalloc = PETSC_FALSE; 2917 b->free_a = PETSC_FALSE; 2918 } else { 2919 b->singlemalloc = PETSC_TRUE; 2920 b->free_a = PETSC_TRUE; 2921 } 2922 b->free_ij = PETSC_TRUE; 2923 2924 b->i[0] = 0; 2925 for (i=1; i<mbs+1; i++) { 2926 b->i[i] = b->i[i-1] + b->imax[i-1]; 2927 } 2928 2929 } else { 2930 b->free_a = PETSC_FALSE; 2931 b->free_ij = PETSC_FALSE; 2932 } 2933 2934 b->bs2 = bs2; 2935 b->mbs = mbs; 2936 b->nz = 0; 2937 b->maxnz = nz; 2938 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2939 B->was_assembled = PETSC_FALSE; 2940 B->assembled = PETSC_FALSE; 2941 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2942 PetscFunctionReturn(0); 2943 } 2944 2945 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2946 { 2947 PetscInt i,m,nz,nz_max=0,*nnz; 2948 PetscScalar *values=0; 2949 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2954 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2955 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2956 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2958 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2959 m = B->rmap->n/bs; 2960 2961 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2962 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2963 for (i=0; i<m; i++) { 2964 nz = ii[i+1]- ii[i]; 2965 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2966 nz_max = PetscMax(nz_max, nz); 2967 nnz[i] = nz; 2968 } 2969 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2970 ierr = PetscFree(nnz);CHKERRQ(ierr); 2971 2972 values = (PetscScalar*)V; 2973 if (!values) { 2974 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2975 } 2976 for (i=0; i<m; i++) { 2977 PetscInt ncols = ii[i+1] - ii[i]; 2978 const PetscInt *icols = jj + ii[i]; 2979 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2980 if (!roworiented) { 2981 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2982 } else { 2983 PetscInt j; 2984 for (j=0; j<ncols; j++) { 2985 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2986 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2987 } 2988 } 2989 } 2990 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2991 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2992 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2993 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 /*MC 2998 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2999 block sparse compressed row format. 3000 3001 Options Database Keys: 3002 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3003 3004 Level: beginner 3005 3006 .seealso: MatCreateSeqBAIJ() 3007 M*/ 3008 3009 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3010 3011 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3012 { 3013 PetscErrorCode ierr; 3014 PetscMPIInt size; 3015 Mat_SeqBAIJ *b; 3016 3017 PetscFunctionBegin; 3018 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3019 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3020 3021 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3022 B->data = (void*)b; 3023 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3024 3025 b->row = 0; 3026 b->col = 0; 3027 b->icol = 0; 3028 b->reallocs = 0; 3029 b->saved_values = 0; 3030 3031 b->roworiented = PETSC_TRUE; 3032 b->nonew = 0; 3033 b->diag = 0; 3034 B->spptr = 0; 3035 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3036 b->keepnonzeropattern = PETSC_FALSE; 3037 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3047 #if defined(PETSC_HAVE_HYPRE) 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3049 #endif 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3056 { 3057 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3058 PetscErrorCode ierr; 3059 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3060 3061 PetscFunctionBegin; 3062 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3063 3064 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3065 c->imax = a->imax; 3066 c->ilen = a->ilen; 3067 c->free_imax_ilen = PETSC_FALSE; 3068 } else { 3069 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3070 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3071 for (i=0; i<mbs; i++) { 3072 c->imax[i] = a->imax[i]; 3073 c->ilen[i] = a->ilen[i]; 3074 } 3075 c->free_imax_ilen = PETSC_TRUE; 3076 } 3077 3078 /* allocate the matrix space */ 3079 if (mallocmatspace) { 3080 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3081 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3082 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3083 3084 c->i = a->i; 3085 c->j = a->j; 3086 c->singlemalloc = PETSC_FALSE; 3087 c->free_a = PETSC_TRUE; 3088 c->free_ij = PETSC_FALSE; 3089 c->parent = A; 3090 C->preallocated = PETSC_TRUE; 3091 C->assembled = PETSC_TRUE; 3092 3093 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3094 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3095 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3096 } else { 3097 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3098 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3099 3100 c->singlemalloc = PETSC_TRUE; 3101 c->free_a = PETSC_TRUE; 3102 c->free_ij = PETSC_TRUE; 3103 3104 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3105 if (mbs > 0) { 3106 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3107 if (cpvalues == MAT_COPY_VALUES) { 3108 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3109 } else { 3110 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3111 } 3112 } 3113 C->preallocated = PETSC_TRUE; 3114 C->assembled = PETSC_TRUE; 3115 } 3116 } 3117 3118 c->roworiented = a->roworiented; 3119 c->nonew = a->nonew; 3120 3121 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3122 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3123 3124 c->bs2 = a->bs2; 3125 c->mbs = a->mbs; 3126 c->nbs = a->nbs; 3127 3128 if (a->diag) { 3129 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3130 c->diag = a->diag; 3131 c->free_diag = PETSC_FALSE; 3132 } else { 3133 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3134 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3135 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3136 c->free_diag = PETSC_TRUE; 3137 } 3138 } else c->diag = 0; 3139 3140 c->nz = a->nz; 3141 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3142 c->solve_work = NULL; 3143 c->mult_work = NULL; 3144 c->sor_workt = NULL; 3145 c->sor_work = NULL; 3146 3147 c->compressedrow.use = a->compressedrow.use; 3148 c->compressedrow.nrows = a->compressedrow.nrows; 3149 if (a->compressedrow.use) { 3150 i = a->compressedrow.nrows; 3151 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3152 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3153 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3154 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3155 } else { 3156 c->compressedrow.use = PETSC_FALSE; 3157 c->compressedrow.i = NULL; 3158 c->compressedrow.rindex = NULL; 3159 } 3160 C->nonzerostate = A->nonzerostate; 3161 3162 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3163 PetscFunctionReturn(0); 3164 } 3165 3166 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3167 { 3168 PetscErrorCode ierr; 3169 3170 PetscFunctionBegin; 3171 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3172 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3173 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3174 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3175 PetscFunctionReturn(0); 3176 } 3177 3178 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3179 { 3180 Mat_SeqBAIJ *a; 3181 PetscErrorCode ierr; 3182 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3183 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3184 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3185 PetscInt *masked,nmask,tmp,bs2,ishift; 3186 PetscMPIInt size; 3187 int fd; 3188 PetscScalar *aa; 3189 MPI_Comm comm; 3190 3191 PetscFunctionBegin; 3192 /* force binary viewer to load .info file if it has not yet done so */ 3193 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3194 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3195 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3196 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3197 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3198 if (bs < 0) bs = 1; 3199 bs2 = bs*bs; 3200 3201 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3202 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3203 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3204 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3205 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3206 M = header[1]; N = header[2]; nz = header[3]; 3207 3208 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3209 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3210 3211 /* 3212 This code adds extra rows to make sure the number of rows is 3213 divisible by the blocksize 3214 */ 3215 mbs = M/bs; 3216 extra_rows = bs - M + bs*(mbs); 3217 if (extra_rows == bs) extra_rows = 0; 3218 else mbs++; 3219 if (extra_rows) { 3220 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3221 } 3222 3223 /* Set global sizes if not already set */ 3224 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3225 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3226 } else { /* Check if the matrix global sizes are correct */ 3227 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3228 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3229 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3230 } 3231 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3232 } 3233 3234 /* read in row lengths */ 3235 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3236 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3237 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3238 3239 /* read in column indices */ 3240 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3241 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3242 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3243 3244 /* loop over row lengths determining block row lengths */ 3245 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3246 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3247 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3248 rowcount = 0; 3249 nzcount = 0; 3250 for (i=0; i<mbs; i++) { 3251 nmask = 0; 3252 for (j=0; j<bs; j++) { 3253 kmax = rowlengths[rowcount]; 3254 for (k=0; k<kmax; k++) { 3255 tmp = jj[nzcount++]/bs; 3256 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3257 } 3258 rowcount++; 3259 } 3260 browlengths[i] += nmask; 3261 /* zero out the mask elements we set */ 3262 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3263 } 3264 3265 /* Do preallocation */ 3266 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3267 a = (Mat_SeqBAIJ*)newmat->data; 3268 3269 /* set matrix "i" values */ 3270 a->i[0] = 0; 3271 for (i=1; i<= mbs; i++) { 3272 a->i[i] = a->i[i-1] + browlengths[i-1]; 3273 a->ilen[i-1] = browlengths[i-1]; 3274 } 3275 a->nz = 0; 3276 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3277 3278 /* read in nonzero values */ 3279 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3280 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3281 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3282 3283 /* set "a" and "j" values into matrix */ 3284 nzcount = 0; jcount = 0; 3285 for (i=0; i<mbs; i++) { 3286 nzcountb = nzcount; 3287 nmask = 0; 3288 for (j=0; j<bs; j++) { 3289 kmax = rowlengths[i*bs+j]; 3290 for (k=0; k<kmax; k++) { 3291 tmp = jj[nzcount++]/bs; 3292 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3293 } 3294 } 3295 /* sort the masked values */ 3296 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3297 3298 /* set "j" values into matrix */ 3299 maskcount = 1; 3300 for (j=0; j<nmask; j++) { 3301 a->j[jcount++] = masked[j]; 3302 mask[masked[j]] = maskcount++; 3303 } 3304 /* set "a" values into matrix */ 3305 ishift = bs2*a->i[i]; 3306 for (j=0; j<bs; j++) { 3307 kmax = rowlengths[i*bs+j]; 3308 for (k=0; k<kmax; k++) { 3309 tmp = jj[nzcountb]/bs; 3310 block = mask[tmp] - 1; 3311 point = jj[nzcountb] - bs*tmp; 3312 idx = ishift + bs2*block + j + bs*point; 3313 a->a[idx] = (MatScalar)aa[nzcountb++]; 3314 } 3315 } 3316 /* zero out the mask elements we set */ 3317 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3318 } 3319 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3320 3321 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3322 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3323 ierr = PetscFree(aa);CHKERRQ(ierr); 3324 ierr = PetscFree(jj);CHKERRQ(ierr); 3325 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3326 3327 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 PetscFunctionReturn(0); 3330 } 3331 3332 /*@C 3333 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3334 compressed row) format. For good matrix assembly performance the 3335 user should preallocate the matrix storage by setting the parameter nz 3336 (or the array nnz). By setting these parameters accurately, performance 3337 during matrix assembly can be increased by more than a factor of 50. 3338 3339 Collective on MPI_Comm 3340 3341 Input Parameters: 3342 + comm - MPI communicator, set to PETSC_COMM_SELF 3343 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3344 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3345 . m - number of rows 3346 . n - number of columns 3347 . nz - number of nonzero blocks per block row (same for all rows) 3348 - nnz - array containing the number of nonzero blocks in the various block rows 3349 (possibly different for each block row) or NULL 3350 3351 Output Parameter: 3352 . A - the matrix 3353 3354 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3355 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3356 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3357 3358 Options Database Keys: 3359 . -mat_no_unroll - uses code that does not unroll the loops in the 3360 block calculations (much slower) 3361 . -mat_block_size - size of the blocks to use 3362 3363 Level: intermediate 3364 3365 Notes: 3366 The number of rows and columns must be divisible by blocksize. 3367 3368 If the nnz parameter is given then the nz parameter is ignored 3369 3370 A nonzero block is any block that as 1 or more nonzeros in it 3371 3372 The block AIJ format is fully compatible with standard Fortran 77 3373 storage. That is, the stored row and column indices can begin at 3374 either one (as in Fortran) or zero. See the users' manual for details. 3375 3376 Specify the preallocated storage with either nz or nnz (not both). 3377 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3378 allocation. See Users-Manual: ch_mat for details. 3379 matrices. 3380 3381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3382 @*/ 3383 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3384 { 3385 PetscErrorCode ierr; 3386 3387 PetscFunctionBegin; 3388 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3389 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3390 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3391 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3392 PetscFunctionReturn(0); 3393 } 3394 3395 /*@C 3396 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3397 per row in the matrix. For good matrix assembly performance the 3398 user should preallocate the matrix storage by setting the parameter nz 3399 (or the array nnz). By setting these parameters accurately, performance 3400 during matrix assembly can be increased by more than a factor of 50. 3401 3402 Collective on MPI_Comm 3403 3404 Input Parameters: 3405 + B - the matrix 3406 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3407 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3408 . nz - number of block nonzeros per block row (same for all rows) 3409 - nnz - array containing the number of block nonzeros in the various block rows 3410 (possibly different for each block row) or NULL 3411 3412 Options Database Keys: 3413 . -mat_no_unroll - uses code that does not unroll the loops in the 3414 block calculations (much slower) 3415 . -mat_block_size - size of the blocks to use 3416 3417 Level: intermediate 3418 3419 Notes: 3420 If the nnz parameter is given then the nz parameter is ignored 3421 3422 You can call MatGetInfo() to get information on how effective the preallocation was; 3423 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3424 You can also run with the option -info and look for messages with the string 3425 malloc in them to see if additional memory allocation was needed. 3426 3427 The block AIJ format is fully compatible with standard Fortran 77 3428 storage. That is, the stored row and column indices can begin at 3429 either one (as in Fortran) or zero. See the users' manual for details. 3430 3431 Specify the preallocated storage with either nz or nnz (not both). 3432 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3433 allocation. See Users-Manual: ch_mat for details. 3434 3435 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3436 @*/ 3437 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3438 { 3439 PetscErrorCode ierr; 3440 3441 PetscFunctionBegin; 3442 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3443 PetscValidType(B,1); 3444 PetscValidLogicalCollectiveInt(B,bs,2); 3445 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3446 PetscFunctionReturn(0); 3447 } 3448 3449 /*@C 3450 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3451 (the default sequential PETSc format). 3452 3453 Collective on MPI_Comm 3454 3455 Input Parameters: 3456 + B - the matrix 3457 . i - the indices into j for the start of each local row (starts with zero) 3458 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3459 - v - optional values in the matrix 3460 3461 Level: developer 3462 3463 Notes: 3464 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3465 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3466 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3467 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3468 block column and the second index is over columns within a block. 3469 3470 .keywords: matrix, aij, compressed row, sparse 3471 3472 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3473 @*/ 3474 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3475 { 3476 PetscErrorCode ierr; 3477 3478 PetscFunctionBegin; 3479 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3480 PetscValidType(B,1); 3481 PetscValidLogicalCollectiveInt(B,bs,2); 3482 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3483 PetscFunctionReturn(0); 3484 } 3485 3486 3487 /*@ 3488 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3489 3490 Collective on MPI_Comm 3491 3492 Input Parameters: 3493 + comm - must be an MPI communicator of size 1 3494 . bs - size of block 3495 . m - number of rows 3496 . n - number of columns 3497 . i - row indices 3498 . j - column indices 3499 - a - matrix values 3500 3501 Output Parameter: 3502 . mat - the matrix 3503 3504 Level: advanced 3505 3506 Notes: 3507 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3508 once the matrix is destroyed 3509 3510 You cannot set new nonzero locations into this matrix, that will generate an error. 3511 3512 The i and j indices are 0 based 3513 3514 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3515 3516 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3517 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3518 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3519 with column-major ordering within blocks. 3520 3521 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3522 3523 @*/ 3524 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3525 { 3526 PetscErrorCode ierr; 3527 PetscInt ii; 3528 Mat_SeqBAIJ *baij; 3529 3530 PetscFunctionBegin; 3531 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3532 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3533 3534 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3535 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3536 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3537 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3538 baij = (Mat_SeqBAIJ*)(*mat)->data; 3539 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3540 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3541 3542 baij->i = i; 3543 baij->j = j; 3544 baij->a = a; 3545 3546 baij->singlemalloc = PETSC_FALSE; 3547 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3548 baij->free_a = PETSC_FALSE; 3549 baij->free_ij = PETSC_FALSE; 3550 3551 for (ii=0; ii<m; ii++) { 3552 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3553 #if defined(PETSC_USE_DEBUG) 3554 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3555 #endif 3556 } 3557 #if defined(PETSC_USE_DEBUG) 3558 for (ii=0; ii<baij->i[m]; ii++) { 3559 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3560 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3561 } 3562 #endif 3563 3564 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3565 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3566 PetscFunctionReturn(0); 3567 } 3568 3569 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3570 { 3571 PetscErrorCode ierr; 3572 PetscMPIInt size; 3573 3574 PetscFunctionBegin; 3575 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3576 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3577 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3578 } else { 3579 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3580 } 3581 PetscFunctionReturn(0); 3582 } 3583