1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*); 17 #endif 18 19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 22 PetscErrorCode ierr; 23 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 24 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 25 PetscReal shift = 0.0; 26 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 27 28 PetscFunctionBegin; 29 allowzeropivot = PetscNot(A->erroriffailure); 30 31 if (a->idiagvalid) { 32 if (values) *values = a->idiag; 33 PetscFunctionReturn(0); 34 } 35 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 36 diag_offset = a->diag; 37 if (!a->idiag) { 38 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 39 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 40 } 41 diag = a->idiag; 42 if (values) *values = a->idiag; 43 /* factor and invert each block */ 44 switch (bs) { 45 case 1: 46 for (i=0; i<mbs; i++) { 47 odiag = v + 1*diag_offset[i]; 48 diag[0] = odiag[0]; 49 50 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 51 if (allowzeropivot) { 52 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 53 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 54 A->factorerror_zeropivot_row = i; 55 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 56 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 57 } 58 59 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 60 diag += 1; 61 } 62 break; 63 case 2: 64 for (i=0; i<mbs; i++) { 65 odiag = v + 4*diag_offset[i]; 66 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 } 71 break; 72 case 3: 73 for (i=0; i<mbs; i++) { 74 odiag = v + 9*diag_offset[i]; 75 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 76 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 77 diag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 } 82 break; 83 case 4: 84 for (i=0; i<mbs; i++) { 85 odiag = v + 16*diag_offset[i]; 86 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 87 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 88 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 89 diag += 16; 90 } 91 break; 92 case 5: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 25*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 97 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 98 diag += 25; 99 } 100 break; 101 case 6: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 36*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 106 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 107 diag += 36; 108 } 109 break; 110 case 7: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 49*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 115 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116 diag += 49; 117 } 118 break; 119 default: 120 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 121 for (i=0; i<mbs; i++) { 122 odiag = v + bs2*diag_offset[i]; 123 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 diag += bs2; 127 } 128 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 129 } 130 a->idiagvalid = PETSC_TRUE; 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 135 { 136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 137 PetscScalar *x,*work,*w,*workt,*t; 138 const MatScalar *v,*aa = a->a, *idiag; 139 const PetscScalar *b,*xb; 140 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 141 PetscErrorCode ierr; 142 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 143 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 144 145 PetscFunctionBegin; 146 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 147 its = its*lits; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 153 154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155 156 if (!m) PetscFunctionReturn(0); 157 diag = a->diag; 158 idiag = a->idiag; 159 k = PetscMax(A->rmap->n,A->cmap->n); 160 if (!a->mult_work) { 161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162 } 163 if (!a->sor_workt) { 164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165 } 166 if (!a->sor_work) { 167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168 } 169 work = a->mult_work; 170 t = a->sor_workt; 171 w = a->sor_work; 172 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178 switch (bs) { 179 case 1: 180 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181 t[0] = b[0]; 182 i2 = 1; 183 idiag += 1; 184 for (i=1; i<m; i++) { 185 v = aa + ai[i]; 186 vi = aj + ai[i]; 187 nz = diag[i] - ai[i]; 188 s[0] = b[i2]; 189 for (j=0; j<nz; j++) { 190 xw[0] = x[vi[j]]; 191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192 } 193 t[i2] = s[0]; 194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195 x[i2] = xw[0]; 196 idiag += 1; 197 i2 += 1; 198 } 199 break; 200 case 2: 201 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202 t[0] = b[0]; t[1] = b[1]; 203 i2 = 2; 204 idiag += 4; 205 for (i=1; i<m; i++) { 206 v = aa + 4*ai[i]; 207 vi = aj + ai[i]; 208 nz = diag[i] - ai[i]; 209 s[0] = b[i2]; s[1] = b[i2+1]; 210 for (j=0; j<nz; j++) { 211 idx = 2*vi[j]; 212 it = 4*j; 213 xw[0] = x[idx]; xw[1] = x[1+idx]; 214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215 } 216 t[i2] = s[0]; t[i2+1] = s[1]; 217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218 x[i2] = xw[0]; x[i2+1] = xw[1]; 219 idiag += 4; 220 i2 += 2; 221 } 222 break; 223 case 3: 224 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226 i2 = 3; 227 idiag += 9; 228 for (i=1; i<m; i++) { 229 v = aa + 9*ai[i]; 230 vi = aj + ai[i]; 231 nz = diag[i] - ai[i]; 232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233 while (nz--) { 234 idx = 3*(*vi++); 235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237 v += 9; 238 } 239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242 idiag += 9; 243 i2 += 3; 244 } 245 break; 246 case 4: 247 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249 i2 = 4; 250 idiag += 16; 251 for (i=1; i<m; i++) { 252 v = aa + 16*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256 while (nz--) { 257 idx = 4*(*vi++); 258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260 v += 16; 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265 idiag += 16; 266 i2 += 4; 267 } 268 break; 269 case 5: 270 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272 i2 = 5; 273 idiag += 25; 274 for (i=1; i<m; i++) { 275 v = aa + 25*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279 while (nz--) { 280 idx = 5*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283 v += 25; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288 idiag += 25; 289 i2 += 5; 290 } 291 break; 292 case 6: 293 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295 i2 = 6; 296 idiag += 36; 297 for (i=1; i<m; i++) { 298 v = aa + 36*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302 while (nz--) { 303 idx = 6*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307 v += 36; 308 } 309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313 idiag += 36; 314 i2 += 6; 315 } 316 break; 317 case 7: 318 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321 i2 = 7; 322 idiag += 49; 323 for (i=1; i<m; i++) { 324 v = aa + 49*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329 while (nz--) { 330 idx = 7*(*vi++); 331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334 v += 49; 335 } 336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341 idiag += 49; 342 i2 += 7; 343 } 344 break; 345 default: 346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 348 i2 = bs; 349 idiag += bs2; 350 for (i=1; i<m; i++) { 351 v = aa + bs2*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 355 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 356 /* copy all rows of x that are needed into contiguous space */ 357 workt = work; 358 for (j=0; j<nz; j++) { 359 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 360 workt += bs; 361 } 362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365 366 idiag += bs2; 367 i2 += bs; 368 } 369 break; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373 xb = t; 374 } 375 else xb = b; 376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377 idiag = a->idiag+bs2*(a->mbs-1); 378 i2 = bs * (m-1); 379 switch (bs) { 380 case 1: 381 s[0] = xb[i2]; 382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383 x[i2] = xw[0]; 384 i2 -= 1; 385 for (i=m-2; i>=0; i--) { 386 v = aa + (diag[i]+1); 387 vi = aj + diag[i] + 1; 388 nz = ai[i+1] - diag[i] - 1; 389 s[0] = xb[i2]; 390 for (j=0; j<nz; j++) { 391 xw[0] = x[vi[j]]; 392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393 } 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 idiag -= 1; 397 i2 -= 1; 398 } 399 break; 400 case 2: 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403 x[i2] = xw[0]; x[i2+1] = xw[1]; 404 i2 -= 2; 405 idiag -= 4; 406 for (i=m-2; i>=0; i--) { 407 v = aa + 4*(diag[i] + 1); 408 vi = aj + diag[i] + 1; 409 nz = ai[i+1] - diag[i] - 1; 410 s[0] = xb[i2]; s[1] = xb[i2+1]; 411 for (j=0; j<nz; j++) { 412 idx = 2*vi[j]; 413 it = 4*j; 414 xw[0] = x[idx]; xw[1] = x[1+idx]; 415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416 } 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 idiag -= 4; 420 i2 -= 2; 421 } 422 break; 423 case 3: 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427 i2 -= 3; 428 idiag -= 9; 429 for (i=m-2; i>=0; i--) { 430 v = aa + 9*(diag[i]+1); 431 vi = aj + diag[i] + 1; 432 nz = ai[i+1] - diag[i] - 1; 433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434 while (nz--) { 435 idx = 3*(*vi++); 436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438 v += 9; 439 } 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 break; 446 case 4: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450 i2 -= 4; 451 idiag -= 16; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 16*(diag[i]+1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457 while (nz--) { 458 idx = 4*(*vi++); 459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461 v += 16; 462 } 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 idiag -= 16; 466 i2 -= 4; 467 } 468 break; 469 case 5: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473 i2 -= 5; 474 idiag -= 25; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 25*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480 while (nz--) { 481 idx = 5*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484 v += 25; 485 } 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 idiag -= 25; 489 i2 -= 5; 490 } 491 break; 492 case 6: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496 i2 -= 6; 497 idiag -= 36; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 36*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503 while (nz--) { 504 idx = 6*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508 v += 36; 509 } 510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512 idiag -= 36; 513 i2 -= 6; 514 } 515 break; 516 case 7: 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522 i2 -= 7; 523 idiag -= 49; 524 for (i=m-2; i>=0; i--) { 525 v = aa + 49*(diag[i]+1); 526 vi = aj + diag[i] + 1; 527 nz = ai[i+1] - diag[i] - 1; 528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530 while (nz--) { 531 idx = 7*(*vi++); 532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535 v += 49; 536 } 537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540 idiag -= 49; 541 i2 -= 7; 542 } 543 break; 544 default: 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547 i2 -= bs; 548 idiag -= bs2; 549 for (i=m-2; i>=0; i--) { 550 v = aa + bs2*(diag[i]+1); 551 vi = aj + diag[i] + 1; 552 nz = ai[i+1] - diag[i] - 1; 553 554 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 555 /* copy all rows of x that are needed into contiguous space */ 556 workt = work; 557 for (j=0; j<nz; j++) { 558 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 559 workt += bs; 560 } 561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563 564 idiag -= bs2; 565 i2 -= bs; 566 } 567 break; 568 } 569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570 } 571 its--; 572 } 573 while (its--) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 idiag = a->idiag; 576 i2 = 0; 577 switch (bs) { 578 case 1: 579 for (i=0; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = ai[i+1] - ai[i]; 583 s[0] = b[i2]; 584 for (j=0; j<nz; j++) { 585 xw[0] = x[vi[j]]; 586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587 } 588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589 x[i2] += xw[0]; 590 idiag += 1; 591 i2 += 1; 592 } 593 break; 594 case 2: 595 for (i=0; i<m; i++) { 596 v = aa + 4*ai[i]; 597 vi = aj + ai[i]; 598 nz = ai[i+1] - ai[i]; 599 s[0] = b[i2]; s[1] = b[i2+1]; 600 for (j=0; j<nz; j++) { 601 idx = 2*vi[j]; 602 it = 4*j; 603 xw[0] = x[idx]; xw[1] = x[1+idx]; 604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605 } 606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607 x[i2] += xw[0]; x[i2+1] += xw[1]; 608 idiag += 4; 609 i2 += 2; 610 } 611 break; 612 case 3: 613 for (i=0; i<m; i++) { 614 v = aa + 9*ai[i]; 615 vi = aj + ai[i]; 616 nz = ai[i+1] - ai[i]; 617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618 while (nz--) { 619 idx = 3*(*vi++); 620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626 idiag += 9; 627 i2 += 3; 628 } 629 break; 630 case 4: 631 for (i=0; i<m; i++) { 632 v = aa + 16*ai[i]; 633 vi = aj + ai[i]; 634 nz = ai[i+1] - ai[i]; 635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636 while (nz--) { 637 idx = 4*(*vi++); 638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640 v += 16; 641 } 642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644 idiag += 16; 645 i2 += 4; 646 } 647 break; 648 case 5: 649 for (i=0; i<m; i++) { 650 v = aa + 25*ai[i]; 651 vi = aj + ai[i]; 652 nz = ai[i+1] - ai[i]; 653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658 v += 25; 659 } 660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662 idiag += 25; 663 i2 += 5; 664 } 665 break; 666 case 6: 667 for (i=0; i<m; i++) { 668 v = aa + 36*ai[i]; 669 vi = aj + ai[i]; 670 nz = ai[i+1] - ai[i]; 671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672 while (nz--) { 673 idx = 6*(*vi++); 674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677 v += 36; 678 } 679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682 idiag += 36; 683 i2 += 6; 684 } 685 break; 686 case 7: 687 for (i=0; i<m; i++) { 688 v = aa + 49*ai[i]; 689 vi = aj + ai[i]; 690 nz = ai[i+1] - ai[i]; 691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693 while (nz--) { 694 idx = 7*(*vi++); 695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698 v += 49; 699 } 700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703 idiag += 49; 704 i2 += 7; 705 } 706 break; 707 default: 708 for (i=0; i<m; i++) { 709 v = aa + bs2*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 713 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 714 /* copy all rows of x that are needed into contiguous space */ 715 workt = work; 716 for (j=0; j<nz; j++) { 717 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 718 workt += bs; 719 } 720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722 723 idiag += bs2; 724 i2 += bs; 725 } 726 break; 727 } 728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729 } 730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731 idiag = a->idiag+bs2*(a->mbs-1); 732 i2 = bs * (m-1); 733 switch (bs) { 734 case 1: 735 for (i=m-1; i>=0; i--) { 736 v = aa + ai[i]; 737 vi = aj + ai[i]; 738 nz = ai[i+1] - ai[i]; 739 s[0] = b[i2]; 740 for (j=0; j<nz; j++) { 741 xw[0] = x[vi[j]]; 742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743 } 744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745 x[i2] += xw[0]; 746 idiag -= 1; 747 i2 -= 1; 748 } 749 break; 750 case 2: 751 for (i=m-1; i>=0; i--) { 752 v = aa + 4*ai[i]; 753 vi = aj + ai[i]; 754 nz = ai[i+1] - ai[i]; 755 s[0] = b[i2]; s[1] = b[i2+1]; 756 for (j=0; j<nz; j++) { 757 idx = 2*vi[j]; 758 it = 4*j; 759 xw[0] = x[idx]; xw[1] = x[1+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761 } 762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763 x[i2] += xw[0]; x[i2+1] += xw[1]; 764 idiag -= 4; 765 i2 -= 2; 766 } 767 break; 768 case 3: 769 for (i=m-1; i>=0; i--) { 770 v = aa + 9*ai[i]; 771 vi = aj + ai[i]; 772 nz = ai[i+1] - ai[i]; 773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774 while (nz--) { 775 idx = 3*(*vi++); 776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778 v += 9; 779 } 780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782 idiag -= 9; 783 i2 -= 3; 784 } 785 break; 786 case 4: 787 for (i=m-1; i>=0; i--) { 788 v = aa + 16*ai[i]; 789 vi = aj + ai[i]; 790 nz = ai[i+1] - ai[i]; 791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792 while (nz--) { 793 idx = 4*(*vi++); 794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796 v += 16; 797 } 798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800 idiag -= 16; 801 i2 -= 4; 802 } 803 break; 804 case 5: 805 for (i=m-1; i>=0; i--) { 806 v = aa + 25*ai[i]; 807 vi = aj + ai[i]; 808 nz = ai[i+1] - ai[i]; 809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810 while (nz--) { 811 idx = 5*(*vi++); 812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814 v += 25; 815 } 816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818 idiag -= 25; 819 i2 -= 5; 820 } 821 break; 822 case 6: 823 for (i=m-1; i>=0; i--) { 824 v = aa + 36*ai[i]; 825 vi = aj + ai[i]; 826 nz = ai[i+1] - ai[i]; 827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828 while (nz--) { 829 idx = 6*(*vi++); 830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833 v += 36; 834 } 835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838 idiag -= 36; 839 i2 -= 6; 840 } 841 break; 842 case 7: 843 for (i=m-1; i>=0; i--) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = ai[i+1] - ai[i]; 847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854 v += 49; 855 } 856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859 idiag -= 49; 860 i2 -= 7; 861 } 862 break; 863 default: 864 for (i=m-1; i>=0; i--) { 865 v = aa + bs2*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 869 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 874 workt += bs; 875 } 876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878 879 idiag -= bs2; 880 i2 -= bs; 881 } 882 break; 883 } 884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885 } 886 } 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 893 /* 894 Special version for direct calls from Fortran (Used in PETSc-fun3d) 895 */ 896 #if defined(PETSC_HAVE_FORTRAN_CAPS) 897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 899 #define matsetvaluesblocked4_ matsetvaluesblocked4 900 #endif 901 902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 903 { 904 Mat A = *AA; 905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 906 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 907 PetscInt *ai =a->i,*ailen=a->ilen; 908 PetscInt *aj =a->j,stepval,lastcol = -1; 909 const PetscScalar *value = v; 910 MatScalar *ap,*aa = a->a,*bap; 911 912 PetscFunctionBegin; 913 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 914 stepval = (n-1)*4; 915 for (k=0; k<m; k++) { /* loop over added rows */ 916 row = im[k]; 917 rp = aj + ai[row]; 918 ap = aa + 16*ai[row]; 919 nrow = ailen[row]; 920 low = 0; 921 high = nrow; 922 for (l=0; l<n; l++) { /* loop over added columns */ 923 col = in[l]; 924 if (col <= lastcol) low = 0; 925 else high = nrow; 926 lastcol = col; 927 value = v + k*(stepval+4 + l)*4; 928 while (high-low > 7) { 929 t = (low+high)/2; 930 if (rp[t] > col) high = t; 931 else low = t; 932 } 933 for (i=low; i<high; i++) { 934 if (rp[i] > col) break; 935 if (rp[i] == col) { 936 bap = ap + 16*i; 937 for (ii=0; ii<4; ii++,value+=stepval) { 938 for (jj=ii; jj<16; jj+=4) { 939 bap[jj] += *value++; 940 } 941 } 942 goto noinsert2; 943 } 944 } 945 N = nrow++ - 1; 946 high++; /* added new column index thus must search to one higher than before */ 947 /* shift up all the later entries in this row */ 948 for (ii=N; ii>=i; ii--) { 949 rp[ii+1] = rp[ii]; 950 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 951 } 952 if (N >= i) { 953 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 954 } 955 rp[i] = col; 956 bap = ap + 16*i; 957 for (ii=0; ii<4; ii++,value+=stepval) { 958 for (jj=ii; jj<16; jj+=4) { 959 bap[jj] = *value++; 960 } 961 } 962 noinsert2:; 963 low = i; 964 } 965 ailen[row] = nrow; 966 } 967 PetscFunctionReturnVoid(); 968 } 969 970 #if defined(PETSC_HAVE_FORTRAN_CAPS) 971 #define matsetvalues4_ MATSETVALUES4 972 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 973 #define matsetvalues4_ matsetvalues4 974 #endif 975 976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 977 { 978 Mat A = *AA; 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 981 PetscInt *ai=a->i,*ailen=a->ilen; 982 PetscInt *aj=a->j,brow,bcol; 983 PetscInt ridx,cidx,lastcol = -1; 984 MatScalar *ap,value,*aa=a->a,*bap; 985 986 PetscFunctionBegin; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; brow = row/4; 989 rp = aj + ai[brow]; 990 ap = aa + 16*ai[brow]; 991 nrow = ailen[brow]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; bcol = col/4; 996 ridx = row % 4; cidx = col % 4; 997 value = v[l + k*n]; 998 if (col <= lastcol) low = 0; 999 else high = nrow; 1000 lastcol = col; 1001 while (high-low > 7) { 1002 t = (low+high)/2; 1003 if (rp[t] > bcol) high = t; 1004 else low = t; 1005 } 1006 for (i=low; i<high; i++) { 1007 if (rp[i] > bcol) break; 1008 if (rp[i] == bcol) { 1009 bap = ap + 16*i + 4*cidx + ridx; 1010 *bap += value; 1011 goto noinsert1; 1012 } 1013 } 1014 N = nrow++ - 1; 1015 high++; /* added new column thus must search to one higher than before */ 1016 /* shift up all the later entries in this row */ 1017 for (ii=N; ii>=i; ii--) { 1018 rp[ii+1] = rp[ii]; 1019 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1020 } 1021 if (N>=i) { 1022 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1023 } 1024 rp[i] = bcol; 1025 ap[16*i + 4*cidx + ridx] = value; 1026 noinsert1:; 1027 low = i; 1028 } 1029 ailen[brow] = nrow; 1030 } 1031 PetscFunctionReturnVoid(); 1032 } 1033 1034 /* 1035 Checks for missing diagonals 1036 */ 1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1040 PetscErrorCode ierr; 1041 PetscInt *diag,*ii = a->i,i; 1042 1043 PetscFunctionBegin; 1044 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1045 *missing = PETSC_FALSE; 1046 if (A->rmap->n > 0 && !ii) { 1047 *missing = PETSC_TRUE; 1048 if (d) *d = 0; 1049 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1050 } else { 1051 diag = a->diag; 1052 for (i=0; i<a->mbs; i++) { 1053 if (diag[i] >= ii[i+1]) { 1054 *missing = PETSC_TRUE; 1055 if (d) *d = i; 1056 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1057 break; 1058 } 1059 } 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1065 { 1066 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 PetscInt i,j,m = a->mbs; 1069 1070 PetscFunctionBegin; 1071 if (!a->diag) { 1072 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1073 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1074 a->free_diag = PETSC_TRUE; 1075 } 1076 for (i=0; i<m; i++) { 1077 a->diag[i] = a->i[i+1]; 1078 for (j=a->i[i]; j<a->i[i+1]; j++) { 1079 if (a->j[j] == i) { 1080 a->diag[i] = j; 1081 break; 1082 } 1083 } 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1090 { 1091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1094 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1095 1096 PetscFunctionBegin; 1097 *nn = n; 1098 if (!ia) PetscFunctionReturn(0); 1099 if (symmetric) { 1100 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1101 nz = tia[n]; 1102 } else { 1103 tia = a->i; tja = a->j; 1104 } 1105 1106 if (!blockcompressed && bs > 1) { 1107 (*nn) *= bs; 1108 /* malloc & create the natural set of indices */ 1109 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1110 if (n) { 1111 (*ia)[0] = oshift; 1112 for (j=1; j<bs; j++) { 1113 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1114 } 1115 } 1116 1117 for (i=1; i<n; i++) { 1118 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1119 for (j=1; j<bs; j++) { 1120 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1121 } 1122 } 1123 if (n) { 1124 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1125 } 1126 1127 if (inja) { 1128 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1129 cnt = 0; 1130 for (i=0; i<n; i++) { 1131 for (j=0; j<bs; j++) { 1132 for (k=tia[i]; k<tia[i+1]; k++) { 1133 for (l=0; l<bs; l++) { 1134 (*ja)[cnt++] = bs*tja[k] + l; 1135 } 1136 } 1137 } 1138 } 1139 } 1140 1141 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1142 ierr = PetscFree(tia);CHKERRQ(ierr); 1143 ierr = PetscFree(tja);CHKERRQ(ierr); 1144 } 1145 } else if (oshift == 1) { 1146 if (symmetric) { 1147 nz = tia[A->rmap->n/bs]; 1148 /* add 1 to i and j indices */ 1149 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1150 *ia = tia; 1151 if (ja) { 1152 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1153 *ja = tja; 1154 } 1155 } else { 1156 nz = a->i[A->rmap->n/bs]; 1157 /* malloc space and add 1 to i and j indices */ 1158 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1159 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1160 if (ja) { 1161 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1162 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1163 } 1164 } 1165 } else { 1166 *ia = tia; 1167 if (ja) *ja = tja; 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1173 { 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 if (!ia) PetscFunctionReturn(0); 1178 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1179 ierr = PetscFree(*ia);CHKERRQ(ierr); 1180 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1186 { 1187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 #if defined(PETSC_USE_LOG) 1192 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1193 #endif 1194 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1197 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1198 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1199 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1200 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1205 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1206 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1207 1208 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1209 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1210 ierr = PetscFree(A->data);CHKERRQ(ierr); 1211 1212 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1223 #if defined(PETSC_HAVE_HYPRE) 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1225 #endif 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1230 { 1231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 switch (op) { 1236 case MAT_ROW_ORIENTED: 1237 a->roworiented = flg; 1238 break; 1239 case MAT_KEEP_NONZERO_PATTERN: 1240 a->keepnonzeropattern = flg; 1241 break; 1242 case MAT_NEW_NONZERO_LOCATIONS: 1243 a->nonew = (flg ? 0 : 1); 1244 break; 1245 case MAT_NEW_NONZERO_LOCATION_ERR: 1246 a->nonew = (flg ? -1 : 0); 1247 break; 1248 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1249 a->nonew = (flg ? -2 : 0); 1250 break; 1251 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1252 a->nounused = (flg ? -1 : 0); 1253 break; 1254 case MAT_NEW_DIAGONALS: 1255 case MAT_IGNORE_OFF_PROC_ENTRIES: 1256 case MAT_USE_HASH_TABLE: 1257 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1258 break; 1259 case MAT_SPD: 1260 case MAT_SYMMETRIC: 1261 case MAT_STRUCTURALLY_SYMMETRIC: 1262 case MAT_HERMITIAN: 1263 case MAT_SYMMETRY_ETERNAL: 1264 case MAT_SUBMAT_SINGLEIS: 1265 case MAT_STRUCTURE_ONLY: 1266 /* These options are handled directly by MatSetOption() */ 1267 break; 1268 default: 1269 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1270 } 1271 PetscFunctionReturn(0); 1272 } 1273 1274 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1275 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1276 { 1277 PetscErrorCode ierr; 1278 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1279 MatScalar *aa_i; 1280 PetscScalar *v_i; 1281 1282 PetscFunctionBegin; 1283 bs = A->rmap->bs; 1284 bs2 = bs*bs; 1285 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1286 1287 bn = row/bs; /* Block number */ 1288 bp = row % bs; /* Block Position */ 1289 M = ai[bn+1] - ai[bn]; 1290 *nz = bs*M; 1291 1292 if (v) { 1293 *v = 0; 1294 if (*nz) { 1295 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1296 for (i=0; i<M; i++) { /* for each block in the block row */ 1297 v_i = *v + i*bs; 1298 aa_i = aa + bs2*(ai[bn] + i); 1299 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1300 } 1301 } 1302 } 1303 1304 if (idx) { 1305 *idx = 0; 1306 if (*nz) { 1307 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1308 for (i=0; i<M; i++) { /* for each block in the block row */ 1309 idx_i = *idx + i*bs; 1310 itmp = bs*aj[ai[bn] + i]; 1311 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1312 } 1313 } 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1319 { 1320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1334 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1335 PetscFunctionReturn(0); 1336 } 1337 1338 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1339 { 1340 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1341 Mat C; 1342 PetscErrorCode ierr; 1343 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1344 PetscInt *rows,*cols,bs2=a->bs2; 1345 MatScalar *array; 1346 1347 PetscFunctionBegin; 1348 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1349 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1350 1351 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1352 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1353 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1354 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1355 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1356 ierr = PetscFree(col);CHKERRQ(ierr); 1357 } else { 1358 C = *B; 1359 } 1360 1361 array = a->a; 1362 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1363 for (i=0; i<mbs; i++) { 1364 cols[0] = i*bs; 1365 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1366 len = ai[i+1] - ai[i]; 1367 for (j=0; j<len; j++) { 1368 rows[0] = (*aj++)*bs; 1369 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1370 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1371 array += bs2; 1372 } 1373 } 1374 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1375 1376 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1378 1379 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1380 *B = C; 1381 } else { 1382 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1383 } 1384 PetscFunctionReturn(0); 1385 } 1386 1387 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1388 { 1389 PetscErrorCode ierr; 1390 Mat Btrans; 1391 1392 PetscFunctionBegin; 1393 *f = PETSC_FALSE; 1394 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1395 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1396 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1397 PetscFunctionReturn(0); 1398 } 1399 1400 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1401 { 1402 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1403 PetscErrorCode ierr; 1404 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1405 int fd; 1406 PetscScalar *aa; 1407 FILE *file; 1408 1409 PetscFunctionBegin; 1410 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1411 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1412 col_lens[0] = MAT_FILE_CLASSID; 1413 1414 col_lens[1] = A->rmap->N; 1415 col_lens[2] = A->cmap->n; 1416 col_lens[3] = a->nz*bs2; 1417 1418 /* store lengths of each row and write (including header) to file */ 1419 count = 0; 1420 for (i=0; i<a->mbs; i++) { 1421 for (j=0; j<bs; j++) { 1422 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1423 } 1424 } 1425 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1426 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1427 1428 /* store column indices (zero start index) */ 1429 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1430 count = 0; 1431 for (i=0; i<a->mbs; i++) { 1432 for (j=0; j<bs; j++) { 1433 for (k=a->i[i]; k<a->i[i+1]; k++) { 1434 for (l=0; l<bs; l++) { 1435 jj[count++] = bs*a->j[k] + l; 1436 } 1437 } 1438 } 1439 } 1440 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1441 ierr = PetscFree(jj);CHKERRQ(ierr); 1442 1443 /* store nonzero values */ 1444 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1445 count = 0; 1446 for (i=0; i<a->mbs; i++) { 1447 for (j=0; j<bs; j++) { 1448 for (k=a->i[i]; k<a->i[i+1]; k++) { 1449 for (l=0; l<bs; l++) { 1450 aa[count++] = a->a[bs2*k + l*bs + j]; 1451 } 1452 } 1453 } 1454 } 1455 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1456 ierr = PetscFree(aa);CHKERRQ(ierr); 1457 1458 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1459 if (file) { 1460 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1466 { 1467 PetscErrorCode ierr; 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1469 PetscInt i,bs = A->rmap->bs,k; 1470 1471 PetscFunctionBegin; 1472 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1473 for (i=0; i<a->mbs; i++) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1475 for (k=a->i[i]; k<a->i[i+1]; k++) { 1476 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1477 } 1478 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1479 } 1480 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1485 { 1486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1487 PetscErrorCode ierr; 1488 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1489 PetscViewerFormat format; 1490 1491 PetscFunctionBegin; 1492 if (A->structure_only) { 1493 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 1497 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1498 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1499 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1500 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1501 const char *matname; 1502 Mat aij; 1503 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1504 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1505 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1506 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1507 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1511 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1512 for (i=0; i<a->mbs; i++) { 1513 for (j=0; j<bs; j++) { 1514 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1515 for (k=a->i[i]; k<a->i[i+1]; k++) { 1516 for (l=0; l<bs; l++) { 1517 #if defined(PETSC_USE_COMPLEX) 1518 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1519 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1520 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1521 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1522 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1523 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1524 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1525 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1526 } 1527 #else 1528 if (a->a[bs2*k + l*bs + j] != 0.0) { 1529 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1530 } 1531 #endif 1532 } 1533 } 1534 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1535 } 1536 } 1537 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1538 } else { 1539 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1540 for (i=0; i<a->mbs; i++) { 1541 for (j=0; j<bs; j++) { 1542 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1543 for (k=a->i[i]; k<a->i[i+1]; k++) { 1544 for (l=0; l<bs; l++) { 1545 #if defined(PETSC_USE_COMPLEX) 1546 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1547 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1548 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1549 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1550 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1551 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1552 } else { 1553 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1554 } 1555 #else 1556 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1557 #endif 1558 } 1559 } 1560 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1561 } 1562 } 1563 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #include <petscdraw.h> 1570 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1571 { 1572 Mat A = (Mat) Aa; 1573 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1574 PetscErrorCode ierr; 1575 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1576 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1577 MatScalar *aa; 1578 PetscViewer viewer; 1579 PetscViewerFormat format; 1580 1581 PetscFunctionBegin; 1582 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1583 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1584 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1585 1586 /* loop over matrix elements drawing boxes */ 1587 1588 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1589 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1590 /* Blue for negative, Cyan for zero and Red for positive */ 1591 color = PETSC_DRAW_BLUE; 1592 for (i=0,row=0; i<mbs; i++,row+=bs) { 1593 for (j=a->i[i]; j<a->i[i+1]; j++) { 1594 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1595 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1596 aa = a->a + j*bs2; 1597 for (k=0; k<bs; k++) { 1598 for (l=0; l<bs; l++) { 1599 if (PetscRealPart(*aa++) >= 0.) continue; 1600 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1601 } 1602 } 1603 } 1604 } 1605 color = PETSC_DRAW_CYAN; 1606 for (i=0,row=0; i<mbs; i++,row+=bs) { 1607 for (j=a->i[i]; j<a->i[i+1]; j++) { 1608 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1609 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1610 aa = a->a + j*bs2; 1611 for (k=0; k<bs; k++) { 1612 for (l=0; l<bs; l++) { 1613 if (PetscRealPart(*aa++) != 0.) continue; 1614 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1615 } 1616 } 1617 } 1618 } 1619 color = PETSC_DRAW_RED; 1620 for (i=0,row=0; i<mbs; i++,row+=bs) { 1621 for (j=a->i[i]; j<a->i[i+1]; j++) { 1622 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1623 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1624 aa = a->a + j*bs2; 1625 for (k=0; k<bs; k++) { 1626 for (l=0; l<bs; l++) { 1627 if (PetscRealPart(*aa++) <= 0.) continue; 1628 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1629 } 1630 } 1631 } 1632 } 1633 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1634 } else { 1635 /* use contour shading to indicate magnitude of values */ 1636 /* first determine max of all nonzero values */ 1637 PetscReal minv = 0.0, maxv = 0.0; 1638 PetscDraw popup; 1639 1640 for (i=0; i<a->nz*a->bs2; i++) { 1641 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1642 } 1643 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1644 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1645 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1646 1647 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1648 for (i=0,row=0; i<mbs; i++,row+=bs) { 1649 for (j=a->i[i]; j<a->i[i+1]; j++) { 1650 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1651 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1652 aa = a->a + j*bs2; 1653 for (k=0; k<bs; k++) { 1654 for (l=0; l<bs; l++) { 1655 MatScalar v = *aa++; 1656 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1657 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1658 } 1659 } 1660 } 1661 } 1662 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1663 } 1664 PetscFunctionReturn(0); 1665 } 1666 1667 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1668 { 1669 PetscErrorCode ierr; 1670 PetscReal xl,yl,xr,yr,w,h; 1671 PetscDraw draw; 1672 PetscBool isnull; 1673 1674 PetscFunctionBegin; 1675 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1676 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1677 if (isnull) PetscFunctionReturn(0); 1678 1679 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1680 xr += w; yr += h; xl = -w; yl = -h; 1681 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1682 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1683 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1684 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1685 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1690 { 1691 PetscErrorCode ierr; 1692 PetscBool iascii,isbinary,isdraw; 1693 1694 PetscFunctionBegin; 1695 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1696 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1697 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1698 if (iascii) { 1699 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1700 } else if (isbinary) { 1701 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1702 } else if (isdraw) { 1703 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1704 } else { 1705 Mat B; 1706 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1707 ierr = MatView(B,viewer);CHKERRQ(ierr); 1708 ierr = MatDestroy(&B);CHKERRQ(ierr); 1709 } 1710 PetscFunctionReturn(0); 1711 } 1712 1713 1714 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1715 { 1716 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1717 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1718 PetscInt *ai = a->i,*ailen = a->ilen; 1719 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1720 MatScalar *ap,*aa = a->a; 1721 1722 PetscFunctionBegin; 1723 for (k=0; k<m; k++) { /* loop over rows */ 1724 row = im[k]; brow = row/bs; 1725 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1726 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1727 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1728 nrow = ailen[brow]; 1729 for (l=0; l<n; l++) { /* loop over columns */ 1730 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1731 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1732 col = in[l]; 1733 bcol = col/bs; 1734 cidx = col%bs; 1735 ridx = row%bs; 1736 high = nrow; 1737 low = 0; /* assume unsorted */ 1738 while (high-low > 5) { 1739 t = (low+high)/2; 1740 if (rp[t] > bcol) high = t; 1741 else low = t; 1742 } 1743 for (i=low; i<high; i++) { 1744 if (rp[i] > bcol) break; 1745 if (rp[i] == bcol) { 1746 *v++ = ap[bs2*i+bs*cidx+ridx]; 1747 goto finished; 1748 } 1749 } 1750 *v++ = 0.0; 1751 finished:; 1752 } 1753 } 1754 PetscFunctionReturn(0); 1755 } 1756 1757 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1758 { 1759 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1760 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1761 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1762 PetscErrorCode ierr; 1763 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1764 PetscBool roworiented=a->roworiented; 1765 const PetscScalar *value = v; 1766 MatScalar *ap=NULL,*aa = a->a,*bap; 1767 1768 PetscFunctionBegin; 1769 if (roworiented) { 1770 stepval = (n-1)*bs; 1771 } else { 1772 stepval = (m-1)*bs; 1773 } 1774 for (k=0; k<m; k++) { /* loop over added rows */ 1775 row = im[k]; 1776 if (row < 0) continue; 1777 #if defined(PETSC_USE_DEBUG) 1778 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1779 #endif 1780 rp = aj + ai[row]; 1781 if (!A->structure_only) ap = aa + bs2*ai[row]; 1782 rmax = imax[row]; 1783 nrow = ailen[row]; 1784 low = 0; 1785 high = nrow; 1786 for (l=0; l<n; l++) { /* loop over added columns */ 1787 if (in[l] < 0) continue; 1788 #if defined(PETSC_USE_DEBUG) 1789 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1790 #endif 1791 col = in[l]; 1792 if (!A->structure_only) { 1793 if (roworiented) { 1794 value = v + (k*(stepval+bs) + l)*bs; 1795 } else { 1796 value = v + (l*(stepval+bs) + k)*bs; 1797 } 1798 } 1799 if (col <= lastcol) low = 0; 1800 else high = nrow; 1801 lastcol = col; 1802 while (high-low > 7) { 1803 t = (low+high)/2; 1804 if (rp[t] > col) high = t; 1805 else low = t; 1806 } 1807 for (i=low; i<high; i++) { 1808 if (rp[i] > col) break; 1809 if (rp[i] == col) { 1810 if (A->structure_only) goto noinsert2; 1811 bap = ap + bs2*i; 1812 if (roworiented) { 1813 if (is == ADD_VALUES) { 1814 for (ii=0; ii<bs; ii++,value+=stepval) { 1815 for (jj=ii; jj<bs2; jj+=bs) { 1816 bap[jj] += *value++; 1817 } 1818 } 1819 } else { 1820 for (ii=0; ii<bs; ii++,value+=stepval) { 1821 for (jj=ii; jj<bs2; jj+=bs) { 1822 bap[jj] = *value++; 1823 } 1824 } 1825 } 1826 } else { 1827 if (is == ADD_VALUES) { 1828 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1829 for (jj=0; jj<bs; jj++) { 1830 bap[jj] += value[jj]; 1831 } 1832 bap += bs; 1833 } 1834 } else { 1835 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1836 for (jj=0; jj<bs; jj++) { 1837 bap[jj] = value[jj]; 1838 } 1839 bap += bs; 1840 } 1841 } 1842 } 1843 goto noinsert2; 1844 } 1845 } 1846 if (nonew == 1) goto noinsert2; 1847 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1848 if (A->structure_only) { 1849 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1850 } else { 1851 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1852 } 1853 N = nrow++ - 1; high++; 1854 /* shift up all the later entries in this row */ 1855 for (ii=N; ii>=i; ii--) { 1856 rp[ii+1] = rp[ii]; 1857 if (!A->structure_only) { 1858 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1859 } 1860 } 1861 if (N >= i && !A->structure_only) { 1862 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1863 } 1864 1865 rp[i] = col; 1866 if (!A->structure_only) { 1867 bap = ap + bs2*i; 1868 if (roworiented) { 1869 for (ii=0; ii<bs; ii++,value+=stepval) { 1870 for (jj=ii; jj<bs2; jj+=bs) { 1871 bap[jj] = *value++; 1872 } 1873 } 1874 } else { 1875 for (ii=0; ii<bs; ii++,value+=stepval) { 1876 for (jj=0; jj<bs; jj++) { 1877 *bap++ = *value++; 1878 } 1879 } 1880 } 1881 } 1882 noinsert2:; 1883 low = i; 1884 } 1885 ailen[row] = nrow; 1886 } 1887 PetscFunctionReturn(0); 1888 } 1889 1890 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1891 { 1892 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1893 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1894 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1895 PetscErrorCode ierr; 1896 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1897 MatScalar *aa = a->a,*ap; 1898 PetscReal ratio=0.6; 1899 1900 PetscFunctionBegin; 1901 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1902 1903 if (m) rmax = ailen[0]; 1904 for (i=1; i<mbs; i++) { 1905 /* move each row back by the amount of empty slots (fshift) before it*/ 1906 fshift += imax[i-1] - ailen[i-1]; 1907 rmax = PetscMax(rmax,ailen[i]); 1908 if (fshift) { 1909 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1910 N = ailen[i]; 1911 for (j=0; j<N; j++) { 1912 ip[j-fshift] = ip[j]; 1913 if (!A->structure_only) { 1914 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1915 } 1916 } 1917 } 1918 ai[i] = ai[i-1] + ailen[i-1]; 1919 } 1920 if (mbs) { 1921 fshift += imax[mbs-1] - ailen[mbs-1]; 1922 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1923 } 1924 1925 /* reset ilen and imax for each row */ 1926 a->nonzerorowcnt = 0; 1927 if (A->structure_only) { 1928 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1929 } else { /* !A->structure_only */ 1930 for (i=0; i<mbs; i++) { 1931 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1932 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1933 } 1934 } 1935 a->nz = ai[mbs]; 1936 1937 /* diagonals may have moved, so kill the diagonal pointers */ 1938 a->idiagvalid = PETSC_FALSE; 1939 if (fshift && a->diag) { 1940 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1941 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1942 a->diag = 0; 1943 } 1944 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1945 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1946 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1947 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1948 1949 A->info.mallocs += a->reallocs; 1950 a->reallocs = 0; 1951 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1952 a->rmax = rmax; 1953 1954 if (!A->structure_only) { 1955 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 /* 1961 This function returns an array of flags which indicate the locations of contiguous 1962 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1963 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1964 Assume: sizes should be long enough to hold all the values. 1965 */ 1966 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1967 { 1968 PetscInt i,j,k,row; 1969 PetscBool flg; 1970 1971 PetscFunctionBegin; 1972 for (i=0,j=0; i<n; j++) { 1973 row = idx[i]; 1974 if (row%bs!=0) { /* Not the begining of a block */ 1975 sizes[j] = 1; 1976 i++; 1977 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1978 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1979 i++; 1980 } else { /* Begining of the block, so check if the complete block exists */ 1981 flg = PETSC_TRUE; 1982 for (k=1; k<bs; k++) { 1983 if (row+k != idx[i+k]) { /* break in the block */ 1984 flg = PETSC_FALSE; 1985 break; 1986 } 1987 } 1988 if (flg) { /* No break in the bs */ 1989 sizes[j] = bs; 1990 i += bs; 1991 } else { 1992 sizes[j] = 1; 1993 i++; 1994 } 1995 } 1996 } 1997 *bs_max = j; 1998 PetscFunctionReturn(0); 1999 } 2000 2001 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2002 { 2003 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2004 PetscErrorCode ierr; 2005 PetscInt i,j,k,count,*rows; 2006 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2007 PetscScalar zero = 0.0; 2008 MatScalar *aa; 2009 const PetscScalar *xx; 2010 PetscScalar *bb; 2011 2012 PetscFunctionBegin; 2013 /* fix right hand side if needed */ 2014 if (x && b) { 2015 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2016 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2017 for (i=0; i<is_n; i++) { 2018 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2019 } 2020 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2021 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2022 } 2023 2024 /* Make a copy of the IS and sort it */ 2025 /* allocate memory for rows,sizes */ 2026 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2027 2028 /* copy IS values to rows, and sort them */ 2029 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2030 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2031 2032 if (baij->keepnonzeropattern) { 2033 for (i=0; i<is_n; i++) sizes[i] = 1; 2034 bs_max = is_n; 2035 } else { 2036 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2037 A->nonzerostate++; 2038 } 2039 2040 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2041 row = rows[j]; 2042 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2043 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2044 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2045 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2046 if (diag != (PetscScalar)0.0) { 2047 if (baij->ilen[row/bs] > 0) { 2048 baij->ilen[row/bs] = 1; 2049 baij->j[baij->i[row/bs]] = row/bs; 2050 2051 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2052 } 2053 /* Now insert all the diagonal values for this bs */ 2054 for (k=0; k<bs; k++) { 2055 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2056 } 2057 } else { /* (diag == 0.0) */ 2058 baij->ilen[row/bs] = 0; 2059 } /* end (diag == 0.0) */ 2060 } else { /* (sizes[i] != bs) */ 2061 #if defined(PETSC_USE_DEBUG) 2062 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2063 #endif 2064 for (k=0; k<count; k++) { 2065 aa[0] = zero; 2066 aa += bs; 2067 } 2068 if (diag != (PetscScalar)0.0) { 2069 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2070 } 2071 } 2072 } 2073 2074 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2075 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2080 { 2081 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2082 PetscErrorCode ierr; 2083 PetscInt i,j,k,count; 2084 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2085 PetscScalar zero = 0.0; 2086 MatScalar *aa; 2087 const PetscScalar *xx; 2088 PetscScalar *bb; 2089 PetscBool *zeroed,vecs = PETSC_FALSE; 2090 2091 PetscFunctionBegin; 2092 /* fix right hand side if needed */ 2093 if (x && b) { 2094 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2095 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2096 vecs = PETSC_TRUE; 2097 } 2098 2099 /* zero the columns */ 2100 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2101 for (i=0; i<is_n; i++) { 2102 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2103 zeroed[is_idx[i]] = PETSC_TRUE; 2104 } 2105 for (i=0; i<A->rmap->N; i++) { 2106 if (!zeroed[i]) { 2107 row = i/bs; 2108 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2109 for (k=0; k<bs; k++) { 2110 col = bs*baij->j[j] + k; 2111 if (zeroed[col]) { 2112 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2113 if (vecs) bb[i] -= aa[0]*xx[col]; 2114 aa[0] = 0.0; 2115 } 2116 } 2117 } 2118 } else if (vecs) bb[i] = diag*xx[i]; 2119 } 2120 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2121 if (vecs) { 2122 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2123 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2124 } 2125 2126 /* zero the rows */ 2127 for (i=0; i<is_n; i++) { 2128 row = is_idx[i]; 2129 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2130 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2131 for (k=0; k<count; k++) { 2132 aa[0] = zero; 2133 aa += bs; 2134 } 2135 if (diag != (PetscScalar)0.0) { 2136 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2137 } 2138 } 2139 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2144 { 2145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2146 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2147 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2148 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2149 PetscErrorCode ierr; 2150 PetscInt ridx,cidx,bs2=a->bs2; 2151 PetscBool roworiented=a->roworiented; 2152 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2153 2154 PetscFunctionBegin; 2155 for (k=0; k<m; k++) { /* loop over added rows */ 2156 row = im[k]; 2157 brow = row/bs; 2158 if (row < 0) continue; 2159 #if defined(PETSC_USE_DEBUG) 2160 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2161 #endif 2162 rp = aj + ai[brow]; 2163 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2164 rmax = imax[brow]; 2165 nrow = ailen[brow]; 2166 low = 0; 2167 high = nrow; 2168 for (l=0; l<n; l++) { /* loop over added columns */ 2169 if (in[l] < 0) continue; 2170 #if defined(PETSC_USE_DEBUG) 2171 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2172 #endif 2173 col = in[l]; bcol = col/bs; 2174 ridx = row % bs; cidx = col % bs; 2175 if (!A->structure_only) { 2176 if (roworiented) { 2177 value = v[l + k*n]; 2178 } else { 2179 value = v[k + l*m]; 2180 } 2181 } 2182 if (col <= lastcol) low = 0; else high = nrow; 2183 lastcol = col; 2184 while (high-low > 7) { 2185 t = (low+high)/2; 2186 if (rp[t] > bcol) high = t; 2187 else low = t; 2188 } 2189 for (i=low; i<high; i++) { 2190 if (rp[i] > bcol) break; 2191 if (rp[i] == bcol) { 2192 bap = ap + bs2*i + bs*cidx + ridx; 2193 if (!A->structure_only) { 2194 if (is == ADD_VALUES) *bap += value; 2195 else *bap = value; 2196 } 2197 goto noinsert1; 2198 } 2199 } 2200 if (nonew == 1) goto noinsert1; 2201 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2202 if (A->structure_only) { 2203 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2204 } else { 2205 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2206 } 2207 N = nrow++ - 1; high++; 2208 /* shift up all the later entries in this row */ 2209 for (ii=N; ii>=i; ii--) { 2210 rp[ii+1] = rp[ii]; 2211 if (!A->structure_only) { 2212 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2213 } 2214 } 2215 if (N>=i && !A->structure_only) { 2216 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2217 } 2218 rp[i] = bcol; 2219 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2220 a->nz++; 2221 A->nonzerostate++; 2222 noinsert1:; 2223 low = i; 2224 } 2225 ailen[brow] = nrow; 2226 } 2227 PetscFunctionReturn(0); 2228 } 2229 2230 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2231 { 2232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2233 Mat outA; 2234 PetscErrorCode ierr; 2235 PetscBool row_identity,col_identity; 2236 2237 PetscFunctionBegin; 2238 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2239 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2240 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2241 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2242 2243 outA = inA; 2244 inA->factortype = MAT_FACTOR_LU; 2245 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2246 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2247 2248 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2249 2250 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2251 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2252 a->row = row; 2253 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2254 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2255 a->col = col; 2256 2257 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2258 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2259 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2260 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2261 2262 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2263 if (!a->solve_work) { 2264 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2265 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2266 } 2267 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2272 { 2273 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2274 PetscInt i,nz,mbs; 2275 2276 PetscFunctionBegin; 2277 nz = baij->maxnz; 2278 mbs = baij->mbs; 2279 for (i=0; i<nz; i++) { 2280 baij->j[i] = indices[i]; 2281 } 2282 baij->nz = nz; 2283 for (i=0; i<mbs; i++) { 2284 baij->ilen[i] = baij->imax[i]; 2285 } 2286 PetscFunctionReturn(0); 2287 } 2288 2289 /*@ 2290 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2291 in the matrix. 2292 2293 Input Parameters: 2294 + mat - the SeqBAIJ matrix 2295 - indices - the column indices 2296 2297 Level: advanced 2298 2299 Notes: 2300 This can be called if you have precomputed the nonzero structure of the 2301 matrix and want to provide it to the matrix object to improve the performance 2302 of the MatSetValues() operation. 2303 2304 You MUST have set the correct numbers of nonzeros per row in the call to 2305 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2306 2307 MUST be called before any calls to MatSetValues(); 2308 2309 @*/ 2310 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2311 { 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2316 PetscValidPointer(indices,2); 2317 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2322 { 2323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2324 PetscErrorCode ierr; 2325 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2326 PetscReal atmp; 2327 PetscScalar *x,zero = 0.0; 2328 MatScalar *aa; 2329 PetscInt ncols,brow,krow,kcol; 2330 2331 PetscFunctionBegin; 2332 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2333 bs = A->rmap->bs; 2334 aa = a->a; 2335 ai = a->i; 2336 aj = a->j; 2337 mbs = a->mbs; 2338 2339 ierr = VecSet(v,zero);CHKERRQ(ierr); 2340 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2341 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2342 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2343 for (i=0; i<mbs; i++) { 2344 ncols = ai[1] - ai[0]; ai++; 2345 brow = bs*i; 2346 for (j=0; j<ncols; j++) { 2347 for (kcol=0; kcol<bs; kcol++) { 2348 for (krow=0; krow<bs; krow++) { 2349 atmp = PetscAbsScalar(*aa);aa++; 2350 row = brow + krow; /* row index */ 2351 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2352 } 2353 } 2354 aj++; 2355 } 2356 } 2357 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2362 { 2363 PetscErrorCode ierr; 2364 2365 PetscFunctionBegin; 2366 /* If the two matrices have the same copy implementation, use fast copy. */ 2367 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2368 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2369 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2370 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2371 2372 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2373 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2374 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2375 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2376 } else { 2377 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2378 } 2379 PetscFunctionReturn(0); 2380 } 2381 2382 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2383 { 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2392 { 2393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2394 2395 PetscFunctionBegin; 2396 *array = a->a; 2397 PetscFunctionReturn(0); 2398 } 2399 2400 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2401 { 2402 PetscFunctionBegin; 2403 PetscFunctionReturn(0); 2404 } 2405 2406 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2407 { 2408 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2409 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2410 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2411 PetscErrorCode ierr; 2412 2413 PetscFunctionBegin; 2414 /* Set the number of nonzeros in the new matrix */ 2415 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2420 { 2421 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2422 PetscErrorCode ierr; 2423 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2424 PetscBLASInt one=1; 2425 2426 PetscFunctionBegin; 2427 if (str == SAME_NONZERO_PATTERN) { 2428 PetscScalar alpha = a; 2429 PetscBLASInt bnz; 2430 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2431 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2432 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2433 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2434 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2435 } else { 2436 Mat B; 2437 PetscInt *nnz; 2438 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2439 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2440 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2441 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2442 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2444 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2445 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2446 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2447 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2448 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2449 ierr = PetscFree(nnz);CHKERRQ(ierr); 2450 } 2451 PetscFunctionReturn(0); 2452 } 2453 2454 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2455 { 2456 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2457 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2458 MatScalar *aa = a->a; 2459 2460 PetscFunctionBegin; 2461 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2466 { 2467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2468 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2469 MatScalar *aa = a->a; 2470 2471 PetscFunctionBegin; 2472 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 /* 2477 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2478 */ 2479 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2480 { 2481 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2482 PetscErrorCode ierr; 2483 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2484 PetscInt nz = a->i[m],row,*jj,mr,col; 2485 2486 PetscFunctionBegin; 2487 *nn = n; 2488 if (!ia) PetscFunctionReturn(0); 2489 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2490 else { 2491 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2492 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2494 jj = a->j; 2495 for (i=0; i<nz; i++) { 2496 collengths[jj[i]]++; 2497 } 2498 cia[0] = oshift; 2499 for (i=0; i<n; i++) { 2500 cia[i+1] = cia[i] + collengths[i]; 2501 } 2502 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2503 jj = a->j; 2504 for (row=0; row<m; row++) { 2505 mr = a->i[row+1] - a->i[row]; 2506 for (i=0; i<mr; i++) { 2507 col = *jj++; 2508 2509 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2510 } 2511 } 2512 ierr = PetscFree(collengths);CHKERRQ(ierr); 2513 *ia = cia; *ja = cja; 2514 } 2515 PetscFunctionReturn(0); 2516 } 2517 2518 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2519 { 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 if (!ia) PetscFunctionReturn(0); 2524 ierr = PetscFree(*ia);CHKERRQ(ierr); 2525 ierr = PetscFree(*ja);CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 2529 /* 2530 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2531 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2532 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2533 */ 2534 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2535 { 2536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2537 PetscErrorCode ierr; 2538 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2539 PetscInt nz = a->i[m],row,*jj,mr,col; 2540 PetscInt *cspidx; 2541 2542 PetscFunctionBegin; 2543 *nn = n; 2544 if (!ia) PetscFunctionReturn(0); 2545 2546 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2547 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2550 jj = a->j; 2551 for (i=0; i<nz; i++) { 2552 collengths[jj[i]]++; 2553 } 2554 cia[0] = oshift; 2555 for (i=0; i<n; i++) { 2556 cia[i+1] = cia[i] + collengths[i]; 2557 } 2558 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2559 jj = a->j; 2560 for (row=0; row<m; row++) { 2561 mr = a->i[row+1] - a->i[row]; 2562 for (i=0; i<mr; i++) { 2563 col = *jj++; 2564 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2565 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2566 } 2567 } 2568 ierr = PetscFree(collengths);CHKERRQ(ierr); 2569 *ia = cia; *ja = cja; 2570 *spidx = cspidx; 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2575 { 2576 PetscErrorCode ierr; 2577 2578 PetscFunctionBegin; 2579 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2580 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2581 PetscFunctionReturn(0); 2582 } 2583 2584 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2585 { 2586 PetscErrorCode ierr; 2587 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2588 2589 PetscFunctionBegin; 2590 if (!Y->preallocated || !aij->nz) { 2591 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2592 } 2593 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 /* -------------------------------------------------------------------*/ 2598 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2599 MatGetRow_SeqBAIJ, 2600 MatRestoreRow_SeqBAIJ, 2601 MatMult_SeqBAIJ_N, 2602 /* 4*/ MatMultAdd_SeqBAIJ_N, 2603 MatMultTranspose_SeqBAIJ, 2604 MatMultTransposeAdd_SeqBAIJ, 2605 0, 2606 0, 2607 0, 2608 /* 10*/ 0, 2609 MatLUFactor_SeqBAIJ, 2610 0, 2611 0, 2612 MatTranspose_SeqBAIJ, 2613 /* 15*/ MatGetInfo_SeqBAIJ, 2614 MatEqual_SeqBAIJ, 2615 MatGetDiagonal_SeqBAIJ, 2616 MatDiagonalScale_SeqBAIJ, 2617 MatNorm_SeqBAIJ, 2618 /* 20*/ 0, 2619 MatAssemblyEnd_SeqBAIJ, 2620 MatSetOption_SeqBAIJ, 2621 MatZeroEntries_SeqBAIJ, 2622 /* 24*/ MatZeroRows_SeqBAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /* 29*/ MatSetUp_SeqBAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /* 34*/ MatDuplicate_SeqBAIJ, 2633 0, 2634 0, 2635 MatILUFactor_SeqBAIJ, 2636 0, 2637 /* 39*/ MatAXPY_SeqBAIJ, 2638 MatCreateSubMatrices_SeqBAIJ, 2639 MatIncreaseOverlap_SeqBAIJ, 2640 MatGetValues_SeqBAIJ, 2641 MatCopy_SeqBAIJ, 2642 /* 44*/ 0, 2643 MatScale_SeqBAIJ, 2644 MatShift_SeqBAIJ, 2645 0, 2646 MatZeroRowsColumns_SeqBAIJ, 2647 /* 49*/ 0, 2648 MatGetRowIJ_SeqBAIJ, 2649 MatRestoreRowIJ_SeqBAIJ, 2650 MatGetColumnIJ_SeqBAIJ, 2651 MatRestoreColumnIJ_SeqBAIJ, 2652 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2653 0, 2654 0, 2655 0, 2656 MatSetValuesBlocked_SeqBAIJ, 2657 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2658 MatDestroy_SeqBAIJ, 2659 MatView_SeqBAIJ, 2660 0, 2661 0, 2662 /* 64*/ 0, 2663 0, 2664 0, 2665 0, 2666 0, 2667 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2668 0, 2669 MatConvert_Basic, 2670 0, 2671 0, 2672 /* 74*/ 0, 2673 MatFDColoringApply_BAIJ, 2674 0, 2675 0, 2676 0, 2677 /* 79*/ 0, 2678 0, 2679 0, 2680 0, 2681 MatLoad_SeqBAIJ, 2682 /* 84*/ 0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /* 89*/ 0, 2688 0, 2689 0, 2690 0, 2691 0, 2692 /* 94*/ 0, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /* 99*/ 0, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*104*/ 0, 2703 MatRealPart_SeqBAIJ, 2704 MatImaginaryPart_SeqBAIJ, 2705 0, 2706 0, 2707 /*109*/ 0, 2708 0, 2709 0, 2710 0, 2711 MatMissingDiagonal_SeqBAIJ, 2712 /*114*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /*119*/ 0, 2718 0, 2719 MatMultHermitianTranspose_SeqBAIJ, 2720 MatMultHermitianTransposeAdd_SeqBAIJ, 2721 0, 2722 /*124*/ 0, 2723 0, 2724 MatInvertBlockDiagonal_SeqBAIJ, 2725 0, 2726 0, 2727 /*129*/ 0, 2728 0, 2729 0, 2730 0, 2731 0, 2732 /*134*/ 0, 2733 0, 2734 0, 2735 0, 2736 0, 2737 /*139*/ MatSetBlockSizes_Default, 2738 0, 2739 0, 2740 MatFDColoringSetUp_SeqXAIJ, 2741 0, 2742 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2743 MatDestroySubMatrices_SeqBAIJ 2744 }; 2745 2746 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2747 { 2748 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2749 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2754 2755 /* allocate space for values if not already there */ 2756 if (!aij->saved_values) { 2757 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2758 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2759 } 2760 2761 /* copy values over */ 2762 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2763 PetscFunctionReturn(0); 2764 } 2765 2766 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2767 { 2768 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2769 PetscErrorCode ierr; 2770 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2771 2772 PetscFunctionBegin; 2773 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2774 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2775 2776 /* copy values over */ 2777 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2778 PetscFunctionReturn(0); 2779 } 2780 2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2783 2784 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2785 { 2786 Mat_SeqBAIJ *b; 2787 PetscErrorCode ierr; 2788 PetscInt i,mbs,nbs,bs2; 2789 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2790 2791 PetscFunctionBegin; 2792 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2793 if (nz == MAT_SKIP_ALLOCATION) { 2794 skipallocation = PETSC_TRUE; 2795 nz = 0; 2796 } 2797 2798 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2799 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2800 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2802 2803 B->preallocated = PETSC_TRUE; 2804 2805 mbs = B->rmap->n/bs; 2806 nbs = B->cmap->n/bs; 2807 bs2 = bs*bs; 2808 2809 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2810 2811 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2812 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2813 if (nnz) { 2814 for (i=0; i<mbs; i++) { 2815 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2816 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2817 } 2818 } 2819 2820 b = (Mat_SeqBAIJ*)B->data; 2821 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2822 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2823 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2824 2825 if (!flg) { 2826 switch (bs) { 2827 case 1: 2828 B->ops->mult = MatMult_SeqBAIJ_1; 2829 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2830 break; 2831 case 2: 2832 B->ops->mult = MatMult_SeqBAIJ_2; 2833 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2834 break; 2835 case 3: 2836 B->ops->mult = MatMult_SeqBAIJ_3; 2837 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2838 break; 2839 case 4: 2840 B->ops->mult = MatMult_SeqBAIJ_4; 2841 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2842 break; 2843 case 5: 2844 B->ops->mult = MatMult_SeqBAIJ_5; 2845 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2846 break; 2847 case 6: 2848 B->ops->mult = MatMult_SeqBAIJ_6; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2850 break; 2851 case 7: 2852 B->ops->mult = MatMult_SeqBAIJ_7; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2854 break; 2855 case 9: 2856 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2857 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2859 #else 2860 B->ops->mult = MatMult_SeqBAIJ_N; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2862 #endif 2863 break; 2864 case 11: 2865 B->ops->mult = MatMult_SeqBAIJ_11; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2867 break; 2868 case 15: 2869 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2870 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2871 break; 2872 default: 2873 B->ops->mult = MatMult_SeqBAIJ_N; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2875 break; 2876 } 2877 } 2878 B->ops->sor = MatSOR_SeqBAIJ; 2879 b->mbs = mbs; 2880 b->nbs = nbs; 2881 if (!skipallocation) { 2882 if (!b->imax) { 2883 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2884 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2885 2886 b->free_imax_ilen = PETSC_TRUE; 2887 } 2888 /* b->ilen will count nonzeros in each block row so far. */ 2889 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2890 if (!nnz) { 2891 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2892 else if (nz < 0) nz = 1; 2893 for (i=0; i<mbs; i++) b->imax[i] = nz; 2894 nz = nz*mbs; 2895 } else { 2896 nz = 0; 2897 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2898 } 2899 2900 /* allocate the matrix space */ 2901 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2902 if (B->structure_only) { 2903 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2904 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2905 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2906 } else { 2907 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2908 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2909 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2910 } 2911 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2912 2913 if (B->structure_only) { 2914 b->singlemalloc = PETSC_FALSE; 2915 b->free_a = PETSC_FALSE; 2916 } else { 2917 b->singlemalloc = PETSC_TRUE; 2918 b->free_a = PETSC_TRUE; 2919 } 2920 b->free_ij = PETSC_TRUE; 2921 2922 b->i[0] = 0; 2923 for (i=1; i<mbs+1; i++) { 2924 b->i[i] = b->i[i-1] + b->imax[i-1]; 2925 } 2926 2927 } else { 2928 b->free_a = PETSC_FALSE; 2929 b->free_ij = PETSC_FALSE; 2930 } 2931 2932 b->bs2 = bs2; 2933 b->mbs = mbs; 2934 b->nz = 0; 2935 b->maxnz = nz; 2936 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2937 B->was_assembled = PETSC_FALSE; 2938 B->assembled = PETSC_FALSE; 2939 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2940 PetscFunctionReturn(0); 2941 } 2942 2943 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2944 { 2945 PetscInt i,m,nz,nz_max=0,*nnz; 2946 PetscScalar *values=0; 2947 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2948 PetscErrorCode ierr; 2949 2950 PetscFunctionBegin; 2951 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2952 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2953 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2954 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2955 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2956 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2957 m = B->rmap->n/bs; 2958 2959 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2960 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2961 for (i=0; i<m; i++) { 2962 nz = ii[i+1]- ii[i]; 2963 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2964 nz_max = PetscMax(nz_max, nz); 2965 nnz[i] = nz; 2966 } 2967 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2968 ierr = PetscFree(nnz);CHKERRQ(ierr); 2969 2970 values = (PetscScalar*)V; 2971 if (!values) { 2972 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2973 } 2974 for (i=0; i<m; i++) { 2975 PetscInt ncols = ii[i+1] - ii[i]; 2976 const PetscInt *icols = jj + ii[i]; 2977 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2978 if (!roworiented) { 2979 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2980 } else { 2981 PetscInt j; 2982 for (j=0; j<ncols; j++) { 2983 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2984 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2985 } 2986 } 2987 } 2988 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2989 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2990 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2991 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2992 PetscFunctionReturn(0); 2993 } 2994 2995 /*MC 2996 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2997 block sparse compressed row format. 2998 2999 Options Database Keys: 3000 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3001 3002 Level: beginner 3003 3004 .seealso: MatCreateSeqBAIJ() 3005 M*/ 3006 3007 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3008 3009 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3010 { 3011 PetscErrorCode ierr; 3012 PetscMPIInt size; 3013 Mat_SeqBAIJ *b; 3014 3015 PetscFunctionBegin; 3016 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3017 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3018 3019 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3020 B->data = (void*)b; 3021 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3022 3023 b->row = 0; 3024 b->col = 0; 3025 b->icol = 0; 3026 b->reallocs = 0; 3027 b->saved_values = 0; 3028 3029 b->roworiented = PETSC_TRUE; 3030 b->nonew = 0; 3031 b->diag = 0; 3032 B->spptr = 0; 3033 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3034 b->keepnonzeropattern = PETSC_FALSE; 3035 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3045 #if defined(PETSC_HAVE_HYPRE) 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3047 #endif 3048 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3049 PetscFunctionReturn(0); 3050 } 3051 3052 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3053 { 3054 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3055 PetscErrorCode ierr; 3056 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3057 3058 PetscFunctionBegin; 3059 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3060 3061 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3062 c->imax = a->imax; 3063 c->ilen = a->ilen; 3064 c->free_imax_ilen = PETSC_FALSE; 3065 } else { 3066 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3067 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3068 for (i=0; i<mbs; i++) { 3069 c->imax[i] = a->imax[i]; 3070 c->ilen[i] = a->ilen[i]; 3071 } 3072 c->free_imax_ilen = PETSC_TRUE; 3073 } 3074 3075 /* allocate the matrix space */ 3076 if (mallocmatspace) { 3077 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3078 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3079 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3080 3081 c->i = a->i; 3082 c->j = a->j; 3083 c->singlemalloc = PETSC_FALSE; 3084 c->free_a = PETSC_TRUE; 3085 c->free_ij = PETSC_FALSE; 3086 c->parent = A; 3087 C->preallocated = PETSC_TRUE; 3088 C->assembled = PETSC_TRUE; 3089 3090 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3091 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3092 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3093 } else { 3094 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3095 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3096 3097 c->singlemalloc = PETSC_TRUE; 3098 c->free_a = PETSC_TRUE; 3099 c->free_ij = PETSC_TRUE; 3100 3101 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3102 if (mbs > 0) { 3103 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3104 if (cpvalues == MAT_COPY_VALUES) { 3105 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3106 } else { 3107 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3108 } 3109 } 3110 C->preallocated = PETSC_TRUE; 3111 C->assembled = PETSC_TRUE; 3112 } 3113 } 3114 3115 c->roworiented = a->roworiented; 3116 c->nonew = a->nonew; 3117 3118 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3119 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3120 3121 c->bs2 = a->bs2; 3122 c->mbs = a->mbs; 3123 c->nbs = a->nbs; 3124 3125 if (a->diag) { 3126 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3127 c->diag = a->diag; 3128 c->free_diag = PETSC_FALSE; 3129 } else { 3130 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3131 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3132 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3133 c->free_diag = PETSC_TRUE; 3134 } 3135 } else c->diag = 0; 3136 3137 c->nz = a->nz; 3138 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3139 c->solve_work = NULL; 3140 c->mult_work = NULL; 3141 c->sor_workt = NULL; 3142 c->sor_work = NULL; 3143 3144 c->compressedrow.use = a->compressedrow.use; 3145 c->compressedrow.nrows = a->compressedrow.nrows; 3146 if (a->compressedrow.use) { 3147 i = a->compressedrow.nrows; 3148 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3149 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3150 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3151 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3152 } else { 3153 c->compressedrow.use = PETSC_FALSE; 3154 c->compressedrow.i = NULL; 3155 c->compressedrow.rindex = NULL; 3156 } 3157 C->nonzerostate = A->nonzerostate; 3158 3159 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3160 PetscFunctionReturn(0); 3161 } 3162 3163 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3164 { 3165 PetscErrorCode ierr; 3166 3167 PetscFunctionBegin; 3168 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3169 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3170 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3171 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 3175 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3176 { 3177 Mat_SeqBAIJ *a; 3178 PetscErrorCode ierr; 3179 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3180 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3181 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3182 PetscInt *masked,nmask,tmp,bs2,ishift; 3183 PetscMPIInt size; 3184 int fd; 3185 PetscScalar *aa; 3186 MPI_Comm comm; 3187 3188 PetscFunctionBegin; 3189 /* force binary viewer to load .info file if it has not yet done so */ 3190 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3191 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3192 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3193 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3194 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3195 if (bs < 0) bs = 1; 3196 bs2 = bs*bs; 3197 3198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3199 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3200 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3201 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3202 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3203 M = header[1]; N = header[2]; nz = header[3]; 3204 3205 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3206 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3207 3208 /* 3209 This code adds extra rows to make sure the number of rows is 3210 divisible by the blocksize 3211 */ 3212 mbs = M/bs; 3213 extra_rows = bs - M + bs*(mbs); 3214 if (extra_rows == bs) extra_rows = 0; 3215 else mbs++; 3216 if (extra_rows) { 3217 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3218 } 3219 3220 /* Set global sizes if not already set */ 3221 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3222 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3223 } else { /* Check if the matrix global sizes are correct */ 3224 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3225 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3226 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3227 } 3228 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3229 } 3230 3231 /* read in row lengths */ 3232 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3233 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3234 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3235 3236 /* read in column indices */ 3237 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3238 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3239 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3240 3241 /* loop over row lengths determining block row lengths */ 3242 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3243 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3244 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3245 rowcount = 0; 3246 nzcount = 0; 3247 for (i=0; i<mbs; i++) { 3248 nmask = 0; 3249 for (j=0; j<bs; j++) { 3250 kmax = rowlengths[rowcount]; 3251 for (k=0; k<kmax; k++) { 3252 tmp = jj[nzcount++]/bs; 3253 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3254 } 3255 rowcount++; 3256 } 3257 browlengths[i] += nmask; 3258 /* zero out the mask elements we set */ 3259 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3260 } 3261 3262 /* Do preallocation */ 3263 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3264 a = (Mat_SeqBAIJ*)newmat->data; 3265 3266 /* set matrix "i" values */ 3267 a->i[0] = 0; 3268 for (i=1; i<= mbs; i++) { 3269 a->i[i] = a->i[i-1] + browlengths[i-1]; 3270 a->ilen[i-1] = browlengths[i-1]; 3271 } 3272 a->nz = 0; 3273 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3274 3275 /* read in nonzero values */ 3276 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3277 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3278 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3279 3280 /* set "a" and "j" values into matrix */ 3281 nzcount = 0; jcount = 0; 3282 for (i=0; i<mbs; i++) { 3283 nzcountb = nzcount; 3284 nmask = 0; 3285 for (j=0; j<bs; j++) { 3286 kmax = rowlengths[i*bs+j]; 3287 for (k=0; k<kmax; k++) { 3288 tmp = jj[nzcount++]/bs; 3289 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3290 } 3291 } 3292 /* sort the masked values */ 3293 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3294 3295 /* set "j" values into matrix */ 3296 maskcount = 1; 3297 for (j=0; j<nmask; j++) { 3298 a->j[jcount++] = masked[j]; 3299 mask[masked[j]] = maskcount++; 3300 } 3301 /* set "a" values into matrix */ 3302 ishift = bs2*a->i[i]; 3303 for (j=0; j<bs; j++) { 3304 kmax = rowlengths[i*bs+j]; 3305 for (k=0; k<kmax; k++) { 3306 tmp = jj[nzcountb]/bs; 3307 block = mask[tmp] - 1; 3308 point = jj[nzcountb] - bs*tmp; 3309 idx = ishift + bs2*block + j + bs*point; 3310 a->a[idx] = (MatScalar)aa[nzcountb++]; 3311 } 3312 } 3313 /* zero out the mask elements we set */ 3314 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3315 } 3316 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3317 3318 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3319 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3320 ierr = PetscFree(aa);CHKERRQ(ierr); 3321 ierr = PetscFree(jj);CHKERRQ(ierr); 3322 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3323 3324 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3325 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 PetscFunctionReturn(0); 3327 } 3328 3329 /*@C 3330 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3331 compressed row) format. For good matrix assembly performance the 3332 user should preallocate the matrix storage by setting the parameter nz 3333 (or the array nnz). By setting these parameters accurately, performance 3334 during matrix assembly can be increased by more than a factor of 50. 3335 3336 Collective on MPI_Comm 3337 3338 Input Parameters: 3339 + comm - MPI communicator, set to PETSC_COMM_SELF 3340 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3341 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3342 . m - number of rows 3343 . n - number of columns 3344 . nz - number of nonzero blocks per block row (same for all rows) 3345 - nnz - array containing the number of nonzero blocks in the various block rows 3346 (possibly different for each block row) or NULL 3347 3348 Output Parameter: 3349 . A - the matrix 3350 3351 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3352 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3353 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3354 3355 Options Database Keys: 3356 . -mat_no_unroll - uses code that does not unroll the loops in the 3357 block calculations (much slower) 3358 . -mat_block_size - size of the blocks to use 3359 3360 Level: intermediate 3361 3362 Notes: 3363 The number of rows and columns must be divisible by blocksize. 3364 3365 If the nnz parameter is given then the nz parameter is ignored 3366 3367 A nonzero block is any block that as 1 or more nonzeros in it 3368 3369 The block AIJ format is fully compatible with standard Fortran 77 3370 storage. That is, the stored row and column indices can begin at 3371 either one (as in Fortran) or zero. See the users' manual for details. 3372 3373 Specify the preallocated storage with either nz or nnz (not both). 3374 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3375 allocation. See Users-Manual: ch_mat for details. 3376 matrices. 3377 3378 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3379 @*/ 3380 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3381 { 3382 PetscErrorCode ierr; 3383 3384 PetscFunctionBegin; 3385 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3386 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3387 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3388 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3389 PetscFunctionReturn(0); 3390 } 3391 3392 /*@C 3393 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3394 per row in the matrix. For good matrix assembly performance the 3395 user should preallocate the matrix storage by setting the parameter nz 3396 (or the array nnz). By setting these parameters accurately, performance 3397 during matrix assembly can be increased by more than a factor of 50. 3398 3399 Collective on MPI_Comm 3400 3401 Input Parameters: 3402 + B - the matrix 3403 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3404 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3405 . nz - number of block nonzeros per block row (same for all rows) 3406 - nnz - array containing the number of block nonzeros in the various block rows 3407 (possibly different for each block row) or NULL 3408 3409 Options Database Keys: 3410 . -mat_no_unroll - uses code that does not unroll the loops in the 3411 block calculations (much slower) 3412 . -mat_block_size - size of the blocks to use 3413 3414 Level: intermediate 3415 3416 Notes: 3417 If the nnz parameter is given then the nz parameter is ignored 3418 3419 You can call MatGetInfo() to get information on how effective the preallocation was; 3420 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3421 You can also run with the option -info and look for messages with the string 3422 malloc in them to see if additional memory allocation was needed. 3423 3424 The block AIJ format is fully compatible with standard Fortran 77 3425 storage. That is, the stored row and column indices can begin at 3426 either one (as in Fortran) or zero. See the users' manual for details. 3427 3428 Specify the preallocated storage with either nz or nnz (not both). 3429 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3430 allocation. See Users-Manual: ch_mat for details. 3431 3432 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3433 @*/ 3434 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3435 { 3436 PetscErrorCode ierr; 3437 3438 PetscFunctionBegin; 3439 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3440 PetscValidType(B,1); 3441 PetscValidLogicalCollectiveInt(B,bs,2); 3442 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3443 PetscFunctionReturn(0); 3444 } 3445 3446 /*@C 3447 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3448 (the default sequential PETSc format). 3449 3450 Collective on MPI_Comm 3451 3452 Input Parameters: 3453 + B - the matrix 3454 . i - the indices into j for the start of each local row (starts with zero) 3455 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3456 - v - optional values in the matrix 3457 3458 Level: developer 3459 3460 Notes: 3461 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3462 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3463 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3464 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3465 block column and the second index is over columns within a block. 3466 3467 .keywords: matrix, aij, compressed row, sparse 3468 3469 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3470 @*/ 3471 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3472 { 3473 PetscErrorCode ierr; 3474 3475 PetscFunctionBegin; 3476 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3477 PetscValidType(B,1); 3478 PetscValidLogicalCollectiveInt(B,bs,2); 3479 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3480 PetscFunctionReturn(0); 3481 } 3482 3483 3484 /*@ 3485 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3486 3487 Collective on MPI_Comm 3488 3489 Input Parameters: 3490 + comm - must be an MPI communicator of size 1 3491 . bs - size of block 3492 . m - number of rows 3493 . n - number of columns 3494 . i - row indices 3495 . j - column indices 3496 - a - matrix values 3497 3498 Output Parameter: 3499 . mat - the matrix 3500 3501 Level: advanced 3502 3503 Notes: 3504 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3505 once the matrix is destroyed 3506 3507 You cannot set new nonzero locations into this matrix, that will generate an error. 3508 3509 The i and j indices are 0 based 3510 3511 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3512 3513 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3514 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3515 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3516 with column-major ordering within blocks. 3517 3518 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3519 3520 @*/ 3521 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3522 { 3523 PetscErrorCode ierr; 3524 PetscInt ii; 3525 Mat_SeqBAIJ *baij; 3526 3527 PetscFunctionBegin; 3528 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3529 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3530 3531 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3532 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3533 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3534 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3535 baij = (Mat_SeqBAIJ*)(*mat)->data; 3536 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3537 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3538 3539 baij->i = i; 3540 baij->j = j; 3541 baij->a = a; 3542 3543 baij->singlemalloc = PETSC_FALSE; 3544 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3545 baij->free_a = PETSC_FALSE; 3546 baij->free_ij = PETSC_FALSE; 3547 3548 for (ii=0; ii<m; ii++) { 3549 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3550 #if defined(PETSC_USE_DEBUG) 3551 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3552 #endif 3553 } 3554 #if defined(PETSC_USE_DEBUG) 3555 for (ii=0; ii<baij->i[m]; ii++) { 3556 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3557 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3558 } 3559 #endif 3560 3561 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3563 PetscFunctionReturn(0); 3564 } 3565 3566 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3567 { 3568 PetscErrorCode ierr; 3569 PetscMPIInt size; 3570 3571 PetscFunctionBegin; 3572 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3573 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3574 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3575 } else { 3576 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3577 } 3578 PetscFunctionReturn(0); 3579 } 3580