1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*); 17 #endif 18 19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 22 PetscErrorCode ierr; 23 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 24 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 25 PetscReal shift = 0.0; 26 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 27 28 PetscFunctionBegin; 29 allowzeropivot = PetscNot(A->erroriffailure); 30 31 if (a->idiagvalid) { 32 if (values) *values = a->idiag; 33 PetscFunctionReturn(0); 34 } 35 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 36 diag_offset = a->diag; 37 if (!a->idiag) { 38 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 39 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 40 } 41 diag = a->idiag; 42 if (values) *values = a->idiag; 43 /* factor and invert each block */ 44 switch (bs) { 45 case 1: 46 for (i=0; i<mbs; i++) { 47 odiag = v + 1*diag_offset[i]; 48 diag[0] = odiag[0]; 49 50 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 51 if (allowzeropivot) { 52 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 53 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 54 A->factorerror_zeropivot_row = i; 55 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 56 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 57 } 58 59 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 60 diag += 1; 61 } 62 break; 63 case 2: 64 for (i=0; i<mbs; i++) { 65 odiag = v + 4*diag_offset[i]; 66 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 } 71 break; 72 case 3: 73 for (i=0; i<mbs; i++) { 74 odiag = v + 9*diag_offset[i]; 75 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 76 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 77 diag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 } 82 break; 83 case 4: 84 for (i=0; i<mbs; i++) { 85 odiag = v + 16*diag_offset[i]; 86 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 87 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 88 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 89 diag += 16; 90 } 91 break; 92 case 5: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 25*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 97 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 98 diag += 25; 99 } 100 break; 101 case 6: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 36*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 106 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 107 diag += 36; 108 } 109 break; 110 case 7: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 49*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 115 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116 diag += 49; 117 } 118 break; 119 default: 120 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 121 for (i=0; i<mbs; i++) { 122 odiag = v + bs2*diag_offset[i]; 123 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 diag += bs2; 127 } 128 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 129 } 130 a->idiagvalid = PETSC_TRUE; 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 135 { 136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 137 PetscScalar *x,*work,*w,*workt,*t; 138 const MatScalar *v,*aa = a->a, *idiag; 139 const PetscScalar *b,*xb; 140 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 141 PetscErrorCode ierr; 142 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 143 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 144 145 PetscFunctionBegin; 146 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 147 its = its*lits; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 153 154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155 156 if (!m) PetscFunctionReturn(0); 157 diag = a->diag; 158 idiag = a->idiag; 159 k = PetscMax(A->rmap->n,A->cmap->n); 160 if (!a->mult_work) { 161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162 } 163 if (!a->sor_workt) { 164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165 } 166 if (!a->sor_work) { 167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168 } 169 work = a->mult_work; 170 t = a->sor_workt; 171 w = a->sor_work; 172 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178 switch (bs) { 179 case 1: 180 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181 t[0] = b[0]; 182 i2 = 1; 183 idiag += 1; 184 for (i=1; i<m; i++) { 185 v = aa + ai[i]; 186 vi = aj + ai[i]; 187 nz = diag[i] - ai[i]; 188 s[0] = b[i2]; 189 for (j=0; j<nz; j++) { 190 xw[0] = x[vi[j]]; 191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192 } 193 t[i2] = s[0]; 194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195 x[i2] = xw[0]; 196 idiag += 1; 197 i2 += 1; 198 } 199 break; 200 case 2: 201 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202 t[0] = b[0]; t[1] = b[1]; 203 i2 = 2; 204 idiag += 4; 205 for (i=1; i<m; i++) { 206 v = aa + 4*ai[i]; 207 vi = aj + ai[i]; 208 nz = diag[i] - ai[i]; 209 s[0] = b[i2]; s[1] = b[i2+1]; 210 for (j=0; j<nz; j++) { 211 idx = 2*vi[j]; 212 it = 4*j; 213 xw[0] = x[idx]; xw[1] = x[1+idx]; 214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215 } 216 t[i2] = s[0]; t[i2+1] = s[1]; 217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218 x[i2] = xw[0]; x[i2+1] = xw[1]; 219 idiag += 4; 220 i2 += 2; 221 } 222 break; 223 case 3: 224 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226 i2 = 3; 227 idiag += 9; 228 for (i=1; i<m; i++) { 229 v = aa + 9*ai[i]; 230 vi = aj + ai[i]; 231 nz = diag[i] - ai[i]; 232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233 while (nz--) { 234 idx = 3*(*vi++); 235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237 v += 9; 238 } 239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242 idiag += 9; 243 i2 += 3; 244 } 245 break; 246 case 4: 247 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249 i2 = 4; 250 idiag += 16; 251 for (i=1; i<m; i++) { 252 v = aa + 16*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256 while (nz--) { 257 idx = 4*(*vi++); 258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260 v += 16; 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265 idiag += 16; 266 i2 += 4; 267 } 268 break; 269 case 5: 270 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272 i2 = 5; 273 idiag += 25; 274 for (i=1; i<m; i++) { 275 v = aa + 25*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279 while (nz--) { 280 idx = 5*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283 v += 25; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288 idiag += 25; 289 i2 += 5; 290 } 291 break; 292 case 6: 293 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295 i2 = 6; 296 idiag += 36; 297 for (i=1; i<m; i++) { 298 v = aa + 36*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302 while (nz--) { 303 idx = 6*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307 v += 36; 308 } 309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313 idiag += 36; 314 i2 += 6; 315 } 316 break; 317 case 7: 318 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321 i2 = 7; 322 idiag += 49; 323 for (i=1; i<m; i++) { 324 v = aa + 49*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329 while (nz--) { 330 idx = 7*(*vi++); 331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334 v += 49; 335 } 336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341 idiag += 49; 342 i2 += 7; 343 } 344 break; 345 default: 346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 348 i2 = bs; 349 idiag += bs2; 350 for (i=1; i<m; i++) { 351 v = aa + bs2*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 355 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 356 /* copy all rows of x that are needed into contiguous space */ 357 workt = work; 358 for (j=0; j<nz; j++) { 359 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 360 workt += bs; 361 } 362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365 366 idiag += bs2; 367 i2 += bs; 368 } 369 break; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373 xb = t; 374 } 375 else xb = b; 376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377 idiag = a->idiag+bs2*(a->mbs-1); 378 i2 = bs * (m-1); 379 switch (bs) { 380 case 1: 381 s[0] = xb[i2]; 382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383 x[i2] = xw[0]; 384 i2 -= 1; 385 for (i=m-2; i>=0; i--) { 386 v = aa + (diag[i]+1); 387 vi = aj + diag[i] + 1; 388 nz = ai[i+1] - diag[i] - 1; 389 s[0] = xb[i2]; 390 for (j=0; j<nz; j++) { 391 xw[0] = x[vi[j]]; 392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393 } 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 idiag -= 1; 397 i2 -= 1; 398 } 399 break; 400 case 2: 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403 x[i2] = xw[0]; x[i2+1] = xw[1]; 404 i2 -= 2; 405 idiag -= 4; 406 for (i=m-2; i>=0; i--) { 407 v = aa + 4*(diag[i] + 1); 408 vi = aj + diag[i] + 1; 409 nz = ai[i+1] - diag[i] - 1; 410 s[0] = xb[i2]; s[1] = xb[i2+1]; 411 for (j=0; j<nz; j++) { 412 idx = 2*vi[j]; 413 it = 4*j; 414 xw[0] = x[idx]; xw[1] = x[1+idx]; 415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416 } 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 idiag -= 4; 420 i2 -= 2; 421 } 422 break; 423 case 3: 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427 i2 -= 3; 428 idiag -= 9; 429 for (i=m-2; i>=0; i--) { 430 v = aa + 9*(diag[i]+1); 431 vi = aj + diag[i] + 1; 432 nz = ai[i+1] - diag[i] - 1; 433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434 while (nz--) { 435 idx = 3*(*vi++); 436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438 v += 9; 439 } 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 break; 446 case 4: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450 i2 -= 4; 451 idiag -= 16; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 16*(diag[i]+1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457 while (nz--) { 458 idx = 4*(*vi++); 459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461 v += 16; 462 } 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 idiag -= 16; 466 i2 -= 4; 467 } 468 break; 469 case 5: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473 i2 -= 5; 474 idiag -= 25; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 25*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480 while (nz--) { 481 idx = 5*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484 v += 25; 485 } 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 idiag -= 25; 489 i2 -= 5; 490 } 491 break; 492 case 6: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496 i2 -= 6; 497 idiag -= 36; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 36*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503 while (nz--) { 504 idx = 6*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508 v += 36; 509 } 510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512 idiag -= 36; 513 i2 -= 6; 514 } 515 break; 516 case 7: 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522 i2 -= 7; 523 idiag -= 49; 524 for (i=m-2; i>=0; i--) { 525 v = aa + 49*(diag[i]+1); 526 vi = aj + diag[i] + 1; 527 nz = ai[i+1] - diag[i] - 1; 528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530 while (nz--) { 531 idx = 7*(*vi++); 532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535 v += 49; 536 } 537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540 idiag -= 49; 541 i2 -= 7; 542 } 543 break; 544 default: 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547 i2 -= bs; 548 idiag -= bs2; 549 for (i=m-2; i>=0; i--) { 550 v = aa + bs2*(diag[i]+1); 551 vi = aj + diag[i] + 1; 552 nz = ai[i+1] - diag[i] - 1; 553 554 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 555 /* copy all rows of x that are needed into contiguous space */ 556 workt = work; 557 for (j=0; j<nz; j++) { 558 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 559 workt += bs; 560 } 561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563 564 idiag -= bs2; 565 i2 -= bs; 566 } 567 break; 568 } 569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570 } 571 its--; 572 } 573 while (its--) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 idiag = a->idiag; 576 i2 = 0; 577 switch (bs) { 578 case 1: 579 for (i=0; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = ai[i+1] - ai[i]; 583 s[0] = b[i2]; 584 for (j=0; j<nz; j++) { 585 xw[0] = x[vi[j]]; 586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587 } 588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589 x[i2] += xw[0]; 590 idiag += 1; 591 i2 += 1; 592 } 593 break; 594 case 2: 595 for (i=0; i<m; i++) { 596 v = aa + 4*ai[i]; 597 vi = aj + ai[i]; 598 nz = ai[i+1] - ai[i]; 599 s[0] = b[i2]; s[1] = b[i2+1]; 600 for (j=0; j<nz; j++) { 601 idx = 2*vi[j]; 602 it = 4*j; 603 xw[0] = x[idx]; xw[1] = x[1+idx]; 604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605 } 606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607 x[i2] += xw[0]; x[i2+1] += xw[1]; 608 idiag += 4; 609 i2 += 2; 610 } 611 break; 612 case 3: 613 for (i=0; i<m; i++) { 614 v = aa + 9*ai[i]; 615 vi = aj + ai[i]; 616 nz = ai[i+1] - ai[i]; 617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618 while (nz--) { 619 idx = 3*(*vi++); 620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626 idiag += 9; 627 i2 += 3; 628 } 629 break; 630 case 4: 631 for (i=0; i<m; i++) { 632 v = aa + 16*ai[i]; 633 vi = aj + ai[i]; 634 nz = ai[i+1] - ai[i]; 635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636 while (nz--) { 637 idx = 4*(*vi++); 638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640 v += 16; 641 } 642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644 idiag += 16; 645 i2 += 4; 646 } 647 break; 648 case 5: 649 for (i=0; i<m; i++) { 650 v = aa + 25*ai[i]; 651 vi = aj + ai[i]; 652 nz = ai[i+1] - ai[i]; 653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658 v += 25; 659 } 660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662 idiag += 25; 663 i2 += 5; 664 } 665 break; 666 case 6: 667 for (i=0; i<m; i++) { 668 v = aa + 36*ai[i]; 669 vi = aj + ai[i]; 670 nz = ai[i+1] - ai[i]; 671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672 while (nz--) { 673 idx = 6*(*vi++); 674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677 v += 36; 678 } 679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682 idiag += 36; 683 i2 += 6; 684 } 685 break; 686 case 7: 687 for (i=0; i<m; i++) { 688 v = aa + 49*ai[i]; 689 vi = aj + ai[i]; 690 nz = ai[i+1] - ai[i]; 691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693 while (nz--) { 694 idx = 7*(*vi++); 695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698 v += 49; 699 } 700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703 idiag += 49; 704 i2 += 7; 705 } 706 break; 707 default: 708 for (i=0; i<m; i++) { 709 v = aa + bs2*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 713 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 714 /* copy all rows of x that are needed into contiguous space */ 715 workt = work; 716 for (j=0; j<nz; j++) { 717 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 718 workt += bs; 719 } 720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722 723 idiag += bs2; 724 i2 += bs; 725 } 726 break; 727 } 728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729 } 730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731 idiag = a->idiag+bs2*(a->mbs-1); 732 i2 = bs * (m-1); 733 switch (bs) { 734 case 1: 735 for (i=m-1; i>=0; i--) { 736 v = aa + ai[i]; 737 vi = aj + ai[i]; 738 nz = ai[i+1] - ai[i]; 739 s[0] = b[i2]; 740 for (j=0; j<nz; j++) { 741 xw[0] = x[vi[j]]; 742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743 } 744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745 x[i2] += xw[0]; 746 idiag -= 1; 747 i2 -= 1; 748 } 749 break; 750 case 2: 751 for (i=m-1; i>=0; i--) { 752 v = aa + 4*ai[i]; 753 vi = aj + ai[i]; 754 nz = ai[i+1] - ai[i]; 755 s[0] = b[i2]; s[1] = b[i2+1]; 756 for (j=0; j<nz; j++) { 757 idx = 2*vi[j]; 758 it = 4*j; 759 xw[0] = x[idx]; xw[1] = x[1+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761 } 762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763 x[i2] += xw[0]; x[i2+1] += xw[1]; 764 idiag -= 4; 765 i2 -= 2; 766 } 767 break; 768 case 3: 769 for (i=m-1; i>=0; i--) { 770 v = aa + 9*ai[i]; 771 vi = aj + ai[i]; 772 nz = ai[i+1] - ai[i]; 773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774 while (nz--) { 775 idx = 3*(*vi++); 776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778 v += 9; 779 } 780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782 idiag -= 9; 783 i2 -= 3; 784 } 785 break; 786 case 4: 787 for (i=m-1; i>=0; i--) { 788 v = aa + 16*ai[i]; 789 vi = aj + ai[i]; 790 nz = ai[i+1] - ai[i]; 791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792 while (nz--) { 793 idx = 4*(*vi++); 794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796 v += 16; 797 } 798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800 idiag -= 16; 801 i2 -= 4; 802 } 803 break; 804 case 5: 805 for (i=m-1; i>=0; i--) { 806 v = aa + 25*ai[i]; 807 vi = aj + ai[i]; 808 nz = ai[i+1] - ai[i]; 809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810 while (nz--) { 811 idx = 5*(*vi++); 812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814 v += 25; 815 } 816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818 idiag -= 25; 819 i2 -= 5; 820 } 821 break; 822 case 6: 823 for (i=m-1; i>=0; i--) { 824 v = aa + 36*ai[i]; 825 vi = aj + ai[i]; 826 nz = ai[i+1] - ai[i]; 827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828 while (nz--) { 829 idx = 6*(*vi++); 830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833 v += 36; 834 } 835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838 idiag -= 36; 839 i2 -= 6; 840 } 841 break; 842 case 7: 843 for (i=m-1; i>=0; i--) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = ai[i+1] - ai[i]; 847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854 v += 49; 855 } 856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859 idiag -= 49; 860 i2 -= 7; 861 } 862 break; 863 default: 864 for (i=m-1; i>=0; i--) { 865 v = aa + bs2*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 869 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 874 workt += bs; 875 } 876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878 879 idiag -= bs2; 880 i2 -= bs; 881 } 882 break; 883 } 884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885 } 886 } 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 893 /* 894 Special version for direct calls from Fortran (Used in PETSc-fun3d) 895 */ 896 #if defined(PETSC_HAVE_FORTRAN_CAPS) 897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 899 #define matsetvaluesblocked4_ matsetvaluesblocked4 900 #endif 901 902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 903 { 904 Mat A = *AA; 905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 906 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 907 PetscInt *ai =a->i,*ailen=a->ilen; 908 PetscInt *aj =a->j,stepval,lastcol = -1; 909 const PetscScalar *value = v; 910 MatScalar *ap,*aa = a->a,*bap; 911 912 PetscFunctionBegin; 913 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 914 stepval = (n-1)*4; 915 for (k=0; k<m; k++) { /* loop over added rows */ 916 row = im[k]; 917 rp = aj + ai[row]; 918 ap = aa + 16*ai[row]; 919 nrow = ailen[row]; 920 low = 0; 921 high = nrow; 922 for (l=0; l<n; l++) { /* loop over added columns */ 923 col = in[l]; 924 if (col <= lastcol) low = 0; 925 else high = nrow; 926 lastcol = col; 927 value = v + k*(stepval+4 + l)*4; 928 while (high-low > 7) { 929 t = (low+high)/2; 930 if (rp[t] > col) high = t; 931 else low = t; 932 } 933 for (i=low; i<high; i++) { 934 if (rp[i] > col) break; 935 if (rp[i] == col) { 936 bap = ap + 16*i; 937 for (ii=0; ii<4; ii++,value+=stepval) { 938 for (jj=ii; jj<16; jj+=4) { 939 bap[jj] += *value++; 940 } 941 } 942 goto noinsert2; 943 } 944 } 945 N = nrow++ - 1; 946 high++; /* added new column index thus must search to one higher than before */ 947 /* shift up all the later entries in this row */ 948 for (ii=N; ii>=i; ii--) { 949 rp[ii+1] = rp[ii]; 950 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 951 } 952 if (N >= i) { 953 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 954 } 955 rp[i] = col; 956 bap = ap + 16*i; 957 for (ii=0; ii<4; ii++,value+=stepval) { 958 for (jj=ii; jj<16; jj+=4) { 959 bap[jj] = *value++; 960 } 961 } 962 noinsert2:; 963 low = i; 964 } 965 ailen[row] = nrow; 966 } 967 PetscFunctionReturnVoid(); 968 } 969 970 #if defined(PETSC_HAVE_FORTRAN_CAPS) 971 #define matsetvalues4_ MATSETVALUES4 972 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 973 #define matsetvalues4_ matsetvalues4 974 #endif 975 976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 977 { 978 Mat A = *AA; 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 981 PetscInt *ai=a->i,*ailen=a->ilen; 982 PetscInt *aj=a->j,brow,bcol; 983 PetscInt ridx,cidx,lastcol = -1; 984 MatScalar *ap,value,*aa=a->a,*bap; 985 986 PetscFunctionBegin; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; brow = row/4; 989 rp = aj + ai[brow]; 990 ap = aa + 16*ai[brow]; 991 nrow = ailen[brow]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; bcol = col/4; 996 ridx = row % 4; cidx = col % 4; 997 value = v[l + k*n]; 998 if (col <= lastcol) low = 0; 999 else high = nrow; 1000 lastcol = col; 1001 while (high-low > 7) { 1002 t = (low+high)/2; 1003 if (rp[t] > bcol) high = t; 1004 else low = t; 1005 } 1006 for (i=low; i<high; i++) { 1007 if (rp[i] > bcol) break; 1008 if (rp[i] == bcol) { 1009 bap = ap + 16*i + 4*cidx + ridx; 1010 *bap += value; 1011 goto noinsert1; 1012 } 1013 } 1014 N = nrow++ - 1; 1015 high++; /* added new column thus must search to one higher than before */ 1016 /* shift up all the later entries in this row */ 1017 for (ii=N; ii>=i; ii--) { 1018 rp[ii+1] = rp[ii]; 1019 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1020 } 1021 if (N>=i) { 1022 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1023 } 1024 rp[i] = bcol; 1025 ap[16*i + 4*cidx + ridx] = value; 1026 noinsert1:; 1027 low = i; 1028 } 1029 ailen[brow] = nrow; 1030 } 1031 PetscFunctionReturnVoid(); 1032 } 1033 1034 /* 1035 Checks for missing diagonals 1036 */ 1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1040 PetscErrorCode ierr; 1041 PetscInt *diag,*ii = a->i,i; 1042 1043 PetscFunctionBegin; 1044 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1045 *missing = PETSC_FALSE; 1046 if (A->rmap->n > 0 && !ii) { 1047 *missing = PETSC_TRUE; 1048 if (d) *d = 0; 1049 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1050 } else { 1051 diag = a->diag; 1052 for (i=0; i<a->mbs; i++) { 1053 if (diag[i] >= ii[i+1]) { 1054 *missing = PETSC_TRUE; 1055 if (d) *d = i; 1056 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1057 break; 1058 } 1059 } 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1065 { 1066 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 PetscInt i,j,m = a->mbs; 1069 1070 PetscFunctionBegin; 1071 if (!a->diag) { 1072 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1073 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1074 a->free_diag = PETSC_TRUE; 1075 } 1076 for (i=0; i<m; i++) { 1077 a->diag[i] = a->i[i+1]; 1078 for (j=a->i[i]; j<a->i[i+1]; j++) { 1079 if (a->j[j] == i) { 1080 a->diag[i] = j; 1081 break; 1082 } 1083 } 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1090 { 1091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1094 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1095 1096 PetscFunctionBegin; 1097 *nn = n; 1098 if (!ia) PetscFunctionReturn(0); 1099 if (symmetric) { 1100 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1101 nz = tia[n]; 1102 } else { 1103 tia = a->i; tja = a->j; 1104 } 1105 1106 if (!blockcompressed && bs > 1) { 1107 (*nn) *= bs; 1108 /* malloc & create the natural set of indices */ 1109 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1110 if (n) { 1111 (*ia)[0] = oshift; 1112 for (j=1; j<bs; j++) { 1113 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1114 } 1115 } 1116 1117 for (i=1; i<n; i++) { 1118 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1119 for (j=1; j<bs; j++) { 1120 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1121 } 1122 } 1123 if (n) { 1124 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1125 } 1126 1127 if (inja) { 1128 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1129 cnt = 0; 1130 for (i=0; i<n; i++) { 1131 for (j=0; j<bs; j++) { 1132 for (k=tia[i]; k<tia[i+1]; k++) { 1133 for (l=0; l<bs; l++) { 1134 (*ja)[cnt++] = bs*tja[k] + l; 1135 } 1136 } 1137 } 1138 } 1139 } 1140 1141 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1142 ierr = PetscFree(tia);CHKERRQ(ierr); 1143 ierr = PetscFree(tja);CHKERRQ(ierr); 1144 } 1145 } else if (oshift == 1) { 1146 if (symmetric) { 1147 nz = tia[A->rmap->n/bs]; 1148 /* add 1 to i and j indices */ 1149 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1150 *ia = tia; 1151 if (ja) { 1152 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1153 *ja = tja; 1154 } 1155 } else { 1156 nz = a->i[A->rmap->n/bs]; 1157 /* malloc space and add 1 to i and j indices */ 1158 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1159 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1160 if (ja) { 1161 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1162 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1163 } 1164 } 1165 } else { 1166 *ia = tia; 1167 if (ja) *ja = tja; 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1173 { 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 if (!ia) PetscFunctionReturn(0); 1178 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1179 ierr = PetscFree(*ia);CHKERRQ(ierr); 1180 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1186 { 1187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 #if defined(PETSC_USE_LOG) 1192 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1193 #endif 1194 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1197 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1198 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1199 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1200 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1205 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1206 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1207 1208 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1209 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1210 ierr = PetscFree(A->data);CHKERRQ(ierr); 1211 1212 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1223 #if defined(PETSC_HAVE_HYPRE) 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1225 #endif 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1230 { 1231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 switch (op) { 1236 case MAT_ROW_ORIENTED: 1237 a->roworiented = flg; 1238 break; 1239 case MAT_KEEP_NONZERO_PATTERN: 1240 a->keepnonzeropattern = flg; 1241 break; 1242 case MAT_NEW_NONZERO_LOCATIONS: 1243 a->nonew = (flg ? 0 : 1); 1244 break; 1245 case MAT_NEW_NONZERO_LOCATION_ERR: 1246 a->nonew = (flg ? -1 : 0); 1247 break; 1248 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1249 a->nonew = (flg ? -2 : 0); 1250 break; 1251 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1252 a->nounused = (flg ? -1 : 0); 1253 break; 1254 case MAT_NEW_DIAGONALS: 1255 case MAT_IGNORE_OFF_PROC_ENTRIES: 1256 case MAT_USE_HASH_TABLE: 1257 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1258 break; 1259 case MAT_SPD: 1260 case MAT_SYMMETRIC: 1261 case MAT_STRUCTURALLY_SYMMETRIC: 1262 case MAT_HERMITIAN: 1263 case MAT_SYMMETRY_ETERNAL: 1264 case MAT_SUBMAT_SINGLEIS: 1265 case MAT_STRUCTURE_ONLY: 1266 /* These options are handled directly by MatSetOption() */ 1267 break; 1268 default: 1269 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1270 } 1271 PetscFunctionReturn(0); 1272 } 1273 1274 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1275 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1276 { 1277 PetscErrorCode ierr; 1278 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1279 MatScalar *aa_i; 1280 PetscScalar *v_i; 1281 1282 PetscFunctionBegin; 1283 bs = A->rmap->bs; 1284 bs2 = bs*bs; 1285 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1286 1287 bn = row/bs; /* Block number */ 1288 bp = row % bs; /* Block Position */ 1289 M = ai[bn+1] - ai[bn]; 1290 *nz = bs*M; 1291 1292 if (v) { 1293 *v = 0; 1294 if (*nz) { 1295 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1296 for (i=0; i<M; i++) { /* for each block in the block row */ 1297 v_i = *v + i*bs; 1298 aa_i = aa + bs2*(ai[bn] + i); 1299 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1300 } 1301 } 1302 } 1303 1304 if (idx) { 1305 *idx = 0; 1306 if (*nz) { 1307 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1308 for (i=0; i<M; i++) { /* for each block in the block row */ 1309 idx_i = *idx + i*bs; 1310 itmp = bs*aj[ai[bn] + i]; 1311 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1312 } 1313 } 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1319 { 1320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1334 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1335 PetscFunctionReturn(0); 1336 } 1337 1338 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1339 { 1340 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1341 Mat C; 1342 PetscErrorCode ierr; 1343 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1344 PetscInt *rows,*cols,bs2=a->bs2; 1345 MatScalar *array; 1346 1347 PetscFunctionBegin; 1348 if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1349 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1350 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1351 1352 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1353 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1354 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1355 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1356 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1357 ierr = PetscFree(col);CHKERRQ(ierr); 1358 } else { 1359 C = *B; 1360 } 1361 1362 array = a->a; 1363 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1364 for (i=0; i<mbs; i++) { 1365 cols[0] = i*bs; 1366 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1367 len = ai[i+1] - ai[i]; 1368 for (j=0; j<len; j++) { 1369 rows[0] = (*aj++)*bs; 1370 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1371 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1372 array += bs2; 1373 } 1374 } 1375 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1376 1377 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1378 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 1380 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1381 *B = C; 1382 } else { 1383 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1389 { 1390 PetscErrorCode ierr; 1391 Mat Btrans; 1392 1393 PetscFunctionBegin; 1394 *f = PETSC_FALSE; 1395 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1396 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1397 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1402 { 1403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1404 PetscErrorCode ierr; 1405 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1406 int fd; 1407 PetscScalar *aa; 1408 FILE *file; 1409 1410 PetscFunctionBegin; 1411 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1412 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1413 col_lens[0] = MAT_FILE_CLASSID; 1414 1415 col_lens[1] = A->rmap->N; 1416 col_lens[2] = A->cmap->n; 1417 col_lens[3] = a->nz*bs2; 1418 1419 /* store lengths of each row and write (including header) to file */ 1420 count = 0; 1421 for (i=0; i<a->mbs; i++) { 1422 for (j=0; j<bs; j++) { 1423 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1424 } 1425 } 1426 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1427 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1428 1429 /* store column indices (zero start index) */ 1430 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1431 count = 0; 1432 for (i=0; i<a->mbs; i++) { 1433 for (j=0; j<bs; j++) { 1434 for (k=a->i[i]; k<a->i[i+1]; k++) { 1435 for (l=0; l<bs; l++) { 1436 jj[count++] = bs*a->j[k] + l; 1437 } 1438 } 1439 } 1440 } 1441 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1442 ierr = PetscFree(jj);CHKERRQ(ierr); 1443 1444 /* store nonzero values */ 1445 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1446 count = 0; 1447 for (i=0; i<a->mbs; i++) { 1448 for (j=0; j<bs; j++) { 1449 for (k=a->i[i]; k<a->i[i+1]; k++) { 1450 for (l=0; l<bs; l++) { 1451 aa[count++] = a->a[bs2*k + l*bs + j]; 1452 } 1453 } 1454 } 1455 } 1456 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1457 ierr = PetscFree(aa);CHKERRQ(ierr); 1458 1459 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1460 if (file) { 1461 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1467 { 1468 PetscErrorCode ierr; 1469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1470 PetscInt i,bs = A->rmap->bs,k; 1471 1472 PetscFunctionBegin; 1473 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 for (i=0; i<a->mbs; i++) { 1475 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1476 for (k=a->i[i]; k<a->i[i+1]; k++) { 1477 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1478 } 1479 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1480 } 1481 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1486 { 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1488 PetscErrorCode ierr; 1489 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1490 PetscViewerFormat format; 1491 1492 PetscFunctionBegin; 1493 if (A->structure_only) { 1494 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1499 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1500 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1501 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1502 const char *matname; 1503 Mat aij; 1504 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1505 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1506 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1507 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1508 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1509 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1510 PetscFunctionReturn(0); 1511 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1512 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1513 for (i=0; i<a->mbs; i++) { 1514 for (j=0; j<bs; j++) { 1515 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1516 for (k=a->i[i]; k<a->i[i+1]; k++) { 1517 for (l=0; l<bs; l++) { 1518 #if defined(PETSC_USE_COMPLEX) 1519 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1520 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1521 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1522 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1523 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1524 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1525 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1526 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1527 } 1528 #else 1529 if (a->a[bs2*k + l*bs + j] != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1531 } 1532 #endif 1533 } 1534 } 1535 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1536 } 1537 } 1538 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1539 } else { 1540 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1541 for (i=0; i<a->mbs; i++) { 1542 for (j=0; j<bs; j++) { 1543 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1544 for (k=a->i[i]; k<a->i[i+1]; k++) { 1545 for (l=0; l<bs; l++) { 1546 #if defined(PETSC_USE_COMPLEX) 1547 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1549 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1550 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1552 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } else { 1554 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1555 } 1556 #else 1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1558 #endif 1559 } 1560 } 1561 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1562 } 1563 } 1564 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1565 } 1566 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #include <petscdraw.h> 1571 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1572 { 1573 Mat A = (Mat) Aa; 1574 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1575 PetscErrorCode ierr; 1576 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1577 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1578 MatScalar *aa; 1579 PetscViewer viewer; 1580 PetscViewerFormat format; 1581 1582 PetscFunctionBegin; 1583 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1584 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1585 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1586 1587 /* loop over matrix elements drawing boxes */ 1588 1589 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1590 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1591 /* Blue for negative, Cyan for zero and Red for positive */ 1592 color = PETSC_DRAW_BLUE; 1593 for (i=0,row=0; i<mbs; i++,row+=bs) { 1594 for (j=a->i[i]; j<a->i[i+1]; j++) { 1595 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1596 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1597 aa = a->a + j*bs2; 1598 for (k=0; k<bs; k++) { 1599 for (l=0; l<bs; l++) { 1600 if (PetscRealPart(*aa++) >= 0.) continue; 1601 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1602 } 1603 } 1604 } 1605 } 1606 color = PETSC_DRAW_CYAN; 1607 for (i=0,row=0; i<mbs; i++,row+=bs) { 1608 for (j=a->i[i]; j<a->i[i+1]; j++) { 1609 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1610 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1611 aa = a->a + j*bs2; 1612 for (k=0; k<bs; k++) { 1613 for (l=0; l<bs; l++) { 1614 if (PetscRealPart(*aa++) != 0.) continue; 1615 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1616 } 1617 } 1618 } 1619 } 1620 color = PETSC_DRAW_RED; 1621 for (i=0,row=0; i<mbs; i++,row+=bs) { 1622 for (j=a->i[i]; j<a->i[i+1]; j++) { 1623 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1624 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1625 aa = a->a + j*bs2; 1626 for (k=0; k<bs; k++) { 1627 for (l=0; l<bs; l++) { 1628 if (PetscRealPart(*aa++) <= 0.) continue; 1629 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1630 } 1631 } 1632 } 1633 } 1634 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1635 } else { 1636 /* use contour shading to indicate magnitude of values */ 1637 /* first determine max of all nonzero values */ 1638 PetscReal minv = 0.0, maxv = 0.0; 1639 PetscDraw popup; 1640 1641 for (i=0; i<a->nz*a->bs2; i++) { 1642 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1643 } 1644 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1645 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1646 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1647 1648 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1649 for (i=0,row=0; i<mbs; i++,row+=bs) { 1650 for (j=a->i[i]; j<a->i[i+1]; j++) { 1651 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1652 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1653 aa = a->a + j*bs2; 1654 for (k=0; k<bs; k++) { 1655 for (l=0; l<bs; l++) { 1656 MatScalar v = *aa++; 1657 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1658 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1659 } 1660 } 1661 } 1662 } 1663 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1664 } 1665 PetscFunctionReturn(0); 1666 } 1667 1668 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1669 { 1670 PetscErrorCode ierr; 1671 PetscReal xl,yl,xr,yr,w,h; 1672 PetscDraw draw; 1673 PetscBool isnull; 1674 1675 PetscFunctionBegin; 1676 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1677 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1678 if (isnull) PetscFunctionReturn(0); 1679 1680 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1681 xr += w; yr += h; xl = -w; yl = -h; 1682 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1683 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1684 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1685 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1686 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1691 { 1692 PetscErrorCode ierr; 1693 PetscBool iascii,isbinary,isdraw; 1694 1695 PetscFunctionBegin; 1696 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1697 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1698 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1699 if (iascii) { 1700 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1701 } else if (isbinary) { 1702 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1703 } else if (isdraw) { 1704 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1705 } else { 1706 Mat B; 1707 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1708 ierr = MatView(B,viewer);CHKERRQ(ierr); 1709 ierr = MatDestroy(&B);CHKERRQ(ierr); 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 1715 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1716 { 1717 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1718 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1719 PetscInt *ai = a->i,*ailen = a->ilen; 1720 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1721 MatScalar *ap,*aa = a->a; 1722 1723 PetscFunctionBegin; 1724 for (k=0; k<m; k++) { /* loop over rows */ 1725 row = im[k]; brow = row/bs; 1726 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1727 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1728 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1729 nrow = ailen[brow]; 1730 for (l=0; l<n; l++) { /* loop over columns */ 1731 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1732 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1733 col = in[l]; 1734 bcol = col/bs; 1735 cidx = col%bs; 1736 ridx = row%bs; 1737 high = nrow; 1738 low = 0; /* assume unsorted */ 1739 while (high-low > 5) { 1740 t = (low+high)/2; 1741 if (rp[t] > bcol) high = t; 1742 else low = t; 1743 } 1744 for (i=low; i<high; i++) { 1745 if (rp[i] > bcol) break; 1746 if (rp[i] == bcol) { 1747 *v++ = ap[bs2*i+bs*cidx+ridx]; 1748 goto finished; 1749 } 1750 } 1751 *v++ = 0.0; 1752 finished:; 1753 } 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 1758 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1759 { 1760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1761 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1762 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1763 PetscErrorCode ierr; 1764 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1765 PetscBool roworiented=a->roworiented; 1766 const PetscScalar *value = v; 1767 MatScalar *ap=NULL,*aa = a->a,*bap; 1768 1769 PetscFunctionBegin; 1770 if (roworiented) { 1771 stepval = (n-1)*bs; 1772 } else { 1773 stepval = (m-1)*bs; 1774 } 1775 for (k=0; k<m; k++) { /* loop over added rows */ 1776 row = im[k]; 1777 if (row < 0) continue; 1778 #if defined(PETSC_USE_DEBUG) 1779 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1780 #endif 1781 rp = aj + ai[row]; 1782 if (!A->structure_only) ap = aa + bs2*ai[row]; 1783 rmax = imax[row]; 1784 nrow = ailen[row]; 1785 low = 0; 1786 high = nrow; 1787 for (l=0; l<n; l++) { /* loop over added columns */ 1788 if (in[l] < 0) continue; 1789 #if defined(PETSC_USE_DEBUG) 1790 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1791 #endif 1792 col = in[l]; 1793 if (!A->structure_only) { 1794 if (roworiented) { 1795 value = v + (k*(stepval+bs) + l)*bs; 1796 } else { 1797 value = v + (l*(stepval+bs) + k)*bs; 1798 } 1799 } 1800 if (col <= lastcol) low = 0; 1801 else high = nrow; 1802 lastcol = col; 1803 while (high-low > 7) { 1804 t = (low+high)/2; 1805 if (rp[t] > col) high = t; 1806 else low = t; 1807 } 1808 for (i=low; i<high; i++) { 1809 if (rp[i] > col) break; 1810 if (rp[i] == col) { 1811 if (A->structure_only) goto noinsert2; 1812 bap = ap + bs2*i; 1813 if (roworiented) { 1814 if (is == ADD_VALUES) { 1815 for (ii=0; ii<bs; ii++,value+=stepval) { 1816 for (jj=ii; jj<bs2; jj+=bs) { 1817 bap[jj] += *value++; 1818 } 1819 } 1820 } else { 1821 for (ii=0; ii<bs; ii++,value+=stepval) { 1822 for (jj=ii; jj<bs2; jj+=bs) { 1823 bap[jj] = *value++; 1824 } 1825 } 1826 } 1827 } else { 1828 if (is == ADD_VALUES) { 1829 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1830 for (jj=0; jj<bs; jj++) { 1831 bap[jj] += value[jj]; 1832 } 1833 bap += bs; 1834 } 1835 } else { 1836 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1837 for (jj=0; jj<bs; jj++) { 1838 bap[jj] = value[jj]; 1839 } 1840 bap += bs; 1841 } 1842 } 1843 } 1844 goto noinsert2; 1845 } 1846 } 1847 if (nonew == 1) goto noinsert2; 1848 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1849 if (A->structure_only) { 1850 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1851 } else { 1852 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1853 } 1854 N = nrow++ - 1; high++; 1855 /* shift up all the later entries in this row */ 1856 for (ii=N; ii>=i; ii--) { 1857 rp[ii+1] = rp[ii]; 1858 if (!A->structure_only) { 1859 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1860 } 1861 } 1862 if (N >= i && !A->structure_only) { 1863 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1864 } 1865 1866 rp[i] = col; 1867 if (!A->structure_only) { 1868 bap = ap + bs2*i; 1869 if (roworiented) { 1870 for (ii=0; ii<bs; ii++,value+=stepval) { 1871 for (jj=ii; jj<bs2; jj+=bs) { 1872 bap[jj] = *value++; 1873 } 1874 } 1875 } else { 1876 for (ii=0; ii<bs; ii++,value+=stepval) { 1877 for (jj=0; jj<bs; jj++) { 1878 *bap++ = *value++; 1879 } 1880 } 1881 } 1882 } 1883 noinsert2:; 1884 low = i; 1885 } 1886 ailen[row] = nrow; 1887 } 1888 PetscFunctionReturn(0); 1889 } 1890 1891 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1892 { 1893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1894 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1895 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1896 PetscErrorCode ierr; 1897 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1898 MatScalar *aa = a->a,*ap; 1899 PetscReal ratio=0.6; 1900 1901 PetscFunctionBegin; 1902 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1903 1904 if (m) rmax = ailen[0]; 1905 for (i=1; i<mbs; i++) { 1906 /* move each row back by the amount of empty slots (fshift) before it*/ 1907 fshift += imax[i-1] - ailen[i-1]; 1908 rmax = PetscMax(rmax,ailen[i]); 1909 if (fshift) { 1910 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1911 N = ailen[i]; 1912 for (j=0; j<N; j++) { 1913 ip[j-fshift] = ip[j]; 1914 if (!A->structure_only) { 1915 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1916 } 1917 } 1918 } 1919 ai[i] = ai[i-1] + ailen[i-1]; 1920 } 1921 if (mbs) { 1922 fshift += imax[mbs-1] - ailen[mbs-1]; 1923 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1924 } 1925 1926 /* reset ilen and imax for each row */ 1927 a->nonzerorowcnt = 0; 1928 if (A->structure_only) { 1929 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1930 } else { /* !A->structure_only */ 1931 for (i=0; i<mbs; i++) { 1932 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1933 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1934 } 1935 } 1936 a->nz = ai[mbs]; 1937 1938 /* diagonals may have moved, so kill the diagonal pointers */ 1939 a->idiagvalid = PETSC_FALSE; 1940 if (fshift && a->diag) { 1941 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1942 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1943 a->diag = 0; 1944 } 1945 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1946 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1947 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1948 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1949 1950 A->info.mallocs += a->reallocs; 1951 a->reallocs = 0; 1952 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1953 a->rmax = rmax; 1954 1955 if (!A->structure_only) { 1956 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 /* 1962 This function returns an array of flags which indicate the locations of contiguous 1963 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1964 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1965 Assume: sizes should be long enough to hold all the values. 1966 */ 1967 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1968 { 1969 PetscInt i,j,k,row; 1970 PetscBool flg; 1971 1972 PetscFunctionBegin; 1973 for (i=0,j=0; i<n; j++) { 1974 row = idx[i]; 1975 if (row%bs!=0) { /* Not the begining of a block */ 1976 sizes[j] = 1; 1977 i++; 1978 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1979 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1980 i++; 1981 } else { /* Begining of the block, so check if the complete block exists */ 1982 flg = PETSC_TRUE; 1983 for (k=1; k<bs; k++) { 1984 if (row+k != idx[i+k]) { /* break in the block */ 1985 flg = PETSC_FALSE; 1986 break; 1987 } 1988 } 1989 if (flg) { /* No break in the bs */ 1990 sizes[j] = bs; 1991 i += bs; 1992 } else { 1993 sizes[j] = 1; 1994 i++; 1995 } 1996 } 1997 } 1998 *bs_max = j; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2003 { 2004 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2005 PetscErrorCode ierr; 2006 PetscInt i,j,k,count,*rows; 2007 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2008 PetscScalar zero = 0.0; 2009 MatScalar *aa; 2010 const PetscScalar *xx; 2011 PetscScalar *bb; 2012 2013 PetscFunctionBegin; 2014 /* fix right hand side if needed */ 2015 if (x && b) { 2016 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2017 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2018 for (i=0; i<is_n; i++) { 2019 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2020 } 2021 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2022 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2023 } 2024 2025 /* Make a copy of the IS and sort it */ 2026 /* allocate memory for rows,sizes */ 2027 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2028 2029 /* copy IS values to rows, and sort them */ 2030 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2031 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2032 2033 if (baij->keepnonzeropattern) { 2034 for (i=0; i<is_n; i++) sizes[i] = 1; 2035 bs_max = is_n; 2036 } else { 2037 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2038 A->nonzerostate++; 2039 } 2040 2041 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2042 row = rows[j]; 2043 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2044 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2045 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2046 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2047 if (diag != (PetscScalar)0.0) { 2048 if (baij->ilen[row/bs] > 0) { 2049 baij->ilen[row/bs] = 1; 2050 baij->j[baij->i[row/bs]] = row/bs; 2051 2052 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2053 } 2054 /* Now insert all the diagonal values for this bs */ 2055 for (k=0; k<bs; k++) { 2056 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2057 } 2058 } else { /* (diag == 0.0) */ 2059 baij->ilen[row/bs] = 0; 2060 } /* end (diag == 0.0) */ 2061 } else { /* (sizes[i] != bs) */ 2062 #if defined(PETSC_USE_DEBUG) 2063 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2064 #endif 2065 for (k=0; k<count; k++) { 2066 aa[0] = zero; 2067 aa += bs; 2068 } 2069 if (diag != (PetscScalar)0.0) { 2070 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2071 } 2072 } 2073 } 2074 2075 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2081 { 2082 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2083 PetscErrorCode ierr; 2084 PetscInt i,j,k,count; 2085 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2086 PetscScalar zero = 0.0; 2087 MatScalar *aa; 2088 const PetscScalar *xx; 2089 PetscScalar *bb; 2090 PetscBool *zeroed,vecs = PETSC_FALSE; 2091 2092 PetscFunctionBegin; 2093 /* fix right hand side if needed */ 2094 if (x && b) { 2095 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2096 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2097 vecs = PETSC_TRUE; 2098 } 2099 2100 /* zero the columns */ 2101 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2102 for (i=0; i<is_n; i++) { 2103 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2104 zeroed[is_idx[i]] = PETSC_TRUE; 2105 } 2106 for (i=0; i<A->rmap->N; i++) { 2107 if (!zeroed[i]) { 2108 row = i/bs; 2109 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2110 for (k=0; k<bs; k++) { 2111 col = bs*baij->j[j] + k; 2112 if (zeroed[col]) { 2113 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2114 if (vecs) bb[i] -= aa[0]*xx[col]; 2115 aa[0] = 0.0; 2116 } 2117 } 2118 } 2119 } else if (vecs) bb[i] = diag*xx[i]; 2120 } 2121 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2122 if (vecs) { 2123 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2124 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2125 } 2126 2127 /* zero the rows */ 2128 for (i=0; i<is_n; i++) { 2129 row = is_idx[i]; 2130 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2131 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2132 for (k=0; k<count; k++) { 2133 aa[0] = zero; 2134 aa += bs; 2135 } 2136 if (diag != (PetscScalar)0.0) { 2137 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2138 } 2139 } 2140 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2145 { 2146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2147 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2148 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2149 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2150 PetscErrorCode ierr; 2151 PetscInt ridx,cidx,bs2=a->bs2; 2152 PetscBool roworiented=a->roworiented; 2153 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2154 2155 PetscFunctionBegin; 2156 for (k=0; k<m; k++) { /* loop over added rows */ 2157 row = im[k]; 2158 brow = row/bs; 2159 if (row < 0) continue; 2160 #if defined(PETSC_USE_DEBUG) 2161 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2162 #endif 2163 rp = aj + ai[brow]; 2164 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2165 rmax = imax[brow]; 2166 nrow = ailen[brow]; 2167 low = 0; 2168 high = nrow; 2169 for (l=0; l<n; l++) { /* loop over added columns */ 2170 if (in[l] < 0) continue; 2171 #if defined(PETSC_USE_DEBUG) 2172 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2173 #endif 2174 col = in[l]; bcol = col/bs; 2175 ridx = row % bs; cidx = col % bs; 2176 if (!A->structure_only) { 2177 if (roworiented) { 2178 value = v[l + k*n]; 2179 } else { 2180 value = v[k + l*m]; 2181 } 2182 } 2183 if (col <= lastcol) low = 0; else high = nrow; 2184 lastcol = col; 2185 while (high-low > 7) { 2186 t = (low+high)/2; 2187 if (rp[t] > bcol) high = t; 2188 else low = t; 2189 } 2190 for (i=low; i<high; i++) { 2191 if (rp[i] > bcol) break; 2192 if (rp[i] == bcol) { 2193 bap = ap + bs2*i + bs*cidx + ridx; 2194 if (!A->structure_only) { 2195 if (is == ADD_VALUES) *bap += value; 2196 else *bap = value; 2197 } 2198 goto noinsert1; 2199 } 2200 } 2201 if (nonew == 1) goto noinsert1; 2202 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2203 if (A->structure_only) { 2204 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2205 } else { 2206 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2207 } 2208 N = nrow++ - 1; high++; 2209 /* shift up all the later entries in this row */ 2210 for (ii=N; ii>=i; ii--) { 2211 rp[ii+1] = rp[ii]; 2212 if (!A->structure_only) { 2213 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2214 } 2215 } 2216 if (N>=i && !A->structure_only) { 2217 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2218 } 2219 rp[i] = bcol; 2220 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2221 a->nz++; 2222 A->nonzerostate++; 2223 noinsert1:; 2224 low = i; 2225 } 2226 ailen[brow] = nrow; 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2232 { 2233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2234 Mat outA; 2235 PetscErrorCode ierr; 2236 PetscBool row_identity,col_identity; 2237 2238 PetscFunctionBegin; 2239 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2240 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2241 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2242 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2243 2244 outA = inA; 2245 inA->factortype = MAT_FACTOR_LU; 2246 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2247 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2248 2249 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2250 2251 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2252 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2253 a->row = row; 2254 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2255 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2256 a->col = col; 2257 2258 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2259 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2260 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2261 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2262 2263 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2264 if (!a->solve_work) { 2265 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2266 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2267 } 2268 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2273 { 2274 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2275 PetscInt i,nz,mbs; 2276 2277 PetscFunctionBegin; 2278 nz = baij->maxnz; 2279 mbs = baij->mbs; 2280 for (i=0; i<nz; i++) { 2281 baij->j[i] = indices[i]; 2282 } 2283 baij->nz = nz; 2284 for (i=0; i<mbs; i++) { 2285 baij->ilen[i] = baij->imax[i]; 2286 } 2287 PetscFunctionReturn(0); 2288 } 2289 2290 /*@ 2291 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2292 in the matrix. 2293 2294 Input Parameters: 2295 + mat - the SeqBAIJ matrix 2296 - indices - the column indices 2297 2298 Level: advanced 2299 2300 Notes: 2301 This can be called if you have precomputed the nonzero structure of the 2302 matrix and want to provide it to the matrix object to improve the performance 2303 of the MatSetValues() operation. 2304 2305 You MUST have set the correct numbers of nonzeros per row in the call to 2306 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2307 2308 MUST be called before any calls to MatSetValues(); 2309 2310 @*/ 2311 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2312 { 2313 PetscErrorCode ierr; 2314 2315 PetscFunctionBegin; 2316 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2317 PetscValidPointer(indices,2); 2318 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2325 PetscErrorCode ierr; 2326 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2327 PetscReal atmp; 2328 PetscScalar *x,zero = 0.0; 2329 MatScalar *aa; 2330 PetscInt ncols,brow,krow,kcol; 2331 2332 PetscFunctionBegin; 2333 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2334 bs = A->rmap->bs; 2335 aa = a->a; 2336 ai = a->i; 2337 aj = a->j; 2338 mbs = a->mbs; 2339 2340 ierr = VecSet(v,zero);CHKERRQ(ierr); 2341 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2342 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2343 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2344 for (i=0; i<mbs; i++) { 2345 ncols = ai[1] - ai[0]; ai++; 2346 brow = bs*i; 2347 for (j=0; j<ncols; j++) { 2348 for (kcol=0; kcol<bs; kcol++) { 2349 for (krow=0; krow<bs; krow++) { 2350 atmp = PetscAbsScalar(*aa);aa++; 2351 row = brow + krow; /* row index */ 2352 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2353 } 2354 } 2355 aj++; 2356 } 2357 } 2358 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2359 PetscFunctionReturn(0); 2360 } 2361 2362 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2363 { 2364 PetscErrorCode ierr; 2365 2366 PetscFunctionBegin; 2367 /* If the two matrices have the same copy implementation, use fast copy. */ 2368 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2369 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2370 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2371 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2372 2373 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2374 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2375 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2376 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2377 } else { 2378 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2384 { 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2393 { 2394 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2395 2396 PetscFunctionBegin; 2397 *array = a->a; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2402 { 2403 PetscFunctionBegin; 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2408 { 2409 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2410 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2411 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2412 PetscErrorCode ierr; 2413 2414 PetscFunctionBegin; 2415 /* Set the number of nonzeros in the new matrix */ 2416 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2421 { 2422 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2423 PetscErrorCode ierr; 2424 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2425 PetscBLASInt one=1; 2426 2427 PetscFunctionBegin; 2428 if (str == SAME_NONZERO_PATTERN) { 2429 PetscScalar alpha = a; 2430 PetscBLASInt bnz; 2431 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2432 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2433 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2434 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2435 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2436 } else { 2437 Mat B; 2438 PetscInt *nnz; 2439 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2440 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2441 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2442 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2443 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2445 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2446 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2447 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2448 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2449 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2450 ierr = PetscFree(nnz);CHKERRQ(ierr); 2451 } 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2456 { 2457 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2458 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2459 MatScalar *aa = a->a; 2460 2461 PetscFunctionBegin; 2462 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2467 { 2468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2469 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2470 MatScalar *aa = a->a; 2471 2472 PetscFunctionBegin; 2473 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 /* 2478 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2479 */ 2480 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscErrorCode ierr; 2484 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2485 PetscInt nz = a->i[m],row,*jj,mr,col; 2486 2487 PetscFunctionBegin; 2488 *nn = n; 2489 if (!ia) PetscFunctionReturn(0); 2490 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2491 else { 2492 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2495 jj = a->j; 2496 for (i=0; i<nz; i++) { 2497 collengths[jj[i]]++; 2498 } 2499 cia[0] = oshift; 2500 for (i=0; i<n; i++) { 2501 cia[i+1] = cia[i] + collengths[i]; 2502 } 2503 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2504 jj = a->j; 2505 for (row=0; row<m; row++) { 2506 mr = a->i[row+1] - a->i[row]; 2507 for (i=0; i<mr; i++) { 2508 col = *jj++; 2509 2510 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2511 } 2512 } 2513 ierr = PetscFree(collengths);CHKERRQ(ierr); 2514 *ia = cia; *ja = cja; 2515 } 2516 PetscFunctionReturn(0); 2517 } 2518 2519 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2520 { 2521 PetscErrorCode ierr; 2522 2523 PetscFunctionBegin; 2524 if (!ia) PetscFunctionReturn(0); 2525 ierr = PetscFree(*ia);CHKERRQ(ierr); 2526 ierr = PetscFree(*ja);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 /* 2531 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2532 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2533 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2534 */ 2535 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2536 { 2537 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2538 PetscErrorCode ierr; 2539 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2540 PetscInt nz = a->i[m],row,*jj,mr,col; 2541 PetscInt *cspidx; 2542 2543 PetscFunctionBegin; 2544 *nn = n; 2545 if (!ia) PetscFunctionReturn(0); 2546 2547 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2550 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2551 jj = a->j; 2552 for (i=0; i<nz; i++) { 2553 collengths[jj[i]]++; 2554 } 2555 cia[0] = oshift; 2556 for (i=0; i<n; i++) { 2557 cia[i+1] = cia[i] + collengths[i]; 2558 } 2559 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2560 jj = a->j; 2561 for (row=0; row<m; row++) { 2562 mr = a->i[row+1] - a->i[row]; 2563 for (i=0; i<mr; i++) { 2564 col = *jj++; 2565 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2566 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2567 } 2568 } 2569 ierr = PetscFree(collengths);CHKERRQ(ierr); 2570 *ia = cia; *ja = cja; 2571 *spidx = cspidx; 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2576 { 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2581 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2586 { 2587 PetscErrorCode ierr; 2588 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2589 2590 PetscFunctionBegin; 2591 if (!Y->preallocated || !aij->nz) { 2592 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2593 } 2594 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 /* -------------------------------------------------------------------*/ 2599 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2600 MatGetRow_SeqBAIJ, 2601 MatRestoreRow_SeqBAIJ, 2602 MatMult_SeqBAIJ_N, 2603 /* 4*/ MatMultAdd_SeqBAIJ_N, 2604 MatMultTranspose_SeqBAIJ, 2605 MatMultTransposeAdd_SeqBAIJ, 2606 0, 2607 0, 2608 0, 2609 /* 10*/ 0, 2610 MatLUFactor_SeqBAIJ, 2611 0, 2612 0, 2613 MatTranspose_SeqBAIJ, 2614 /* 15*/ MatGetInfo_SeqBAIJ, 2615 MatEqual_SeqBAIJ, 2616 MatGetDiagonal_SeqBAIJ, 2617 MatDiagonalScale_SeqBAIJ, 2618 MatNorm_SeqBAIJ, 2619 /* 20*/ 0, 2620 MatAssemblyEnd_SeqBAIJ, 2621 MatSetOption_SeqBAIJ, 2622 MatZeroEntries_SeqBAIJ, 2623 /* 24*/ MatZeroRows_SeqBAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /* 29*/ MatSetUp_SeqBAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /* 34*/ MatDuplicate_SeqBAIJ, 2634 0, 2635 0, 2636 MatILUFactor_SeqBAIJ, 2637 0, 2638 /* 39*/ MatAXPY_SeqBAIJ, 2639 MatCreateSubMatrices_SeqBAIJ, 2640 MatIncreaseOverlap_SeqBAIJ, 2641 MatGetValues_SeqBAIJ, 2642 MatCopy_SeqBAIJ, 2643 /* 44*/ 0, 2644 MatScale_SeqBAIJ, 2645 MatShift_SeqBAIJ, 2646 0, 2647 MatZeroRowsColumns_SeqBAIJ, 2648 /* 49*/ 0, 2649 MatGetRowIJ_SeqBAIJ, 2650 MatRestoreRowIJ_SeqBAIJ, 2651 MatGetColumnIJ_SeqBAIJ, 2652 MatRestoreColumnIJ_SeqBAIJ, 2653 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2654 0, 2655 0, 2656 0, 2657 MatSetValuesBlocked_SeqBAIJ, 2658 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2659 MatDestroy_SeqBAIJ, 2660 MatView_SeqBAIJ, 2661 0, 2662 0, 2663 /* 64*/ 0, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2669 0, 2670 MatConvert_Basic, 2671 0, 2672 0, 2673 /* 74*/ 0, 2674 MatFDColoringApply_BAIJ, 2675 0, 2676 0, 2677 0, 2678 /* 79*/ 0, 2679 0, 2680 0, 2681 0, 2682 MatLoad_SeqBAIJ, 2683 /* 84*/ 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /* 89*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /* 94*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /* 99*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /*104*/ 0, 2704 MatRealPart_SeqBAIJ, 2705 MatImaginaryPart_SeqBAIJ, 2706 0, 2707 0, 2708 /*109*/ 0, 2709 0, 2710 0, 2711 0, 2712 MatMissingDiagonal_SeqBAIJ, 2713 /*114*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/ 0, 2719 0, 2720 MatMultHermitianTranspose_SeqBAIJ, 2721 MatMultHermitianTransposeAdd_SeqBAIJ, 2722 0, 2723 /*124*/ 0, 2724 0, 2725 MatInvertBlockDiagonal_SeqBAIJ, 2726 0, 2727 0, 2728 /*129*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*134*/ 0, 2734 0, 2735 0, 2736 0, 2737 0, 2738 /*139*/ MatSetBlockSizes_Default, 2739 0, 2740 0, 2741 MatFDColoringSetUp_SeqXAIJ, 2742 0, 2743 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2744 MatDestroySubMatrices_SeqBAIJ 2745 }; 2746 2747 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2748 { 2749 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2750 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2755 2756 /* allocate space for values if not already there */ 2757 if (!aij->saved_values) { 2758 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2759 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2760 } 2761 2762 /* copy values over */ 2763 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2768 { 2769 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2770 PetscErrorCode ierr; 2771 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2772 2773 PetscFunctionBegin; 2774 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2775 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2776 2777 /* copy values over */ 2778 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2784 2785 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2786 { 2787 Mat_SeqBAIJ *b; 2788 PetscErrorCode ierr; 2789 PetscInt i,mbs,nbs,bs2; 2790 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2791 2792 PetscFunctionBegin; 2793 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2794 if (nz == MAT_SKIP_ALLOCATION) { 2795 skipallocation = PETSC_TRUE; 2796 nz = 0; 2797 } 2798 2799 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2803 2804 B->preallocated = PETSC_TRUE; 2805 2806 mbs = B->rmap->n/bs; 2807 nbs = B->cmap->n/bs; 2808 bs2 = bs*bs; 2809 2810 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2811 2812 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2813 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2814 if (nnz) { 2815 for (i=0; i<mbs; i++) { 2816 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2817 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2818 } 2819 } 2820 2821 b = (Mat_SeqBAIJ*)B->data; 2822 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2823 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2824 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2825 2826 if (!flg) { 2827 switch (bs) { 2828 case 1: 2829 B->ops->mult = MatMult_SeqBAIJ_1; 2830 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2831 break; 2832 case 2: 2833 B->ops->mult = MatMult_SeqBAIJ_2; 2834 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2835 break; 2836 case 3: 2837 B->ops->mult = MatMult_SeqBAIJ_3; 2838 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2839 break; 2840 case 4: 2841 B->ops->mult = MatMult_SeqBAIJ_4; 2842 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2843 break; 2844 case 5: 2845 B->ops->mult = MatMult_SeqBAIJ_5; 2846 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2847 break; 2848 case 6: 2849 B->ops->mult = MatMult_SeqBAIJ_6; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2851 break; 2852 case 7: 2853 B->ops->mult = MatMult_SeqBAIJ_7; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2855 break; 2856 case 9: 2857 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 2858 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2859 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2860 #else 2861 B->ops->mult = MatMult_SeqBAIJ_N; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2863 #endif 2864 break; 2865 case 11: 2866 B->ops->mult = MatMult_SeqBAIJ_11; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2868 break; 2869 case 15: 2870 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2872 break; 2873 default: 2874 B->ops->mult = MatMult_SeqBAIJ_N; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2876 break; 2877 } 2878 } 2879 B->ops->sor = MatSOR_SeqBAIJ; 2880 b->mbs = mbs; 2881 b->nbs = nbs; 2882 if (!skipallocation) { 2883 if (!b->imax) { 2884 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2885 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2886 2887 b->free_imax_ilen = PETSC_TRUE; 2888 } 2889 /* b->ilen will count nonzeros in each block row so far. */ 2890 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2891 if (!nnz) { 2892 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2893 else if (nz < 0) nz = 1; 2894 for (i=0; i<mbs; i++) b->imax[i] = nz; 2895 nz = nz*mbs; 2896 } else { 2897 nz = 0; 2898 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2899 } 2900 2901 /* allocate the matrix space */ 2902 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2903 if (B->structure_only) { 2904 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2905 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2906 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2907 } else { 2908 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2909 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2910 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2911 } 2912 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2913 2914 if (B->structure_only) { 2915 b->singlemalloc = PETSC_FALSE; 2916 b->free_a = PETSC_FALSE; 2917 } else { 2918 b->singlemalloc = PETSC_TRUE; 2919 b->free_a = PETSC_TRUE; 2920 } 2921 b->free_ij = PETSC_TRUE; 2922 2923 b->i[0] = 0; 2924 for (i=1; i<mbs+1; i++) { 2925 b->i[i] = b->i[i-1] + b->imax[i-1]; 2926 } 2927 2928 } else { 2929 b->free_a = PETSC_FALSE; 2930 b->free_ij = PETSC_FALSE; 2931 } 2932 2933 b->bs2 = bs2; 2934 b->mbs = mbs; 2935 b->nz = 0; 2936 b->maxnz = nz; 2937 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2938 B->was_assembled = PETSC_FALSE; 2939 B->assembled = PETSC_FALSE; 2940 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2941 PetscFunctionReturn(0); 2942 } 2943 2944 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2945 { 2946 PetscInt i,m,nz,nz_max=0,*nnz; 2947 PetscScalar *values=0; 2948 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2949 PetscErrorCode ierr; 2950 2951 PetscFunctionBegin; 2952 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2953 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2954 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2955 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2956 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2957 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2958 m = B->rmap->n/bs; 2959 2960 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2961 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2962 for (i=0; i<m; i++) { 2963 nz = ii[i+1]- ii[i]; 2964 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2965 nz_max = PetscMax(nz_max, nz); 2966 nnz[i] = nz; 2967 } 2968 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2969 ierr = PetscFree(nnz);CHKERRQ(ierr); 2970 2971 values = (PetscScalar*)V; 2972 if (!values) { 2973 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2974 } 2975 for (i=0; i<m; i++) { 2976 PetscInt ncols = ii[i+1] - ii[i]; 2977 const PetscInt *icols = jj + ii[i]; 2978 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2979 if (!roworiented) { 2980 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2981 } else { 2982 PetscInt j; 2983 for (j=0; j<ncols; j++) { 2984 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2985 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2986 } 2987 } 2988 } 2989 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2990 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2991 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2992 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2993 PetscFunctionReturn(0); 2994 } 2995 2996 /*MC 2997 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2998 block sparse compressed row format. 2999 3000 Options Database Keys: 3001 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3002 3003 Level: beginner 3004 3005 .seealso: MatCreateSeqBAIJ() 3006 M*/ 3007 3008 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3009 3010 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3011 { 3012 PetscErrorCode ierr; 3013 PetscMPIInt size; 3014 Mat_SeqBAIJ *b; 3015 3016 PetscFunctionBegin; 3017 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3018 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3019 3020 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3021 B->data = (void*)b; 3022 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3023 3024 b->row = 0; 3025 b->col = 0; 3026 b->icol = 0; 3027 b->reallocs = 0; 3028 b->saved_values = 0; 3029 3030 b->roworiented = PETSC_TRUE; 3031 b->nonew = 0; 3032 b->diag = 0; 3033 B->spptr = 0; 3034 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3035 b->keepnonzeropattern = PETSC_FALSE; 3036 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3046 #if defined(PETSC_HAVE_HYPRE) 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3048 #endif 3049 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3054 { 3055 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3056 PetscErrorCode ierr; 3057 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3058 3059 PetscFunctionBegin; 3060 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3061 3062 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3063 c->imax = a->imax; 3064 c->ilen = a->ilen; 3065 c->free_imax_ilen = PETSC_FALSE; 3066 } else { 3067 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3068 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3069 for (i=0; i<mbs; i++) { 3070 c->imax[i] = a->imax[i]; 3071 c->ilen[i] = a->ilen[i]; 3072 } 3073 c->free_imax_ilen = PETSC_TRUE; 3074 } 3075 3076 /* allocate the matrix space */ 3077 if (mallocmatspace) { 3078 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3079 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3080 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3081 3082 c->i = a->i; 3083 c->j = a->j; 3084 c->singlemalloc = PETSC_FALSE; 3085 c->free_a = PETSC_TRUE; 3086 c->free_ij = PETSC_FALSE; 3087 c->parent = A; 3088 C->preallocated = PETSC_TRUE; 3089 C->assembled = PETSC_TRUE; 3090 3091 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3092 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3093 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3094 } else { 3095 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3096 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3097 3098 c->singlemalloc = PETSC_TRUE; 3099 c->free_a = PETSC_TRUE; 3100 c->free_ij = PETSC_TRUE; 3101 3102 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3103 if (mbs > 0) { 3104 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3105 if (cpvalues == MAT_COPY_VALUES) { 3106 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3107 } else { 3108 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3109 } 3110 } 3111 C->preallocated = PETSC_TRUE; 3112 C->assembled = PETSC_TRUE; 3113 } 3114 } 3115 3116 c->roworiented = a->roworiented; 3117 c->nonew = a->nonew; 3118 3119 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3120 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3121 3122 c->bs2 = a->bs2; 3123 c->mbs = a->mbs; 3124 c->nbs = a->nbs; 3125 3126 if (a->diag) { 3127 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3128 c->diag = a->diag; 3129 c->free_diag = PETSC_FALSE; 3130 } else { 3131 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3132 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3133 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3134 c->free_diag = PETSC_TRUE; 3135 } 3136 } else c->diag = 0; 3137 3138 c->nz = a->nz; 3139 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3140 c->solve_work = NULL; 3141 c->mult_work = NULL; 3142 c->sor_workt = NULL; 3143 c->sor_work = NULL; 3144 3145 c->compressedrow.use = a->compressedrow.use; 3146 c->compressedrow.nrows = a->compressedrow.nrows; 3147 if (a->compressedrow.use) { 3148 i = a->compressedrow.nrows; 3149 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3150 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3151 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3152 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3153 } else { 3154 c->compressedrow.use = PETSC_FALSE; 3155 c->compressedrow.i = NULL; 3156 c->compressedrow.rindex = NULL; 3157 } 3158 C->nonzerostate = A->nonzerostate; 3159 3160 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3161 PetscFunctionReturn(0); 3162 } 3163 3164 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3165 { 3166 PetscErrorCode ierr; 3167 3168 PetscFunctionBegin; 3169 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3170 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3171 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3172 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3177 { 3178 Mat_SeqBAIJ *a; 3179 PetscErrorCode ierr; 3180 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3181 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3182 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3183 PetscInt *masked,nmask,tmp,bs2,ishift; 3184 PetscMPIInt size; 3185 int fd; 3186 PetscScalar *aa; 3187 MPI_Comm comm; 3188 3189 PetscFunctionBegin; 3190 /* force binary viewer to load .info file if it has not yet done so */ 3191 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3192 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3193 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3194 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3195 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3196 if (bs < 0) bs = 1; 3197 bs2 = bs*bs; 3198 3199 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3200 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3201 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3202 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3203 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3204 M = header[1]; N = header[2]; nz = header[3]; 3205 3206 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3207 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3208 3209 /* 3210 This code adds extra rows to make sure the number of rows is 3211 divisible by the blocksize 3212 */ 3213 mbs = M/bs; 3214 extra_rows = bs - M + bs*(mbs); 3215 if (extra_rows == bs) extra_rows = 0; 3216 else mbs++; 3217 if (extra_rows) { 3218 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3219 } 3220 3221 /* Set global sizes if not already set */ 3222 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3223 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3224 } else { /* Check if the matrix global sizes are correct */ 3225 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3226 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3227 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3228 } 3229 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3230 } 3231 3232 /* read in row lengths */ 3233 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3234 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3235 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3236 3237 /* read in column indices */ 3238 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3239 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3240 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3241 3242 /* loop over row lengths determining block row lengths */ 3243 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3244 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3245 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3246 rowcount = 0; 3247 nzcount = 0; 3248 for (i=0; i<mbs; i++) { 3249 nmask = 0; 3250 for (j=0; j<bs; j++) { 3251 kmax = rowlengths[rowcount]; 3252 for (k=0; k<kmax; k++) { 3253 tmp = jj[nzcount++]/bs; 3254 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3255 } 3256 rowcount++; 3257 } 3258 browlengths[i] += nmask; 3259 /* zero out the mask elements we set */ 3260 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3261 } 3262 3263 /* Do preallocation */ 3264 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3265 a = (Mat_SeqBAIJ*)newmat->data; 3266 3267 /* set matrix "i" values */ 3268 a->i[0] = 0; 3269 for (i=1; i<= mbs; i++) { 3270 a->i[i] = a->i[i-1] + browlengths[i-1]; 3271 a->ilen[i-1] = browlengths[i-1]; 3272 } 3273 a->nz = 0; 3274 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3275 3276 /* read in nonzero values */ 3277 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3278 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3279 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3280 3281 /* set "a" and "j" values into matrix */ 3282 nzcount = 0; jcount = 0; 3283 for (i=0; i<mbs; i++) { 3284 nzcountb = nzcount; 3285 nmask = 0; 3286 for (j=0; j<bs; j++) { 3287 kmax = rowlengths[i*bs+j]; 3288 for (k=0; k<kmax; k++) { 3289 tmp = jj[nzcount++]/bs; 3290 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3291 } 3292 } 3293 /* sort the masked values */ 3294 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3295 3296 /* set "j" values into matrix */ 3297 maskcount = 1; 3298 for (j=0; j<nmask; j++) { 3299 a->j[jcount++] = masked[j]; 3300 mask[masked[j]] = maskcount++; 3301 } 3302 /* set "a" values into matrix */ 3303 ishift = bs2*a->i[i]; 3304 for (j=0; j<bs; j++) { 3305 kmax = rowlengths[i*bs+j]; 3306 for (k=0; k<kmax; k++) { 3307 tmp = jj[nzcountb]/bs; 3308 block = mask[tmp] - 1; 3309 point = jj[nzcountb] - bs*tmp; 3310 idx = ishift + bs2*block + j + bs*point; 3311 a->a[idx] = (MatScalar)aa[nzcountb++]; 3312 } 3313 } 3314 /* zero out the mask elements we set */ 3315 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3316 } 3317 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3318 3319 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3320 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3321 ierr = PetscFree(aa);CHKERRQ(ierr); 3322 ierr = PetscFree(jj);CHKERRQ(ierr); 3323 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3324 3325 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 /*@C 3331 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3332 compressed row) format. For good matrix assembly performance the 3333 user should preallocate the matrix storage by setting the parameter nz 3334 (or the array nnz). By setting these parameters accurately, performance 3335 during matrix assembly can be increased by more than a factor of 50. 3336 3337 Collective on MPI_Comm 3338 3339 Input Parameters: 3340 + comm - MPI communicator, set to PETSC_COMM_SELF 3341 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3342 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3343 . m - number of rows 3344 . n - number of columns 3345 . nz - number of nonzero blocks per block row (same for all rows) 3346 - nnz - array containing the number of nonzero blocks in the various block rows 3347 (possibly different for each block row) or NULL 3348 3349 Output Parameter: 3350 . A - the matrix 3351 3352 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3353 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3354 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3355 3356 Options Database Keys: 3357 . -mat_no_unroll - uses code that does not unroll the loops in the 3358 block calculations (much slower) 3359 . -mat_block_size - size of the blocks to use 3360 3361 Level: intermediate 3362 3363 Notes: 3364 The number of rows and columns must be divisible by blocksize. 3365 3366 If the nnz parameter is given then the nz parameter is ignored 3367 3368 A nonzero block is any block that as 1 or more nonzeros in it 3369 3370 The block AIJ format is fully compatible with standard Fortran 77 3371 storage. That is, the stored row and column indices can begin at 3372 either one (as in Fortran) or zero. See the users' manual for details. 3373 3374 Specify the preallocated storage with either nz or nnz (not both). 3375 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3376 allocation. See Users-Manual: ch_mat for details. 3377 matrices. 3378 3379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3380 @*/ 3381 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3382 { 3383 PetscErrorCode ierr; 3384 3385 PetscFunctionBegin; 3386 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3387 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3388 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3389 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3390 PetscFunctionReturn(0); 3391 } 3392 3393 /*@C 3394 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3395 per row in the matrix. For good matrix assembly performance the 3396 user should preallocate the matrix storage by setting the parameter nz 3397 (or the array nnz). By setting these parameters accurately, performance 3398 during matrix assembly can be increased by more than a factor of 50. 3399 3400 Collective on MPI_Comm 3401 3402 Input Parameters: 3403 + B - the matrix 3404 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3405 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3406 . nz - number of block nonzeros per block row (same for all rows) 3407 - nnz - array containing the number of block nonzeros in the various block rows 3408 (possibly different for each block row) or NULL 3409 3410 Options Database Keys: 3411 . -mat_no_unroll - uses code that does not unroll the loops in the 3412 block calculations (much slower) 3413 . -mat_block_size - size of the blocks to use 3414 3415 Level: intermediate 3416 3417 Notes: 3418 If the nnz parameter is given then the nz parameter is ignored 3419 3420 You can call MatGetInfo() to get information on how effective the preallocation was; 3421 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3422 You can also run with the option -info and look for messages with the string 3423 malloc in them to see if additional memory allocation was needed. 3424 3425 The block AIJ format is fully compatible with standard Fortran 77 3426 storage. That is, the stored row and column indices can begin at 3427 either one (as in Fortran) or zero. See the users' manual for details. 3428 3429 Specify the preallocated storage with either nz or nnz (not both). 3430 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3431 allocation. See Users-Manual: ch_mat for details. 3432 3433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3434 @*/ 3435 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3436 { 3437 PetscErrorCode ierr; 3438 3439 PetscFunctionBegin; 3440 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3441 PetscValidType(B,1); 3442 PetscValidLogicalCollectiveInt(B,bs,2); 3443 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3444 PetscFunctionReturn(0); 3445 } 3446 3447 /*@C 3448 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3449 (the default sequential PETSc format). 3450 3451 Collective on MPI_Comm 3452 3453 Input Parameters: 3454 + B - the matrix 3455 . i - the indices into j for the start of each local row (starts with zero) 3456 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3457 - v - optional values in the matrix 3458 3459 Level: developer 3460 3461 Notes: 3462 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3463 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3464 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3465 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3466 block column and the second index is over columns within a block. 3467 3468 .keywords: matrix, aij, compressed row, sparse 3469 3470 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3471 @*/ 3472 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3473 { 3474 PetscErrorCode ierr; 3475 3476 PetscFunctionBegin; 3477 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3478 PetscValidType(B,1); 3479 PetscValidLogicalCollectiveInt(B,bs,2); 3480 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3481 PetscFunctionReturn(0); 3482 } 3483 3484 3485 /*@ 3486 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3487 3488 Collective on MPI_Comm 3489 3490 Input Parameters: 3491 + comm - must be an MPI communicator of size 1 3492 . bs - size of block 3493 . m - number of rows 3494 . n - number of columns 3495 . i - row indices 3496 . j - column indices 3497 - a - matrix values 3498 3499 Output Parameter: 3500 . mat - the matrix 3501 3502 Level: advanced 3503 3504 Notes: 3505 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3506 once the matrix is destroyed 3507 3508 You cannot set new nonzero locations into this matrix, that will generate an error. 3509 3510 The i and j indices are 0 based 3511 3512 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3513 3514 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3515 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3516 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3517 with column-major ordering within blocks. 3518 3519 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3520 3521 @*/ 3522 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3523 { 3524 PetscErrorCode ierr; 3525 PetscInt ii; 3526 Mat_SeqBAIJ *baij; 3527 3528 PetscFunctionBegin; 3529 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3530 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3531 3532 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3533 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3534 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3535 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3536 baij = (Mat_SeqBAIJ*)(*mat)->data; 3537 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3538 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3539 3540 baij->i = i; 3541 baij->j = j; 3542 baij->a = a; 3543 3544 baij->singlemalloc = PETSC_FALSE; 3545 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3546 baij->free_a = PETSC_FALSE; 3547 baij->free_ij = PETSC_FALSE; 3548 3549 for (ii=0; ii<m; ii++) { 3550 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3551 #if defined(PETSC_USE_DEBUG) 3552 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3553 #endif 3554 } 3555 #if defined(PETSC_USE_DEBUG) 3556 for (ii=0; ii<baij->i[m]; ii++) { 3557 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3558 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3559 } 3560 #endif 3561 3562 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3563 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3564 PetscFunctionReturn(0); 3565 } 3566 3567 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3568 { 3569 PetscErrorCode ierr; 3570 PetscMPIInt size; 3571 3572 PetscFunctionBegin; 3573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3574 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3575 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3576 } else { 3577 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3578 } 3579 PetscFunctionReturn(0); 3580 } 3581