1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions) 21 { 22 PetscErrorCode ierr; 23 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 24 PetscInt m,n,i; 25 PetscInt ib,jb,bs = A->rmap->bs; 26 MatScalar *a_val = a_aij->a; 27 28 PetscFunctionBegin; 29 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 30 for (i=0; i<n; i++) reductions[i] = 0.0; 31 if (type == NORM_2) { 32 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 33 for (jb=0; jb<bs; jb++) { 34 for (ib=0; ib<bs; ib++) { 35 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 36 a_val++; 37 } 38 } 39 } 40 } else if (type == NORM_1) { 41 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 42 for (jb=0; jb<bs; jb++) { 43 for (ib=0; ib<bs; ib++) { 44 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 45 a_val++; 46 } 47 } 48 } 49 } else if (type == NORM_INFINITY) { 50 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 51 for (jb=0; jb<bs; jb++) { 52 for (ib=0; ib<bs; ib++) { 53 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 54 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 55 a_val++; 56 } 57 } 58 } 59 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 60 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 61 for (jb=0; jb<bs; jb++) { 62 for (ib=0; ib<bs; ib++) { 63 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 64 a_val++; 65 } 66 } 67 } 68 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 69 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 70 for (jb=0; jb<bs; jb++) { 71 for (ib=0; ib<bs; ib++) { 72 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 73 a_val++; 74 } 75 } 76 } 77 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 78 if (type == NORM_2) { 79 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 80 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 81 for (i=0; i<n; i++) reductions[i] /= m; 82 } 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 87 { 88 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 89 PetscErrorCode ierr; 90 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 91 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 92 PetscReal shift = 0.0; 93 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 94 95 PetscFunctionBegin; 96 allowzeropivot = PetscNot(A->erroriffailure); 97 98 if (a->idiagvalid) { 99 if (values) *values = a->idiag; 100 PetscFunctionReturn(0); 101 } 102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 103 diag_offset = a->diag; 104 if (!a->idiag) { 105 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 106 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 107 } 108 diag = a->idiag; 109 if (values) *values = a->idiag; 110 /* factor and invert each block */ 111 switch (bs) { 112 case 1: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 1*diag_offset[i]; 115 diag[0] = odiag[0]; 116 117 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 118 if (allowzeropivot) { 119 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 120 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 121 A->factorerror_zeropivot_row = i; 122 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 123 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 124 } 125 126 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 127 diag += 1; 128 } 129 break; 130 case 2: 131 for (i=0; i<mbs; i++) { 132 odiag = v + 4*diag_offset[i]; 133 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 134 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 135 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136 diag += 4; 137 } 138 break; 139 case 3: 140 for (i=0; i<mbs; i++) { 141 odiag = v + 9*diag_offset[i]; 142 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 143 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 144 diag[8] = odiag[8]; 145 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 146 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 147 diag += 9; 148 } 149 break; 150 case 4: 151 for (i=0; i<mbs; i++) { 152 odiag = v + 16*diag_offset[i]; 153 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 154 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 155 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 156 diag += 16; 157 } 158 break; 159 case 5: 160 for (i=0; i<mbs; i++) { 161 odiag = v + 25*diag_offset[i]; 162 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 163 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 164 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 165 diag += 25; 166 } 167 break; 168 case 6: 169 for (i=0; i<mbs; i++) { 170 odiag = v + 36*diag_offset[i]; 171 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 172 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 173 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 174 diag += 36; 175 } 176 break; 177 case 7: 178 for (i=0; i<mbs; i++) { 179 odiag = v + 49*diag_offset[i]; 180 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 181 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 182 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 183 diag += 49; 184 } 185 break; 186 default: 187 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 188 for (i=0; i<mbs; i++) { 189 odiag = v + bs2*diag_offset[i]; 190 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 191 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 192 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 193 diag += bs2; 194 } 195 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 196 } 197 a->idiagvalid = PETSC_TRUE; 198 PetscFunctionReturn(0); 199 } 200 201 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 202 { 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 204 PetscScalar *x,*work,*w,*workt,*t; 205 const MatScalar *v,*aa = a->a, *idiag; 206 const PetscScalar *b,*xb; 207 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 208 PetscErrorCode ierr; 209 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 210 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 211 212 PetscFunctionBegin; 213 its = its*lits; 214 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 215 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 216 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 217 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 218 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 219 220 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 221 222 if (!m) PetscFunctionReturn(0); 223 diag = a->diag; 224 idiag = a->idiag; 225 k = PetscMax(A->rmap->n,A->cmap->n); 226 if (!a->mult_work) { 227 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 228 } 229 if (!a->sor_workt) { 230 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 231 } 232 if (!a->sor_work) { 233 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 234 } 235 work = a->mult_work; 236 t = a->sor_workt; 237 w = a->sor_work; 238 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 244 switch (bs) { 245 case 1: 246 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 247 t[0] = b[0]; 248 i2 = 1; 249 idiag += 1; 250 for (i=1; i<m; i++) { 251 v = aa + ai[i]; 252 vi = aj + ai[i]; 253 nz = diag[i] - ai[i]; 254 s[0] = b[i2]; 255 for (j=0; j<nz; j++) { 256 xw[0] = x[vi[j]]; 257 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 258 } 259 t[i2] = s[0]; 260 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 261 x[i2] = xw[0]; 262 idiag += 1; 263 i2 += 1; 264 } 265 break; 266 case 2: 267 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 268 t[0] = b[0]; t[1] = b[1]; 269 i2 = 2; 270 idiag += 4; 271 for (i=1; i<m; i++) { 272 v = aa + 4*ai[i]; 273 vi = aj + ai[i]; 274 nz = diag[i] - ai[i]; 275 s[0] = b[i2]; s[1] = b[i2+1]; 276 for (j=0; j<nz; j++) { 277 idx = 2*vi[j]; 278 it = 4*j; 279 xw[0] = x[idx]; xw[1] = x[1+idx]; 280 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 281 } 282 t[i2] = s[0]; t[i2+1] = s[1]; 283 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 284 x[i2] = xw[0]; x[i2+1] = xw[1]; 285 idiag += 4; 286 i2 += 2; 287 } 288 break; 289 case 3: 290 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 291 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 292 i2 = 3; 293 idiag += 9; 294 for (i=1; i<m; i++) { 295 v = aa + 9*ai[i]; 296 vi = aj + ai[i]; 297 nz = diag[i] - ai[i]; 298 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 299 while (nz--) { 300 idx = 3*(*vi++); 301 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 302 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 303 v += 9; 304 } 305 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 306 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 307 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 308 idiag += 9; 309 i2 += 3; 310 } 311 break; 312 case 4: 313 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 314 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 315 i2 = 4; 316 idiag += 16; 317 for (i=1; i<m; i++) { 318 v = aa + 16*ai[i]; 319 vi = aj + ai[i]; 320 nz = diag[i] - ai[i]; 321 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 322 while (nz--) { 323 idx = 4*(*vi++); 324 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 325 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 326 v += 16; 327 } 328 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 329 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 331 idiag += 16; 332 i2 += 4; 333 } 334 break; 335 case 5: 336 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 337 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 338 i2 = 5; 339 idiag += 25; 340 for (i=1; i<m; i++) { 341 v = aa + 25*ai[i]; 342 vi = aj + ai[i]; 343 nz = diag[i] - ai[i]; 344 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 345 while (nz--) { 346 idx = 5*(*vi++); 347 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 349 v += 25; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 352 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 353 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 354 idiag += 25; 355 i2 += 5; 356 } 357 break; 358 case 6: 359 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 360 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 361 i2 = 6; 362 idiag += 36; 363 for (i=1; i<m; i++) { 364 v = aa + 36*ai[i]; 365 vi = aj + ai[i]; 366 nz = diag[i] - ai[i]; 367 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 368 while (nz--) { 369 idx = 6*(*vi++); 370 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 371 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 372 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 373 v += 36; 374 } 375 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 376 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 377 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 378 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 379 idiag += 36; 380 i2 += 6; 381 } 382 break; 383 case 7: 384 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 385 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 386 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 387 i2 = 7; 388 idiag += 49; 389 for (i=1; i<m; i++) { 390 v = aa + 49*ai[i]; 391 vi = aj + ai[i]; 392 nz = diag[i] - ai[i]; 393 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 394 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 395 while (nz--) { 396 idx = 7*(*vi++); 397 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 398 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 400 v += 49; 401 } 402 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 403 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 404 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 405 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 406 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 407 idiag += 49; 408 i2 += 7; 409 } 410 break; 411 default: 412 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 413 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 414 i2 = bs; 415 idiag += bs2; 416 for (i=1; i<m; i++) { 417 v = aa + bs2*ai[i]; 418 vi = aj + ai[i]; 419 nz = diag[i] - ai[i]; 420 421 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 422 /* copy all rows of x that are needed into contiguous space */ 423 workt = work; 424 for (j=0; j<nz; j++) { 425 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 426 workt += bs; 427 } 428 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 429 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 430 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 431 432 idiag += bs2; 433 i2 += bs; 434 } 435 break; 436 } 437 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 438 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 439 xb = t; 440 } 441 else xb = b; 442 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 443 idiag = a->idiag+bs2*(a->mbs-1); 444 i2 = bs * (m-1); 445 switch (bs) { 446 case 1: 447 s[0] = xb[i2]; 448 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 449 x[i2] = xw[0]; 450 i2 -= 1; 451 for (i=m-2; i>=0; i--) { 452 v = aa + (diag[i]+1); 453 vi = aj + diag[i] + 1; 454 nz = ai[i+1] - diag[i] - 1; 455 s[0] = xb[i2]; 456 for (j=0; j<nz; j++) { 457 xw[0] = x[vi[j]]; 458 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 459 } 460 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 461 x[i2] = xw[0]; 462 idiag -= 1; 463 i2 -= 1; 464 } 465 break; 466 case 2: 467 s[0] = xb[i2]; s[1] = xb[i2+1]; 468 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 469 x[i2] = xw[0]; x[i2+1] = xw[1]; 470 i2 -= 2; 471 idiag -= 4; 472 for (i=m-2; i>=0; i--) { 473 v = aa + 4*(diag[i] + 1); 474 vi = aj + diag[i] + 1; 475 nz = ai[i+1] - diag[i] - 1; 476 s[0] = xb[i2]; s[1] = xb[i2+1]; 477 for (j=0; j<nz; j++) { 478 idx = 2*vi[j]; 479 it = 4*j; 480 xw[0] = x[idx]; xw[1] = x[1+idx]; 481 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 482 } 483 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 484 x[i2] = xw[0]; x[i2+1] = xw[1]; 485 idiag -= 4; 486 i2 -= 2; 487 } 488 break; 489 case 3: 490 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 491 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 492 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 493 i2 -= 3; 494 idiag -= 9; 495 for (i=m-2; i>=0; i--) { 496 v = aa + 9*(diag[i]+1); 497 vi = aj + diag[i] + 1; 498 nz = ai[i+1] - diag[i] - 1; 499 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 500 while (nz--) { 501 idx = 3*(*vi++); 502 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 503 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 504 v += 9; 505 } 506 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 507 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 508 idiag -= 9; 509 i2 -= 3; 510 } 511 break; 512 case 4: 513 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 514 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 515 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 516 i2 -= 4; 517 idiag -= 16; 518 for (i=m-2; i>=0; i--) { 519 v = aa + 16*(diag[i]+1); 520 vi = aj + diag[i] + 1; 521 nz = ai[i+1] - diag[i] - 1; 522 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 523 while (nz--) { 524 idx = 4*(*vi++); 525 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 526 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 527 v += 16; 528 } 529 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 530 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 531 idiag -= 16; 532 i2 -= 4; 533 } 534 break; 535 case 5: 536 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 537 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 539 i2 -= 5; 540 idiag -= 25; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 25*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 546 while (nz--) { 547 idx = 5*(*vi++); 548 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 550 v += 25; 551 } 552 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 break; 558 case 6: 559 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 560 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 561 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 562 i2 -= 6; 563 idiag -= 36; 564 for (i=m-2; i>=0; i--) { 565 v = aa + 36*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 569 while (nz--) { 570 idx = 6*(*vi++); 571 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 572 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 573 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 574 v += 36; 575 } 576 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 577 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 578 idiag -= 36; 579 i2 -= 6; 580 } 581 break; 582 case 7: 583 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 584 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 585 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 586 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 587 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 588 i2 -= 7; 589 idiag -= 49; 590 for (i=m-2; i>=0; i--) { 591 v = aa + 49*(diag[i]+1); 592 vi = aj + diag[i] + 1; 593 nz = ai[i+1] - diag[i] - 1; 594 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 595 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 596 while (nz--) { 597 idx = 7*(*vi++); 598 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 599 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 600 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 601 v += 49; 602 } 603 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 604 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 605 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 606 idiag -= 49; 607 i2 -= 7; 608 } 609 break; 610 default: 611 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 612 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 613 i2 -= bs; 614 idiag -= bs2; 615 for (i=m-2; i>=0; i--) { 616 v = aa + bs2*(diag[i]+1); 617 vi = aj + diag[i] + 1; 618 nz = ai[i+1] - diag[i] - 1; 619 620 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 621 /* copy all rows of x that are needed into contiguous space */ 622 workt = work; 623 for (j=0; j<nz; j++) { 624 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 625 workt += bs; 626 } 627 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 628 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 629 630 idiag -= bs2; 631 i2 -= bs; 632 } 633 break; 634 } 635 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 636 } 637 its--; 638 } 639 while (its--) { 640 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 641 idiag = a->idiag; 642 i2 = 0; 643 switch (bs) { 644 case 1: 645 for (i=0; i<m; i++) { 646 v = aa + ai[i]; 647 vi = aj + ai[i]; 648 nz = ai[i+1] - ai[i]; 649 s[0] = b[i2]; 650 for (j=0; j<nz; j++) { 651 xw[0] = x[vi[j]]; 652 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 653 } 654 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 655 x[i2] += xw[0]; 656 idiag += 1; 657 i2 += 1; 658 } 659 break; 660 case 2: 661 for (i=0; i<m; i++) { 662 v = aa + 4*ai[i]; 663 vi = aj + ai[i]; 664 nz = ai[i+1] - ai[i]; 665 s[0] = b[i2]; s[1] = b[i2+1]; 666 for (j=0; j<nz; j++) { 667 idx = 2*vi[j]; 668 it = 4*j; 669 xw[0] = x[idx]; xw[1] = x[1+idx]; 670 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 671 } 672 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 673 x[i2] += xw[0]; x[i2+1] += xw[1]; 674 idiag += 4; 675 i2 += 2; 676 } 677 break; 678 case 3: 679 for (i=0; i<m; i++) { 680 v = aa + 9*ai[i]; 681 vi = aj + ai[i]; 682 nz = ai[i+1] - ai[i]; 683 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 684 while (nz--) { 685 idx = 3*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 688 v += 9; 689 } 690 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 691 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 692 idiag += 9; 693 i2 += 3; 694 } 695 break; 696 case 4: 697 for (i=0; i<m; i++) { 698 v = aa + 16*ai[i]; 699 vi = aj + ai[i]; 700 nz = ai[i+1] - ai[i]; 701 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 702 while (nz--) { 703 idx = 4*(*vi++); 704 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 705 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 706 v += 16; 707 } 708 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 709 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 710 idiag += 16; 711 i2 += 4; 712 } 713 break; 714 case 5: 715 for (i=0; i<m; i++) { 716 v = aa + 25*ai[i]; 717 vi = aj + ai[i]; 718 nz = ai[i+1] - ai[i]; 719 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 720 while (nz--) { 721 idx = 5*(*vi++); 722 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 723 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 724 v += 25; 725 } 726 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 727 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 728 idiag += 25; 729 i2 += 5; 730 } 731 break; 732 case 6: 733 for (i=0; i<m; i++) { 734 v = aa + 36*ai[i]; 735 vi = aj + ai[i]; 736 nz = ai[i+1] - ai[i]; 737 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 738 while (nz--) { 739 idx = 6*(*vi++); 740 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 741 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 742 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 743 v += 36; 744 } 745 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 746 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 747 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 748 idiag += 36; 749 i2 += 6; 750 } 751 break; 752 case 7: 753 for (i=0; i<m; i++) { 754 v = aa + 49*ai[i]; 755 vi = aj + ai[i]; 756 nz = ai[i+1] - ai[i]; 757 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 758 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 759 while (nz--) { 760 idx = 7*(*vi++); 761 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 762 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 763 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 764 v += 49; 765 } 766 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 767 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 768 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 769 idiag += 49; 770 i2 += 7; 771 } 772 break; 773 default: 774 for (i=0; i<m; i++) { 775 v = aa + bs2*ai[i]; 776 vi = aj + ai[i]; 777 nz = ai[i+1] - ai[i]; 778 779 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 780 /* copy all rows of x that are needed into contiguous space */ 781 workt = work; 782 for (j=0; j<nz; j++) { 783 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 784 workt += bs; 785 } 786 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 787 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 788 789 idiag += bs2; 790 i2 += bs; 791 } 792 break; 793 } 794 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 795 } 796 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 797 idiag = a->idiag+bs2*(a->mbs-1); 798 i2 = bs * (m-1); 799 switch (bs) { 800 case 1: 801 for (i=m-1; i>=0; i--) { 802 v = aa + ai[i]; 803 vi = aj + ai[i]; 804 nz = ai[i+1] - ai[i]; 805 s[0] = b[i2]; 806 for (j=0; j<nz; j++) { 807 xw[0] = x[vi[j]]; 808 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 809 } 810 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 811 x[i2] += xw[0]; 812 idiag -= 1; 813 i2 -= 1; 814 } 815 break; 816 case 2: 817 for (i=m-1; i>=0; i--) { 818 v = aa + 4*ai[i]; 819 vi = aj + ai[i]; 820 nz = ai[i+1] - ai[i]; 821 s[0] = b[i2]; s[1] = b[i2+1]; 822 for (j=0; j<nz; j++) { 823 idx = 2*vi[j]; 824 it = 4*j; 825 xw[0] = x[idx]; xw[1] = x[1+idx]; 826 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 827 } 828 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 829 x[i2] += xw[0]; x[i2+1] += xw[1]; 830 idiag -= 4; 831 i2 -= 2; 832 } 833 break; 834 case 3: 835 for (i=m-1; i>=0; i--) { 836 v = aa + 9*ai[i]; 837 vi = aj + ai[i]; 838 nz = ai[i+1] - ai[i]; 839 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 840 while (nz--) { 841 idx = 3*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 844 v += 9; 845 } 846 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 847 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 848 idiag -= 9; 849 i2 -= 3; 850 } 851 break; 852 case 4: 853 for (i=m-1; i>=0; i--) { 854 v = aa + 16*ai[i]; 855 vi = aj + ai[i]; 856 nz = ai[i+1] - ai[i]; 857 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 858 while (nz--) { 859 idx = 4*(*vi++); 860 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 861 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 862 v += 16; 863 } 864 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 865 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 866 idiag -= 16; 867 i2 -= 4; 868 } 869 break; 870 case 5: 871 for (i=m-1; i>=0; i--) { 872 v = aa + 25*ai[i]; 873 vi = aj + ai[i]; 874 nz = ai[i+1] - ai[i]; 875 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 876 while (nz--) { 877 idx = 5*(*vi++); 878 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 879 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 880 v += 25; 881 } 882 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 883 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 884 idiag -= 25; 885 i2 -= 5; 886 } 887 break; 888 case 6: 889 for (i=m-1; i>=0; i--) { 890 v = aa + 36*ai[i]; 891 vi = aj + ai[i]; 892 nz = ai[i+1] - ai[i]; 893 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 894 while (nz--) { 895 idx = 6*(*vi++); 896 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 897 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 898 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 899 v += 36; 900 } 901 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 902 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 903 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 904 idiag -= 36; 905 i2 -= 6; 906 } 907 break; 908 case 7: 909 for (i=m-1; i>=0; i--) { 910 v = aa + 49*ai[i]; 911 vi = aj + ai[i]; 912 nz = ai[i+1] - ai[i]; 913 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 914 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 918 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 919 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 920 v += 49; 921 } 922 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 923 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 924 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 925 idiag -= 49; 926 i2 -= 7; 927 } 928 break; 929 default: 930 for (i=m-1; i>=0; i--) { 931 v = aa + bs2*ai[i]; 932 vi = aj + ai[i]; 933 nz = ai[i+1] - ai[i]; 934 935 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 936 /* copy all rows of x that are needed into contiguous space */ 937 workt = work; 938 for (j=0; j<nz; j++) { 939 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 940 workt += bs; 941 } 942 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 943 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 944 945 idiag -= bs2; 946 i2 -= bs; 947 } 948 break; 949 } 950 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 951 } 952 } 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 /* 959 Special version for direct calls from Fortran (Used in PETSc-fun3d) 960 */ 961 #if defined(PETSC_HAVE_FORTRAN_CAPS) 962 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 963 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 964 #define matsetvaluesblocked4_ matsetvaluesblocked4 965 #endif 966 967 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 968 { 969 Mat A = *AA; 970 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 971 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 972 PetscInt *ai =a->i,*ailen=a->ilen; 973 PetscInt *aj =a->j,stepval,lastcol = -1; 974 const PetscScalar *value = v; 975 MatScalar *ap,*aa = a->a,*bap; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 980 stepval = (n-1)*4; 981 for (k=0; k<m; k++) { /* loop over added rows */ 982 row = im[k]; 983 rp = aj + ai[row]; 984 ap = aa + 16*ai[row]; 985 nrow = ailen[row]; 986 low = 0; 987 high = nrow; 988 for (l=0; l<n; l++) { /* loop over added columns */ 989 col = in[l]; 990 if (col <= lastcol) low = 0; 991 else high = nrow; 992 lastcol = col; 993 value = v + k*(stepval+4 + l)*4; 994 while (high-low > 7) { 995 t = (low+high)/2; 996 if (rp[t] > col) high = t; 997 else low = t; 998 } 999 for (i=low; i<high; i++) { 1000 if (rp[i] > col) break; 1001 if (rp[i] == col) { 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] += *value++; 1006 } 1007 } 1008 goto noinsert2; 1009 } 1010 } 1011 N = nrow++ - 1; 1012 high++; /* added new column index thus must search to one higher than before */ 1013 /* shift up all the later entries in this row */ 1014 for (ii=N; ii>=i; ii--) { 1015 rp[ii+1] = rp[ii]; 1016 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 1017 } 1018 if (N >= i) { 1019 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1020 } 1021 rp[i] = col; 1022 bap = ap + 16*i; 1023 for (ii=0; ii<4; ii++,value+=stepval) { 1024 for (jj=ii; jj<16; jj+=4) { 1025 bap[jj] = *value++; 1026 } 1027 } 1028 noinsert2:; 1029 low = i; 1030 } 1031 ailen[row] = nrow; 1032 } 1033 PetscFunctionReturnVoid(); 1034 } 1035 1036 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1037 #define matsetvalues4_ MATSETVALUES4 1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1039 #define matsetvalues4_ matsetvalues4 1040 #endif 1041 1042 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1043 { 1044 Mat A = *AA; 1045 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1046 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1047 PetscInt *ai=a->i,*ailen=a->ilen; 1048 PetscInt *aj=a->j,brow,bcol; 1049 PetscInt ridx,cidx,lastcol = -1; 1050 MatScalar *ap,value,*aa=a->a,*bap; 1051 PetscErrorCode ierr; 1052 1053 PetscFunctionBegin; 1054 for (k=0; k<m; k++) { /* loop over added rows */ 1055 row = im[k]; brow = row/4; 1056 rp = aj + ai[brow]; 1057 ap = aa + 16*ai[brow]; 1058 nrow = ailen[brow]; 1059 low = 0; 1060 high = nrow; 1061 for (l=0; l<n; l++) { /* loop over added columns */ 1062 col = in[l]; bcol = col/4; 1063 ridx = row % 4; cidx = col % 4; 1064 value = v[l + k*n]; 1065 if (col <= lastcol) low = 0; 1066 else high = nrow; 1067 lastcol = col; 1068 while (high-low > 7) { 1069 t = (low+high)/2; 1070 if (rp[t] > bcol) high = t; 1071 else low = t; 1072 } 1073 for (i=low; i<high; i++) { 1074 if (rp[i] > bcol) break; 1075 if (rp[i] == bcol) { 1076 bap = ap + 16*i + 4*cidx + ridx; 1077 *bap += value; 1078 goto noinsert1; 1079 } 1080 } 1081 N = nrow++ - 1; 1082 high++; /* added new column thus must search to one higher than before */ 1083 /* shift up all the later entries in this row */ 1084 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1085 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1086 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1087 rp[i] = bcol; 1088 ap[16*i + 4*cidx + ridx] = value; 1089 noinsert1:; 1090 low = i; 1091 } 1092 ailen[brow] = nrow; 1093 } 1094 PetscFunctionReturnVoid(); 1095 } 1096 1097 /* 1098 Checks for missing diagonals 1099 */ 1100 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1101 { 1102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1103 PetscErrorCode ierr; 1104 PetscInt *diag,*ii = a->i,i; 1105 1106 PetscFunctionBegin; 1107 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1108 *missing = PETSC_FALSE; 1109 if (A->rmap->n > 0 && !ii) { 1110 *missing = PETSC_TRUE; 1111 if (d) *d = 0; 1112 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1113 } else { 1114 PetscInt n; 1115 n = PetscMin(a->mbs, a->nbs); 1116 diag = a->diag; 1117 for (i=0; i<n; i++) { 1118 if (diag[i] >= ii[i+1]) { 1119 *missing = PETSC_TRUE; 1120 if (d) *d = i; 1121 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1122 break; 1123 } 1124 } 1125 } 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1130 { 1131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 PetscInt i,j,m = a->mbs; 1134 1135 PetscFunctionBegin; 1136 if (!a->diag) { 1137 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1138 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1139 a->free_diag = PETSC_TRUE; 1140 } 1141 for (i=0; i<m; i++) { 1142 a->diag[i] = a->i[i+1]; 1143 for (j=a->i[i]; j<a->i[i+1]; j++) { 1144 if (a->j[j] == i) { 1145 a->diag[i] = j; 1146 break; 1147 } 1148 } 1149 } 1150 PetscFunctionReturn(0); 1151 } 1152 1153 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1154 { 1155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1156 PetscErrorCode ierr; 1157 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1158 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1159 1160 PetscFunctionBegin; 1161 *nn = n; 1162 if (!ia) PetscFunctionReturn(0); 1163 if (symmetric) { 1164 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1165 nz = tia[n]; 1166 } else { 1167 tia = a->i; tja = a->j; 1168 } 1169 1170 if (!blockcompressed && bs > 1) { 1171 (*nn) *= bs; 1172 /* malloc & create the natural set of indices */ 1173 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1174 if (n) { 1175 (*ia)[0] = oshift; 1176 for (j=1; j<bs; j++) { 1177 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1178 } 1179 } 1180 1181 for (i=1; i<n; i++) { 1182 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1183 for (j=1; j<bs; j++) { 1184 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1185 } 1186 } 1187 if (n) { 1188 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1189 } 1190 1191 if (inja) { 1192 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1193 cnt = 0; 1194 for (i=0; i<n; i++) { 1195 for (j=0; j<bs; j++) { 1196 for (k=tia[i]; k<tia[i+1]; k++) { 1197 for (l=0; l<bs; l++) { 1198 (*ja)[cnt++] = bs*tja[k] + l; 1199 } 1200 } 1201 } 1202 } 1203 } 1204 1205 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1206 ierr = PetscFree(tia);CHKERRQ(ierr); 1207 ierr = PetscFree(tja);CHKERRQ(ierr); 1208 } 1209 } else if (oshift == 1) { 1210 if (symmetric) { 1211 nz = tia[A->rmap->n/bs]; 1212 /* add 1 to i and j indices */ 1213 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1214 *ia = tia; 1215 if (ja) { 1216 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1217 *ja = tja; 1218 } 1219 } else { 1220 nz = a->i[A->rmap->n/bs]; 1221 /* malloc space and add 1 to i and j indices */ 1222 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1223 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1224 if (ja) { 1225 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1226 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1227 } 1228 } 1229 } else { 1230 *ia = tia; 1231 if (ja) *ja = tja; 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1237 { 1238 PetscErrorCode ierr; 1239 1240 PetscFunctionBegin; 1241 if (!ia) PetscFunctionReturn(0); 1242 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1243 ierr = PetscFree(*ia);CHKERRQ(ierr); 1244 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 #if defined(PETSC_USE_LOG) 1256 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1257 #endif 1258 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1259 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1260 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1261 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1262 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1263 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1264 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1265 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1266 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1267 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1268 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1269 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1270 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1271 1272 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1273 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1274 ierr = PetscFree(A->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1288 #if defined(PETSC_HAVE_HYPRE) 1289 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1290 #endif 1291 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1296 { 1297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1298 PetscErrorCode ierr; 1299 1300 PetscFunctionBegin; 1301 switch (op) { 1302 case MAT_ROW_ORIENTED: 1303 a->roworiented = flg; 1304 break; 1305 case MAT_KEEP_NONZERO_PATTERN: 1306 a->keepnonzeropattern = flg; 1307 break; 1308 case MAT_NEW_NONZERO_LOCATIONS: 1309 a->nonew = (flg ? 0 : 1); 1310 break; 1311 case MAT_NEW_NONZERO_LOCATION_ERR: 1312 a->nonew = (flg ? -1 : 0); 1313 break; 1314 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1315 a->nonew = (flg ? -2 : 0); 1316 break; 1317 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1318 a->nounused = (flg ? -1 : 0); 1319 break; 1320 case MAT_FORCE_DIAGONAL_ENTRIES: 1321 case MAT_IGNORE_OFF_PROC_ENTRIES: 1322 case MAT_USE_HASH_TABLE: 1323 case MAT_SORTED_FULL: 1324 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1325 break; 1326 case MAT_SPD: 1327 case MAT_SYMMETRIC: 1328 case MAT_STRUCTURALLY_SYMMETRIC: 1329 case MAT_HERMITIAN: 1330 case MAT_SYMMETRY_ETERNAL: 1331 case MAT_SUBMAT_SINGLEIS: 1332 case MAT_STRUCTURE_ONLY: 1333 /* These options are handled directly by MatSetOption() */ 1334 break; 1335 default: 1336 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1337 } 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1342 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1343 { 1344 PetscErrorCode ierr; 1345 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1346 MatScalar *aa_i; 1347 PetscScalar *v_i; 1348 1349 PetscFunctionBegin; 1350 bs = A->rmap->bs; 1351 bs2 = bs*bs; 1352 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1353 1354 bn = row/bs; /* Block number */ 1355 bp = row % bs; /* Block Position */ 1356 M = ai[bn+1] - ai[bn]; 1357 *nz = bs*M; 1358 1359 if (v) { 1360 *v = NULL; 1361 if (*nz) { 1362 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1363 for (i=0; i<M; i++) { /* for each block in the block row */ 1364 v_i = *v + i*bs; 1365 aa_i = aa + bs2*(ai[bn] + i); 1366 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1367 } 1368 } 1369 } 1370 1371 if (idx) { 1372 *idx = NULL; 1373 if (*nz) { 1374 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1375 for (i=0; i<M; i++) { /* for each block in the block row */ 1376 idx_i = *idx + i*bs; 1377 itmp = bs*aj[ai[bn] + i]; 1378 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1379 } 1380 } 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1386 { 1387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1396 { 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 if (nz) *nz = 0; 1401 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1402 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1407 { 1408 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1409 Mat C; 1410 PetscErrorCode ierr; 1411 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1412 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1413 MatScalar *ata,*aa=a->a; 1414 1415 PetscFunctionBegin; 1416 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1417 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1418 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1419 1420 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1421 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1422 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1423 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1424 1425 at = (Mat_SeqBAIJ*)C->data; 1426 ati = at->i; 1427 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1428 } else { 1429 C = *B; 1430 at = (Mat_SeqBAIJ*)C->data; 1431 ati = at->i; 1432 } 1433 1434 atj = at->j; 1435 ata = at->a; 1436 1437 /* Copy ati into atfill so we have locations of the next free space in atj */ 1438 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1439 1440 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1441 for (i=0; i<mbs; i++) { 1442 anzj = ai[i+1] - ai[i]; 1443 for (j=0; j<anzj; j++) { 1444 atj[atfill[*aj]] = i; 1445 for (kr=0; kr<bs; kr++) { 1446 for (k=0; k<bs; k++) { 1447 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1448 } 1449 } 1450 atfill[*aj++] += 1; 1451 } 1452 } 1453 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1454 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 1456 /* Clean up temporary space and complete requests. */ 1457 ierr = PetscFree(atfill);CHKERRQ(ierr); 1458 1459 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1460 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1461 *B = C; 1462 } else { 1463 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1469 { 1470 PetscErrorCode ierr; 1471 Mat Btrans; 1472 1473 PetscFunctionBegin; 1474 *f = PETSC_FALSE; 1475 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1476 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1477 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1478 PetscFunctionReturn(0); 1479 } 1480 1481 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1482 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1483 { 1484 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1485 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1486 PetscInt *rowlens,*colidxs; 1487 PetscScalar *matvals; 1488 PetscErrorCode ierr; 1489 1490 PetscFunctionBegin; 1491 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1492 1493 M = mat->rmap->N; 1494 N = mat->cmap->N; 1495 m = mat->rmap->n; 1496 bs = mat->rmap->bs; 1497 nz = bs*bs*A->nz; 1498 1499 /* write matrix header */ 1500 header[0] = MAT_FILE_CLASSID; 1501 header[1] = M; header[2] = N; header[3] = nz; 1502 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1503 1504 /* store row lengths */ 1505 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1506 for (cnt=0, i=0; i<A->mbs; i++) 1507 for (j=0; j<bs; j++) 1508 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1509 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1510 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1511 1512 /* store column indices */ 1513 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1514 for (cnt=0, i=0; i<A->mbs; i++) 1515 for (k=0; k<bs; k++) 1516 for (j=A->i[i]; j<A->i[i+1]; j++) 1517 for (l=0; l<bs; l++) 1518 colidxs[cnt++] = bs*A->j[j] + l; 1519 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1520 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1521 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1522 1523 /* store nonzero values */ 1524 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1525 for (cnt=0, i=0; i<A->mbs; i++) 1526 for (k=0; k<bs; k++) 1527 for (j=A->i[i]; j<A->i[i+1]; j++) 1528 for (l=0; l<bs; l++) 1529 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1530 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1531 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1532 ierr = PetscFree(matvals);CHKERRQ(ierr); 1533 1534 /* write block size option to the viewer's .info file */ 1535 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1540 { 1541 PetscErrorCode ierr; 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1543 PetscInt i,bs = A->rmap->bs,k; 1544 1545 PetscFunctionBegin; 1546 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1547 for (i=0; i<a->mbs; i++) { 1548 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1549 for (k=a->i[i]; k<a->i[i+1]; k++) { 1550 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1551 } 1552 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1553 } 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1559 { 1560 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1561 PetscErrorCode ierr; 1562 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1563 PetscViewerFormat format; 1564 1565 PetscFunctionBegin; 1566 if (A->structure_only) { 1567 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1572 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1573 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1574 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1575 const char *matname; 1576 Mat aij; 1577 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1578 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1579 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1580 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1581 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1582 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1583 PetscFunctionReturn(0); 1584 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1586 for (i=0; i<a->mbs; i++) { 1587 for (j=0; j<bs; j++) { 1588 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1589 for (k=a->i[i]; k<a->i[i+1]; k++) { 1590 for (l=0; l<bs; l++) { 1591 #if defined(PETSC_USE_COMPLEX) 1592 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1593 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1594 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1595 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1596 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1597 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1598 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1599 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1600 } 1601 #else 1602 if (a->a[bs2*k + l*bs + j] != 0.0) { 1603 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1604 } 1605 #endif 1606 } 1607 } 1608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1609 } 1610 } 1611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1612 } else { 1613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1614 for (i=0; i<a->mbs; i++) { 1615 for (j=0; j<bs; j++) { 1616 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1617 for (k=a->i[i]; k<a->i[i+1]; k++) { 1618 for (l=0; l<bs; l++) { 1619 #if defined(PETSC_USE_COMPLEX) 1620 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1621 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1622 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1623 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1624 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1625 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1626 } else { 1627 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1628 } 1629 #else 1630 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1631 #endif 1632 } 1633 } 1634 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1635 } 1636 } 1637 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1638 } 1639 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1640 PetscFunctionReturn(0); 1641 } 1642 1643 #include <petscdraw.h> 1644 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1645 { 1646 Mat A = (Mat) Aa; 1647 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1648 PetscErrorCode ierr; 1649 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1650 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1651 MatScalar *aa; 1652 PetscViewer viewer; 1653 PetscViewerFormat format; 1654 1655 PetscFunctionBegin; 1656 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1657 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1658 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1659 1660 /* loop over matrix elements drawing boxes */ 1661 1662 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1663 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1664 /* Blue for negative, Cyan for zero and Red for positive */ 1665 color = PETSC_DRAW_BLUE; 1666 for (i=0,row=0; i<mbs; i++,row+=bs) { 1667 for (j=a->i[i]; j<a->i[i+1]; j++) { 1668 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1669 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1670 aa = a->a + j*bs2; 1671 for (k=0; k<bs; k++) { 1672 for (l=0; l<bs; l++) { 1673 if (PetscRealPart(*aa++) >= 0.) continue; 1674 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1675 } 1676 } 1677 } 1678 } 1679 color = PETSC_DRAW_CYAN; 1680 for (i=0,row=0; i<mbs; i++,row+=bs) { 1681 for (j=a->i[i]; j<a->i[i+1]; j++) { 1682 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1683 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1684 aa = a->a + j*bs2; 1685 for (k=0; k<bs; k++) { 1686 for (l=0; l<bs; l++) { 1687 if (PetscRealPart(*aa++) != 0.) continue; 1688 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1689 } 1690 } 1691 } 1692 } 1693 color = PETSC_DRAW_RED; 1694 for (i=0,row=0; i<mbs; i++,row+=bs) { 1695 for (j=a->i[i]; j<a->i[i+1]; j++) { 1696 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1697 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1698 aa = a->a + j*bs2; 1699 for (k=0; k<bs; k++) { 1700 for (l=0; l<bs; l++) { 1701 if (PetscRealPart(*aa++) <= 0.) continue; 1702 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1703 } 1704 } 1705 } 1706 } 1707 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1708 } else { 1709 /* use contour shading to indicate magnitude of values */ 1710 /* first determine max of all nonzero values */ 1711 PetscReal minv = 0.0, maxv = 0.0; 1712 PetscDraw popup; 1713 1714 for (i=0; i<a->nz*a->bs2; i++) { 1715 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1716 } 1717 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1718 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1719 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1720 1721 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1722 for (i=0,row=0; i<mbs; i++,row+=bs) { 1723 for (j=a->i[i]; j<a->i[i+1]; j++) { 1724 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1725 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1726 aa = a->a + j*bs2; 1727 for (k=0; k<bs; k++) { 1728 for (l=0; l<bs; l++) { 1729 MatScalar v = *aa++; 1730 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1731 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1732 } 1733 } 1734 } 1735 } 1736 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1742 { 1743 PetscErrorCode ierr; 1744 PetscReal xl,yl,xr,yr,w,h; 1745 PetscDraw draw; 1746 PetscBool isnull; 1747 1748 PetscFunctionBegin; 1749 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1750 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1751 if (isnull) PetscFunctionReturn(0); 1752 1753 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1754 xr += w; yr += h; xl = -w; yl = -h; 1755 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1756 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1757 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1758 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1759 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1764 { 1765 PetscErrorCode ierr; 1766 PetscBool iascii,isbinary,isdraw; 1767 1768 PetscFunctionBegin; 1769 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1770 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1771 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1772 if (iascii) { 1773 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1774 } else if (isbinary) { 1775 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1776 } else if (isdraw) { 1777 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1778 } else { 1779 Mat B; 1780 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1781 ierr = MatView(B,viewer);CHKERRQ(ierr); 1782 ierr = MatDestroy(&B);CHKERRQ(ierr); 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1788 { 1789 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1790 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1791 PetscInt *ai = a->i,*ailen = a->ilen; 1792 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1793 MatScalar *ap,*aa = a->a; 1794 1795 PetscFunctionBegin; 1796 for (k=0; k<m; k++) { /* loop over rows */ 1797 row = im[k]; brow = row/bs; 1798 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1799 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1800 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1801 nrow = ailen[brow]; 1802 for (l=0; l<n; l++) { /* loop over columns */ 1803 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1804 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1805 col = in[l]; 1806 bcol = col/bs; 1807 cidx = col%bs; 1808 ridx = row%bs; 1809 high = nrow; 1810 low = 0; /* assume unsorted */ 1811 while (high-low > 5) { 1812 t = (low+high)/2; 1813 if (rp[t] > bcol) high = t; 1814 else low = t; 1815 } 1816 for (i=low; i<high; i++) { 1817 if (rp[i] > bcol) break; 1818 if (rp[i] == bcol) { 1819 *v++ = ap[bs2*i+bs*cidx+ridx]; 1820 goto finished; 1821 } 1822 } 1823 *v++ = 0.0; 1824 finished:; 1825 } 1826 } 1827 PetscFunctionReturn(0); 1828 } 1829 1830 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1831 { 1832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1833 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1834 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1835 PetscErrorCode ierr; 1836 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1837 PetscBool roworiented=a->roworiented; 1838 const PetscScalar *value = v; 1839 MatScalar *ap=NULL,*aa = a->a,*bap; 1840 1841 PetscFunctionBegin; 1842 if (roworiented) { 1843 stepval = (n-1)*bs; 1844 } else { 1845 stepval = (m-1)*bs; 1846 } 1847 for (k=0; k<m; k++) { /* loop over added rows */ 1848 row = im[k]; 1849 if (row < 0) continue; 1850 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1851 rp = aj + ai[row]; 1852 if (!A->structure_only) ap = aa + bs2*ai[row]; 1853 rmax = imax[row]; 1854 nrow = ailen[row]; 1855 low = 0; 1856 high = nrow; 1857 for (l=0; l<n; l++) { /* loop over added columns */ 1858 if (in[l] < 0) continue; 1859 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1860 col = in[l]; 1861 if (!A->structure_only) { 1862 if (roworiented) { 1863 value = v + (k*(stepval+bs) + l)*bs; 1864 } else { 1865 value = v + (l*(stepval+bs) + k)*bs; 1866 } 1867 } 1868 if (col <= lastcol) low = 0; 1869 else high = nrow; 1870 lastcol = col; 1871 while (high-low > 7) { 1872 t = (low+high)/2; 1873 if (rp[t] > col) high = t; 1874 else low = t; 1875 } 1876 for (i=low; i<high; i++) { 1877 if (rp[i] > col) break; 1878 if (rp[i] == col) { 1879 if (A->structure_only) goto noinsert2; 1880 bap = ap + bs2*i; 1881 if (roworiented) { 1882 if (is == ADD_VALUES) { 1883 for (ii=0; ii<bs; ii++,value+=stepval) { 1884 for (jj=ii; jj<bs2; jj+=bs) { 1885 bap[jj] += *value++; 1886 } 1887 } 1888 } else { 1889 for (ii=0; ii<bs; ii++,value+=stepval) { 1890 for (jj=ii; jj<bs2; jj+=bs) { 1891 bap[jj] = *value++; 1892 } 1893 } 1894 } 1895 } else { 1896 if (is == ADD_VALUES) { 1897 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1898 for (jj=0; jj<bs; jj++) { 1899 bap[jj] += value[jj]; 1900 } 1901 bap += bs; 1902 } 1903 } else { 1904 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1905 for (jj=0; jj<bs; jj++) { 1906 bap[jj] = value[jj]; 1907 } 1908 bap += bs; 1909 } 1910 } 1911 } 1912 goto noinsert2; 1913 } 1914 } 1915 if (nonew == 1) goto noinsert2; 1916 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1917 if (A->structure_only) { 1918 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1919 } else { 1920 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1921 } 1922 N = nrow++ - 1; high++; 1923 /* shift up all the later entries in this row */ 1924 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1925 rp[i] = col; 1926 if (!A->structure_only) { 1927 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1928 bap = ap + bs2*i; 1929 if (roworiented) { 1930 for (ii=0; ii<bs; ii++,value+=stepval) { 1931 for (jj=ii; jj<bs2; jj+=bs) { 1932 bap[jj] = *value++; 1933 } 1934 } 1935 } else { 1936 for (ii=0; ii<bs; ii++,value+=stepval) { 1937 for (jj=0; jj<bs; jj++) { 1938 *bap++ = *value++; 1939 } 1940 } 1941 } 1942 } 1943 noinsert2:; 1944 low = i; 1945 } 1946 ailen[row] = nrow; 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1952 { 1953 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1954 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1955 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1956 PetscErrorCode ierr; 1957 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1958 MatScalar *aa = a->a,*ap; 1959 PetscReal ratio=0.6; 1960 1961 PetscFunctionBegin; 1962 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1963 1964 if (m) rmax = ailen[0]; 1965 for (i=1; i<mbs; i++) { 1966 /* move each row back by the amount of empty slots (fshift) before it*/ 1967 fshift += imax[i-1] - ailen[i-1]; 1968 rmax = PetscMax(rmax,ailen[i]); 1969 if (fshift) { 1970 ip = aj + ai[i]; 1971 ap = aa + bs2*ai[i]; 1972 N = ailen[i]; 1973 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1974 if (!A->structure_only) { 1975 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1976 } 1977 } 1978 ai[i] = ai[i-1] + ailen[i-1]; 1979 } 1980 if (mbs) { 1981 fshift += imax[mbs-1] - ailen[mbs-1]; 1982 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1983 } 1984 1985 /* reset ilen and imax for each row */ 1986 a->nonzerorowcnt = 0; 1987 if (A->structure_only) { 1988 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1989 } else { /* !A->structure_only */ 1990 for (i=0; i<mbs; i++) { 1991 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1992 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1993 } 1994 } 1995 a->nz = ai[mbs]; 1996 1997 /* diagonals may have moved, so kill the diagonal pointers */ 1998 a->idiagvalid = PETSC_FALSE; 1999 if (fshift && a->diag) { 2000 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2001 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2002 a->diag = NULL; 2003 } 2004 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2005 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2006 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2007 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2008 2009 A->info.mallocs += a->reallocs; 2010 a->reallocs = 0; 2011 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2012 a->rmax = rmax; 2013 2014 if (!A->structure_only) { 2015 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 /* 2021 This function returns an array of flags which indicate the locations of contiguous 2022 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2023 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2024 Assume: sizes should be long enough to hold all the values. 2025 */ 2026 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2027 { 2028 PetscInt i,j,k,row; 2029 PetscBool flg; 2030 2031 PetscFunctionBegin; 2032 for (i=0,j=0; i<n; j++) { 2033 row = idx[i]; 2034 if (row%bs!=0) { /* Not the beginning of a block */ 2035 sizes[j] = 1; 2036 i++; 2037 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2038 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2039 i++; 2040 } else { /* Begining of the block, so check if the complete block exists */ 2041 flg = PETSC_TRUE; 2042 for (k=1; k<bs; k++) { 2043 if (row+k != idx[i+k]) { /* break in the block */ 2044 flg = PETSC_FALSE; 2045 break; 2046 } 2047 } 2048 if (flg) { /* No break in the bs */ 2049 sizes[j] = bs; 2050 i += bs; 2051 } else { 2052 sizes[j] = 1; 2053 i++; 2054 } 2055 } 2056 } 2057 *bs_max = j; 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2062 { 2063 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2064 PetscErrorCode ierr; 2065 PetscInt i,j,k,count,*rows; 2066 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2067 PetscScalar zero = 0.0; 2068 MatScalar *aa; 2069 const PetscScalar *xx; 2070 PetscScalar *bb; 2071 2072 PetscFunctionBegin; 2073 /* fix right hand side if needed */ 2074 if (x && b) { 2075 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2076 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2077 for (i=0; i<is_n; i++) { 2078 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2079 } 2080 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2082 } 2083 2084 /* Make a copy of the IS and sort it */ 2085 /* allocate memory for rows,sizes */ 2086 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2087 2088 /* copy IS values to rows, and sort them */ 2089 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2090 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2091 2092 if (baij->keepnonzeropattern) { 2093 for (i=0; i<is_n; i++) sizes[i] = 1; 2094 bs_max = is_n; 2095 } else { 2096 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2097 A->nonzerostate++; 2098 } 2099 2100 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2101 row = rows[j]; 2102 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2103 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2104 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2105 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2106 if (diag != (PetscScalar)0.0) { 2107 if (baij->ilen[row/bs] > 0) { 2108 baij->ilen[row/bs] = 1; 2109 baij->j[baij->i[row/bs]] = row/bs; 2110 2111 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2112 } 2113 /* Now insert all the diagonal values for this bs */ 2114 for (k=0; k<bs; k++) { 2115 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2116 } 2117 } else { /* (diag == 0.0) */ 2118 baij->ilen[row/bs] = 0; 2119 } /* end (diag == 0.0) */ 2120 } else { /* (sizes[i] != bs) */ 2121 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2122 for (k=0; k<count; k++) { 2123 aa[0] = zero; 2124 aa += bs; 2125 } 2126 if (diag != (PetscScalar)0.0) { 2127 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2128 } 2129 } 2130 } 2131 2132 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2133 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2138 { 2139 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2140 PetscErrorCode ierr; 2141 PetscInt i,j,k,count; 2142 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2143 PetscScalar zero = 0.0; 2144 MatScalar *aa; 2145 const PetscScalar *xx; 2146 PetscScalar *bb; 2147 PetscBool *zeroed,vecs = PETSC_FALSE; 2148 2149 PetscFunctionBegin; 2150 /* fix right hand side if needed */ 2151 if (x && b) { 2152 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2153 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2154 vecs = PETSC_TRUE; 2155 } 2156 2157 /* zero the columns */ 2158 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2159 for (i=0; i<is_n; i++) { 2160 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2161 zeroed[is_idx[i]] = PETSC_TRUE; 2162 } 2163 for (i=0; i<A->rmap->N; i++) { 2164 if (!zeroed[i]) { 2165 row = i/bs; 2166 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2167 for (k=0; k<bs; k++) { 2168 col = bs*baij->j[j] + k; 2169 if (zeroed[col]) { 2170 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2171 if (vecs) bb[i] -= aa[0]*xx[col]; 2172 aa[0] = 0.0; 2173 } 2174 } 2175 } 2176 } else if (vecs) bb[i] = diag*xx[i]; 2177 } 2178 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2179 if (vecs) { 2180 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2181 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2182 } 2183 2184 /* zero the rows */ 2185 for (i=0; i<is_n; i++) { 2186 row = is_idx[i]; 2187 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2188 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2189 for (k=0; k<count; k++) { 2190 aa[0] = zero; 2191 aa += bs; 2192 } 2193 if (diag != (PetscScalar)0.0) { 2194 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2195 } 2196 } 2197 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2202 { 2203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2204 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2205 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2206 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2207 PetscErrorCode ierr; 2208 PetscInt ridx,cidx,bs2=a->bs2; 2209 PetscBool roworiented=a->roworiented; 2210 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2211 2212 PetscFunctionBegin; 2213 for (k=0; k<m; k++) { /* loop over added rows */ 2214 row = im[k]; 2215 brow = row/bs; 2216 if (row < 0) continue; 2217 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2218 rp = aj + ai[brow]; 2219 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2220 rmax = imax[brow]; 2221 nrow = ailen[brow]; 2222 low = 0; 2223 high = nrow; 2224 for (l=0; l<n; l++) { /* loop over added columns */ 2225 if (in[l] < 0) continue; 2226 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2227 col = in[l]; bcol = col/bs; 2228 ridx = row % bs; cidx = col % bs; 2229 if (!A->structure_only) { 2230 if (roworiented) { 2231 value = v[l + k*n]; 2232 } else { 2233 value = v[k + l*m]; 2234 } 2235 } 2236 if (col <= lastcol) low = 0; else high = nrow; 2237 lastcol = col; 2238 while (high-low > 7) { 2239 t = (low+high)/2; 2240 if (rp[t] > bcol) high = t; 2241 else low = t; 2242 } 2243 for (i=low; i<high; i++) { 2244 if (rp[i] > bcol) break; 2245 if (rp[i] == bcol) { 2246 bap = ap + bs2*i + bs*cidx + ridx; 2247 if (!A->structure_only) { 2248 if (is == ADD_VALUES) *bap += value; 2249 else *bap = value; 2250 } 2251 goto noinsert1; 2252 } 2253 } 2254 if (nonew == 1) goto noinsert1; 2255 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2256 if (A->structure_only) { 2257 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2258 } else { 2259 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2260 } 2261 N = nrow++ - 1; high++; 2262 /* shift up all the later entries in this row */ 2263 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2264 rp[i] = bcol; 2265 if (!A->structure_only) { 2266 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2267 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2268 ap[bs2*i + bs*cidx + ridx] = value; 2269 } 2270 a->nz++; 2271 A->nonzerostate++; 2272 noinsert1:; 2273 low = i; 2274 } 2275 ailen[brow] = nrow; 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 2280 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2281 { 2282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2283 Mat outA; 2284 PetscErrorCode ierr; 2285 PetscBool row_identity,col_identity; 2286 2287 PetscFunctionBegin; 2288 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2289 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2290 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2291 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2292 2293 outA = inA; 2294 inA->factortype = MAT_FACTOR_LU; 2295 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2296 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2297 2298 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2299 2300 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2301 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2302 a->row = row; 2303 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2304 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2305 a->col = col; 2306 2307 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2308 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2309 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2310 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2311 2312 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2313 if (!a->solve_work) { 2314 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2315 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2316 } 2317 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2322 { 2323 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2324 PetscInt i,nz,mbs; 2325 2326 PetscFunctionBegin; 2327 nz = baij->maxnz; 2328 mbs = baij->mbs; 2329 for (i=0; i<nz; i++) { 2330 baij->j[i] = indices[i]; 2331 } 2332 baij->nz = nz; 2333 for (i=0; i<mbs; i++) { 2334 baij->ilen[i] = baij->imax[i]; 2335 } 2336 PetscFunctionReturn(0); 2337 } 2338 2339 /*@ 2340 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2341 in the matrix. 2342 2343 Input Parameters: 2344 + mat - the SeqBAIJ matrix 2345 - indices - the column indices 2346 2347 Level: advanced 2348 2349 Notes: 2350 This can be called if you have precomputed the nonzero structure of the 2351 matrix and want to provide it to the matrix object to improve the performance 2352 of the MatSetValues() operation. 2353 2354 You MUST have set the correct numbers of nonzeros per row in the call to 2355 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2356 2357 MUST be called before any calls to MatSetValues(); 2358 2359 @*/ 2360 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2361 { 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2366 PetscValidPointer(indices,2); 2367 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2368 PetscFunctionReturn(0); 2369 } 2370 2371 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2372 { 2373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2376 PetscReal atmp; 2377 PetscScalar *x,zero = 0.0; 2378 MatScalar *aa; 2379 PetscInt ncols,brow,krow,kcol; 2380 2381 PetscFunctionBegin; 2382 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2383 bs = A->rmap->bs; 2384 aa = a->a; 2385 ai = a->i; 2386 aj = a->j; 2387 mbs = a->mbs; 2388 2389 ierr = VecSet(v,zero);CHKERRQ(ierr); 2390 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2391 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2392 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2393 for (i=0; i<mbs; i++) { 2394 ncols = ai[1] - ai[0]; ai++; 2395 brow = bs*i; 2396 for (j=0; j<ncols; j++) { 2397 for (kcol=0; kcol<bs; kcol++) { 2398 for (krow=0; krow<bs; krow++) { 2399 atmp = PetscAbsScalar(*aa);aa++; 2400 row = brow + krow; /* row index */ 2401 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2402 } 2403 } 2404 aj++; 2405 } 2406 } 2407 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2412 { 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 /* If the two matrices have the same copy implementation, use fast copy. */ 2417 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2419 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2420 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2421 2422 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2423 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2424 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2425 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2426 } else { 2427 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2428 } 2429 PetscFunctionReturn(0); 2430 } 2431 2432 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2433 { 2434 PetscErrorCode ierr; 2435 2436 PetscFunctionBegin; 2437 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2442 { 2443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2444 2445 PetscFunctionBegin; 2446 *array = a->a; 2447 PetscFunctionReturn(0); 2448 } 2449 2450 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2451 { 2452 PetscFunctionBegin; 2453 *array = NULL; 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2458 { 2459 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2460 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2461 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 /* Set the number of nonzeros in the new matrix */ 2466 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2471 { 2472 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2473 PetscErrorCode ierr; 2474 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2475 PetscBLASInt one=1; 2476 2477 PetscFunctionBegin; 2478 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2479 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2480 if (e) { 2481 ierr = PetscArraycmp(x->i,y->i,x->mbs+1,&e);CHKERRQ(ierr); 2482 if (e) { 2483 ierr = PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);CHKERRQ(ierr); 2484 if (e) str = SAME_NONZERO_PATTERN; 2485 } 2486 } 2487 if (!e && str == SAME_NONZERO_PATTERN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN"); 2488 } 2489 if (str == SAME_NONZERO_PATTERN) { 2490 PetscScalar alpha = a; 2491 PetscBLASInt bnz; 2492 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2493 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2494 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2495 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2496 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2497 } else { 2498 Mat B; 2499 PetscInt *nnz; 2500 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2501 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2502 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2503 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2504 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2505 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2506 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2507 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2508 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2509 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2510 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2511 ierr = PetscFree(nnz);CHKERRQ(ierr); 2512 } 2513 PetscFunctionReturn(0); 2514 } 2515 2516 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2517 { 2518 #if defined(PETSC_USE_COMPLEX) 2519 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2520 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2521 MatScalar *aa = a->a; 2522 2523 PetscFunctionBegin; 2524 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2525 #else 2526 PetscFunctionBegin; 2527 #endif 2528 PetscFunctionReturn(0); 2529 } 2530 2531 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2532 { 2533 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2534 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2535 MatScalar *aa = a->a; 2536 2537 PetscFunctionBegin; 2538 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2543 { 2544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2545 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2546 MatScalar *aa = a->a; 2547 2548 PetscFunctionBegin; 2549 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 /* 2554 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2555 */ 2556 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2557 { 2558 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2559 PetscErrorCode ierr; 2560 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2561 PetscInt nz = a->i[m],row,*jj,mr,col; 2562 2563 PetscFunctionBegin; 2564 *nn = n; 2565 if (!ia) PetscFunctionReturn(0); 2566 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2567 else { 2568 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2569 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2570 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2571 jj = a->j; 2572 for (i=0; i<nz; i++) { 2573 collengths[jj[i]]++; 2574 } 2575 cia[0] = oshift; 2576 for (i=0; i<n; i++) { 2577 cia[i+1] = cia[i] + collengths[i]; 2578 } 2579 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2580 jj = a->j; 2581 for (row=0; row<m; row++) { 2582 mr = a->i[row+1] - a->i[row]; 2583 for (i=0; i<mr; i++) { 2584 col = *jj++; 2585 2586 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2587 } 2588 } 2589 ierr = PetscFree(collengths);CHKERRQ(ierr); 2590 *ia = cia; *ja = cja; 2591 } 2592 PetscFunctionReturn(0); 2593 } 2594 2595 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2596 { 2597 PetscErrorCode ierr; 2598 2599 PetscFunctionBegin; 2600 if (!ia) PetscFunctionReturn(0); 2601 ierr = PetscFree(*ia);CHKERRQ(ierr); 2602 ierr = PetscFree(*ja);CHKERRQ(ierr); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 /* 2607 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2608 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2609 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2610 */ 2611 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2612 { 2613 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2614 PetscErrorCode ierr; 2615 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2616 PetscInt nz = a->i[m],row,*jj,mr,col; 2617 PetscInt *cspidx; 2618 2619 PetscFunctionBegin; 2620 *nn = n; 2621 if (!ia) PetscFunctionReturn(0); 2622 2623 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2624 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2625 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2626 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2627 jj = a->j; 2628 for (i=0; i<nz; i++) { 2629 collengths[jj[i]]++; 2630 } 2631 cia[0] = oshift; 2632 for (i=0; i<n; i++) { 2633 cia[i+1] = cia[i] + collengths[i]; 2634 } 2635 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2636 jj = a->j; 2637 for (row=0; row<m; row++) { 2638 mr = a->i[row+1] - a->i[row]; 2639 for (i=0; i<mr; i++) { 2640 col = *jj++; 2641 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2642 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2643 } 2644 } 2645 ierr = PetscFree(collengths);CHKERRQ(ierr); 2646 *ia = cia; 2647 *ja = cja; 2648 *spidx = cspidx; 2649 PetscFunctionReturn(0); 2650 } 2651 2652 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2653 { 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2658 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2663 { 2664 PetscErrorCode ierr; 2665 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2666 2667 PetscFunctionBegin; 2668 if (!Y->preallocated || !aij->nz) { 2669 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2670 } 2671 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 /* -------------------------------------------------------------------*/ 2676 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2677 MatGetRow_SeqBAIJ, 2678 MatRestoreRow_SeqBAIJ, 2679 MatMult_SeqBAIJ_N, 2680 /* 4*/ MatMultAdd_SeqBAIJ_N, 2681 MatMultTranspose_SeqBAIJ, 2682 MatMultTransposeAdd_SeqBAIJ, 2683 NULL, 2684 NULL, 2685 NULL, 2686 /* 10*/ NULL, 2687 MatLUFactor_SeqBAIJ, 2688 NULL, 2689 NULL, 2690 MatTranspose_SeqBAIJ, 2691 /* 15*/ MatGetInfo_SeqBAIJ, 2692 MatEqual_SeqBAIJ, 2693 MatGetDiagonal_SeqBAIJ, 2694 MatDiagonalScale_SeqBAIJ, 2695 MatNorm_SeqBAIJ, 2696 /* 20*/ NULL, 2697 MatAssemblyEnd_SeqBAIJ, 2698 MatSetOption_SeqBAIJ, 2699 MatZeroEntries_SeqBAIJ, 2700 /* 24*/ MatZeroRows_SeqBAIJ, 2701 NULL, 2702 NULL, 2703 NULL, 2704 NULL, 2705 /* 29*/ MatSetUp_SeqBAIJ, 2706 NULL, 2707 NULL, 2708 NULL, 2709 NULL, 2710 /* 34*/ MatDuplicate_SeqBAIJ, 2711 NULL, 2712 NULL, 2713 MatILUFactor_SeqBAIJ, 2714 NULL, 2715 /* 39*/ MatAXPY_SeqBAIJ, 2716 MatCreateSubMatrices_SeqBAIJ, 2717 MatIncreaseOverlap_SeqBAIJ, 2718 MatGetValues_SeqBAIJ, 2719 MatCopy_SeqBAIJ, 2720 /* 44*/ NULL, 2721 MatScale_SeqBAIJ, 2722 MatShift_SeqBAIJ, 2723 NULL, 2724 MatZeroRowsColumns_SeqBAIJ, 2725 /* 49*/ NULL, 2726 MatGetRowIJ_SeqBAIJ, 2727 MatRestoreRowIJ_SeqBAIJ, 2728 MatGetColumnIJ_SeqBAIJ, 2729 MatRestoreColumnIJ_SeqBAIJ, 2730 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 MatSetValuesBlocked_SeqBAIJ, 2735 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2736 MatDestroy_SeqBAIJ, 2737 MatView_SeqBAIJ, 2738 NULL, 2739 NULL, 2740 /* 64*/ NULL, 2741 NULL, 2742 NULL, 2743 NULL, 2744 NULL, 2745 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2746 NULL, 2747 MatConvert_Basic, 2748 NULL, 2749 NULL, 2750 /* 74*/ NULL, 2751 MatFDColoringApply_BAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 /* 79*/ NULL, 2756 NULL, 2757 NULL, 2758 NULL, 2759 MatLoad_SeqBAIJ, 2760 /* 84*/ NULL, 2761 NULL, 2762 NULL, 2763 NULL, 2764 NULL, 2765 /* 89*/ NULL, 2766 NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 /* 94*/ NULL, 2771 NULL, 2772 NULL, 2773 NULL, 2774 NULL, 2775 /* 99*/ NULL, 2776 NULL, 2777 NULL, 2778 MatConjugate_SeqBAIJ, 2779 NULL, 2780 /*104*/ NULL, 2781 MatRealPart_SeqBAIJ, 2782 MatImaginaryPart_SeqBAIJ, 2783 NULL, 2784 NULL, 2785 /*109*/ NULL, 2786 NULL, 2787 NULL, 2788 NULL, 2789 MatMissingDiagonal_SeqBAIJ, 2790 /*114*/ NULL, 2791 NULL, 2792 NULL, 2793 NULL, 2794 NULL, 2795 /*119*/ NULL, 2796 NULL, 2797 MatMultHermitianTranspose_SeqBAIJ, 2798 MatMultHermitianTransposeAdd_SeqBAIJ, 2799 NULL, 2800 /*124*/ NULL, 2801 MatGetColumnReductions_SeqBAIJ, 2802 MatInvertBlockDiagonal_SeqBAIJ, 2803 NULL, 2804 NULL, 2805 /*129*/ NULL, 2806 NULL, 2807 NULL, 2808 NULL, 2809 NULL, 2810 /*134*/ NULL, 2811 NULL, 2812 NULL, 2813 NULL, 2814 NULL, 2815 /*139*/ MatSetBlockSizes_Default, 2816 NULL, 2817 NULL, 2818 MatFDColoringSetUp_SeqXAIJ, 2819 NULL, 2820 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2821 MatDestroySubMatrices_SeqBAIJ 2822 }; 2823 2824 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2825 { 2826 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2827 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2828 PetscErrorCode ierr; 2829 2830 PetscFunctionBegin; 2831 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2832 2833 /* allocate space for values if not already there */ 2834 if (!aij->saved_values) { 2835 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2837 } 2838 2839 /* copy values over */ 2840 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2845 { 2846 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2847 PetscErrorCode ierr; 2848 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2849 2850 PetscFunctionBegin; 2851 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2852 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2853 2854 /* copy values over */ 2855 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2860 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2861 2862 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2863 { 2864 Mat_SeqBAIJ *b; 2865 PetscErrorCode ierr; 2866 PetscInt i,mbs,nbs,bs2; 2867 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2868 2869 PetscFunctionBegin; 2870 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2871 if (nz == MAT_SKIP_ALLOCATION) { 2872 skipallocation = PETSC_TRUE; 2873 nz = 0; 2874 } 2875 2876 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2877 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2878 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2879 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2880 2881 B->preallocated = PETSC_TRUE; 2882 2883 mbs = B->rmap->n/bs; 2884 nbs = B->cmap->n/bs; 2885 bs2 = bs*bs; 2886 2887 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2888 2889 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2890 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2891 if (nnz) { 2892 for (i=0; i<mbs; i++) { 2893 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2894 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2895 } 2896 } 2897 2898 b = (Mat_SeqBAIJ*)B->data; 2899 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2900 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2901 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2902 2903 if (!flg) { 2904 switch (bs) { 2905 case 1: 2906 B->ops->mult = MatMult_SeqBAIJ_1; 2907 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2908 break; 2909 case 2: 2910 B->ops->mult = MatMult_SeqBAIJ_2; 2911 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2912 break; 2913 case 3: 2914 B->ops->mult = MatMult_SeqBAIJ_3; 2915 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2916 break; 2917 case 4: 2918 B->ops->mult = MatMult_SeqBAIJ_4; 2919 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2920 break; 2921 case 5: 2922 B->ops->mult = MatMult_SeqBAIJ_5; 2923 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2924 break; 2925 case 6: 2926 B->ops->mult = MatMult_SeqBAIJ_6; 2927 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2928 break; 2929 case 7: 2930 B->ops->mult = MatMult_SeqBAIJ_7; 2931 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2932 break; 2933 case 9: 2934 { 2935 PetscInt version = 1; 2936 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2937 switch (version) { 2938 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2939 case 1: 2940 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2941 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2942 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2943 break; 2944 #endif 2945 default: 2946 B->ops->mult = MatMult_SeqBAIJ_N; 2947 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2948 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2949 break; 2950 } 2951 break; 2952 } 2953 case 11: 2954 B->ops->mult = MatMult_SeqBAIJ_11; 2955 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2956 break; 2957 case 12: 2958 { 2959 PetscInt version = 1; 2960 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2961 switch (version) { 2962 case 1: 2963 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2964 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2965 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2966 break; 2967 case 2: 2968 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2969 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2970 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2971 break; 2972 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2973 case 3: 2974 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2975 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2976 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2977 break; 2978 #endif 2979 default: 2980 B->ops->mult = MatMult_SeqBAIJ_N; 2981 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2982 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2983 break; 2984 } 2985 break; 2986 } 2987 case 15: 2988 { 2989 PetscInt version = 1; 2990 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2991 switch (version) { 2992 case 1: 2993 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2994 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2995 break; 2996 case 2: 2997 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2998 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2999 break; 3000 case 3: 3001 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3002 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 3003 break; 3004 case 4: 3005 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3006 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 3007 break; 3008 default: 3009 B->ops->mult = MatMult_SeqBAIJ_N; 3010 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 3011 break; 3012 } 3013 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3014 break; 3015 } 3016 default: 3017 B->ops->mult = MatMult_SeqBAIJ_N; 3018 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3019 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 3020 break; 3021 } 3022 } 3023 B->ops->sor = MatSOR_SeqBAIJ; 3024 b->mbs = mbs; 3025 b->nbs = nbs; 3026 if (!skipallocation) { 3027 if (!b->imax) { 3028 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 3029 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3030 3031 b->free_imax_ilen = PETSC_TRUE; 3032 } 3033 /* b->ilen will count nonzeros in each block row so far. */ 3034 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3035 if (!nnz) { 3036 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3037 else if (nz < 0) nz = 1; 3038 nz = PetscMin(nz,nbs); 3039 for (i=0; i<mbs; i++) b->imax[i] = nz; 3040 ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 3041 } else { 3042 PetscInt64 nz64 = 0; 3043 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3044 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 3045 } 3046 3047 /* allocate the matrix space */ 3048 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3049 if (B->structure_only) { 3050 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 3051 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 3052 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 3053 } else { 3054 PetscInt nzbs2 = 0; 3055 ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 3056 ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 3057 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3058 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 3059 } 3060 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 3061 3062 if (B->structure_only) { 3063 b->singlemalloc = PETSC_FALSE; 3064 b->free_a = PETSC_FALSE; 3065 } else { 3066 b->singlemalloc = PETSC_TRUE; 3067 b->free_a = PETSC_TRUE; 3068 } 3069 b->free_ij = PETSC_TRUE; 3070 3071 b->i[0] = 0; 3072 for (i=1; i<mbs+1; i++) { 3073 b->i[i] = b->i[i-1] + b->imax[i-1]; 3074 } 3075 3076 } else { 3077 b->free_a = PETSC_FALSE; 3078 b->free_ij = PETSC_FALSE; 3079 } 3080 3081 b->bs2 = bs2; 3082 b->mbs = mbs; 3083 b->nz = 0; 3084 b->maxnz = nz; 3085 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3086 B->was_assembled = PETSC_FALSE; 3087 B->assembled = PETSC_FALSE; 3088 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3089 PetscFunctionReturn(0); 3090 } 3091 3092 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3093 { 3094 PetscInt i,m,nz,nz_max=0,*nnz; 3095 PetscScalar *values=NULL; 3096 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3097 PetscErrorCode ierr; 3098 3099 PetscFunctionBegin; 3100 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3101 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3102 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3103 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3104 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3105 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3106 m = B->rmap->n/bs; 3107 3108 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3109 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3110 for (i=0; i<m; i++) { 3111 nz = ii[i+1]- ii[i]; 3112 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3113 nz_max = PetscMax(nz_max, nz); 3114 nnz[i] = nz; 3115 } 3116 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3117 ierr = PetscFree(nnz);CHKERRQ(ierr); 3118 3119 values = (PetscScalar*)V; 3120 if (!values) { 3121 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3122 } 3123 for (i=0; i<m; i++) { 3124 PetscInt ncols = ii[i+1] - ii[i]; 3125 const PetscInt *icols = jj + ii[i]; 3126 if (bs == 1 || !roworiented) { 3127 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3128 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3129 } else { 3130 PetscInt j; 3131 for (j=0; j<ncols; j++) { 3132 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3133 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3134 } 3135 } 3136 } 3137 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3138 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3139 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3140 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3141 PetscFunctionReturn(0); 3142 } 3143 3144 /*@C 3145 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3146 3147 Not Collective 3148 3149 Input Parameter: 3150 . mat - a MATSEQBAIJ matrix 3151 3152 Output Parameter: 3153 . array - pointer to the data 3154 3155 Level: intermediate 3156 3157 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3158 @*/ 3159 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3160 { 3161 PetscErrorCode ierr; 3162 3163 PetscFunctionBegin; 3164 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3165 PetscFunctionReturn(0); 3166 } 3167 3168 /*@C 3169 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3170 3171 Not Collective 3172 3173 Input Parameters: 3174 + mat - a MATSEQBAIJ matrix 3175 - array - pointer to the data 3176 3177 Level: intermediate 3178 3179 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3180 @*/ 3181 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3182 { 3183 PetscErrorCode ierr; 3184 3185 PetscFunctionBegin; 3186 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 3190 /*MC 3191 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3192 block sparse compressed row format. 3193 3194 Options Database Keys: 3195 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3196 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3197 3198 Level: beginner 3199 3200 Notes: 3201 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3202 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3203 3204 Run with -info to see what version of the matrix-vector product is being used 3205 3206 .seealso: MatCreateSeqBAIJ() 3207 M*/ 3208 3209 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3210 3211 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3212 { 3213 PetscErrorCode ierr; 3214 PetscMPIInt size; 3215 Mat_SeqBAIJ *b; 3216 3217 PetscFunctionBegin; 3218 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3219 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3220 3221 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3222 B->data = (void*)b; 3223 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3224 3225 b->row = NULL; 3226 b->col = NULL; 3227 b->icol = NULL; 3228 b->reallocs = 0; 3229 b->saved_values = NULL; 3230 3231 b->roworiented = PETSC_TRUE; 3232 b->nonew = 0; 3233 b->diag = NULL; 3234 B->spptr = NULL; 3235 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3236 b->keepnonzeropattern = PETSC_FALSE; 3237 3238 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3239 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3240 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3241 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3242 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3243 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3244 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3245 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3246 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3247 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3248 #if defined(PETSC_HAVE_HYPRE) 3249 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3250 #endif 3251 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3252 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3253 PetscFunctionReturn(0); 3254 } 3255 3256 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3257 { 3258 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3259 PetscErrorCode ierr; 3260 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3261 3262 PetscFunctionBegin; 3263 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3264 3265 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3266 c->imax = a->imax; 3267 c->ilen = a->ilen; 3268 c->free_imax_ilen = PETSC_FALSE; 3269 } else { 3270 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3271 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3272 for (i=0; i<mbs; i++) { 3273 c->imax[i] = a->imax[i]; 3274 c->ilen[i] = a->ilen[i]; 3275 } 3276 c->free_imax_ilen = PETSC_TRUE; 3277 } 3278 3279 /* allocate the matrix space */ 3280 if (mallocmatspace) { 3281 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3282 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3283 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3284 3285 c->i = a->i; 3286 c->j = a->j; 3287 c->singlemalloc = PETSC_FALSE; 3288 c->free_a = PETSC_TRUE; 3289 c->free_ij = PETSC_FALSE; 3290 c->parent = A; 3291 C->preallocated = PETSC_TRUE; 3292 C->assembled = PETSC_TRUE; 3293 3294 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3295 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3296 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3297 } else { 3298 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3299 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3300 3301 c->singlemalloc = PETSC_TRUE; 3302 c->free_a = PETSC_TRUE; 3303 c->free_ij = PETSC_TRUE; 3304 3305 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3306 if (mbs > 0) { 3307 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3308 if (cpvalues == MAT_COPY_VALUES) { 3309 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3310 } else { 3311 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3312 } 3313 } 3314 C->preallocated = PETSC_TRUE; 3315 C->assembled = PETSC_TRUE; 3316 } 3317 } 3318 3319 c->roworiented = a->roworiented; 3320 c->nonew = a->nonew; 3321 3322 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3323 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3324 3325 c->bs2 = a->bs2; 3326 c->mbs = a->mbs; 3327 c->nbs = a->nbs; 3328 3329 if (a->diag) { 3330 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3331 c->diag = a->diag; 3332 c->free_diag = PETSC_FALSE; 3333 } else { 3334 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3335 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3336 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3337 c->free_diag = PETSC_TRUE; 3338 } 3339 } else c->diag = NULL; 3340 3341 c->nz = a->nz; 3342 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3343 c->solve_work = NULL; 3344 c->mult_work = NULL; 3345 c->sor_workt = NULL; 3346 c->sor_work = NULL; 3347 3348 c->compressedrow.use = a->compressedrow.use; 3349 c->compressedrow.nrows = a->compressedrow.nrows; 3350 if (a->compressedrow.use) { 3351 i = a->compressedrow.nrows; 3352 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3353 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3354 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3355 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3356 } else { 3357 c->compressedrow.use = PETSC_FALSE; 3358 c->compressedrow.i = NULL; 3359 c->compressedrow.rindex = NULL; 3360 } 3361 C->nonzerostate = A->nonzerostate; 3362 3363 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3364 PetscFunctionReturn(0); 3365 } 3366 3367 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3368 { 3369 PetscErrorCode ierr; 3370 3371 PetscFunctionBegin; 3372 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3373 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3374 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3375 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3376 PetscFunctionReturn(0); 3377 } 3378 3379 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3380 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3381 { 3382 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3383 PetscInt *rowidxs,*colidxs; 3384 PetscScalar *matvals; 3385 PetscErrorCode ierr; 3386 3387 PetscFunctionBegin; 3388 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3389 3390 /* read matrix header */ 3391 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3392 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3393 M = header[1]; N = header[2]; nz = header[3]; 3394 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3395 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3396 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3397 3398 /* set block sizes from the viewer's .info file */ 3399 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3400 /* set local and global sizes if not set already */ 3401 if (mat->rmap->n < 0) mat->rmap->n = M; 3402 if (mat->cmap->n < 0) mat->cmap->n = N; 3403 if (mat->rmap->N < 0) mat->rmap->N = M; 3404 if (mat->cmap->N < 0) mat->cmap->N = N; 3405 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3406 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3407 3408 /* check if the matrix sizes are correct */ 3409 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3410 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3411 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3412 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3413 mbs = m/bs; nbs = n/bs; 3414 3415 /* read in row lengths, column indices and nonzero values */ 3416 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3417 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3418 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3419 sum = rowidxs[m]; 3420 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3421 3422 /* read in column indices and nonzero values */ 3423 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3424 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3425 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3426 3427 { /* preallocate matrix storage */ 3428 PetscBT bt; /* helper bit set to count nonzeros */ 3429 PetscInt *nnz; 3430 PetscBool sbaij; 3431 3432 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3433 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3434 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3435 for (i=0; i<mbs; i++) { 3436 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3437 for (k=0; k<bs; k++) { 3438 PetscInt row = bs*i + k; 3439 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3440 PetscInt col = colidxs[j]; 3441 if (!sbaij || col >= row) 3442 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3443 } 3444 } 3445 } 3446 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3447 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3448 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3449 ierr = PetscFree(nnz);CHKERRQ(ierr); 3450 } 3451 3452 /* store matrix values */ 3453 for (i=0; i<m; i++) { 3454 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3455 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3456 } 3457 3458 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3459 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3460 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3461 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3462 PetscFunctionReturn(0); 3463 } 3464 3465 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3466 { 3467 PetscErrorCode ierr; 3468 PetscBool isbinary; 3469 3470 PetscFunctionBegin; 3471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3472 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3473 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3474 PetscFunctionReturn(0); 3475 } 3476 3477 /*@C 3478 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3479 compressed row) format. For good matrix assembly performance the 3480 user should preallocate the matrix storage by setting the parameter nz 3481 (or the array nnz). By setting these parameters accurately, performance 3482 during matrix assembly can be increased by more than a factor of 50. 3483 3484 Collective 3485 3486 Input Parameters: 3487 + comm - MPI communicator, set to PETSC_COMM_SELF 3488 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3489 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3490 . m - number of rows 3491 . n - number of columns 3492 . nz - number of nonzero blocks per block row (same for all rows) 3493 - nnz - array containing the number of nonzero blocks in the various block rows 3494 (possibly different for each block row) or NULL 3495 3496 Output Parameter: 3497 . A - the matrix 3498 3499 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3500 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3501 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3502 3503 Options Database Keys: 3504 + -mat_no_unroll - uses code that does not unroll the loops in the 3505 block calculations (much slower) 3506 - -mat_block_size - size of the blocks to use 3507 3508 Level: intermediate 3509 3510 Notes: 3511 The number of rows and columns must be divisible by blocksize. 3512 3513 If the nnz parameter is given then the nz parameter is ignored 3514 3515 A nonzero block is any block that as 1 or more nonzeros in it 3516 3517 The block AIJ format is fully compatible with standard Fortran 77 3518 storage. That is, the stored row and column indices can begin at 3519 either one (as in Fortran) or zero. See the users' manual for details. 3520 3521 Specify the preallocated storage with either nz or nnz (not both). 3522 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3523 allocation. See Users-Manual: ch_mat for details. 3524 matrices. 3525 3526 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3527 @*/ 3528 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3529 { 3530 PetscErrorCode ierr; 3531 3532 PetscFunctionBegin; 3533 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3534 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3535 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3536 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3537 PetscFunctionReturn(0); 3538 } 3539 3540 /*@C 3541 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3542 per row in the matrix. For good matrix assembly performance the 3543 user should preallocate the matrix storage by setting the parameter nz 3544 (or the array nnz). By setting these parameters accurately, performance 3545 during matrix assembly can be increased by more than a factor of 50. 3546 3547 Collective 3548 3549 Input Parameters: 3550 + B - the matrix 3551 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3552 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3553 . nz - number of block nonzeros per block row (same for all rows) 3554 - nnz - array containing the number of block nonzeros in the various block rows 3555 (possibly different for each block row) or NULL 3556 3557 Options Database Keys: 3558 + -mat_no_unroll - uses code that does not unroll the loops in the 3559 block calculations (much slower) 3560 - -mat_block_size - size of the blocks to use 3561 3562 Level: intermediate 3563 3564 Notes: 3565 If the nnz parameter is given then the nz parameter is ignored 3566 3567 You can call MatGetInfo() to get information on how effective the preallocation was; 3568 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3569 You can also run with the option -info and look for messages with the string 3570 malloc in them to see if additional memory allocation was needed. 3571 3572 The block AIJ format is fully compatible with standard Fortran 77 3573 storage. That is, the stored row and column indices can begin at 3574 either one (as in Fortran) or zero. See the users' manual for details. 3575 3576 Specify the preallocated storage with either nz or nnz (not both). 3577 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3578 allocation. See Users-Manual: ch_mat for details. 3579 3580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3581 @*/ 3582 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3583 { 3584 PetscErrorCode ierr; 3585 3586 PetscFunctionBegin; 3587 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3588 PetscValidType(B,1); 3589 PetscValidLogicalCollectiveInt(B,bs,2); 3590 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3591 PetscFunctionReturn(0); 3592 } 3593 3594 /*@C 3595 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3596 3597 Collective 3598 3599 Input Parameters: 3600 + B - the matrix 3601 . i - the indices into j for the start of each local row (starts with zero) 3602 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3603 - v - optional values in the matrix 3604 3605 Level: advanced 3606 3607 Notes: 3608 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3609 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3610 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3611 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3612 block column and the second index is over columns within a block. 3613 3614 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3615 3616 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3617 @*/ 3618 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3619 { 3620 PetscErrorCode ierr; 3621 3622 PetscFunctionBegin; 3623 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3624 PetscValidType(B,1); 3625 PetscValidLogicalCollectiveInt(B,bs,2); 3626 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3627 PetscFunctionReturn(0); 3628 } 3629 3630 /*@ 3631 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3632 3633 Collective 3634 3635 Input Parameters: 3636 + comm - must be an MPI communicator of size 1 3637 . bs - size of block 3638 . m - number of rows 3639 . n - number of columns 3640 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3641 . j - column indices 3642 - a - matrix values 3643 3644 Output Parameter: 3645 . mat - the matrix 3646 3647 Level: advanced 3648 3649 Notes: 3650 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3651 once the matrix is destroyed 3652 3653 You cannot set new nonzero locations into this matrix, that will generate an error. 3654 3655 The i and j indices are 0 based 3656 3657 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3658 3659 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3660 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3661 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3662 with column-major ordering within blocks. 3663 3664 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3665 3666 @*/ 3667 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3668 { 3669 PetscErrorCode ierr; 3670 PetscInt ii; 3671 Mat_SeqBAIJ *baij; 3672 3673 PetscFunctionBegin; 3674 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3675 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3676 3677 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3678 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3679 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3680 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3681 baij = (Mat_SeqBAIJ*)(*mat)->data; 3682 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3683 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3684 3685 baij->i = i; 3686 baij->j = j; 3687 baij->a = a; 3688 3689 baij->singlemalloc = PETSC_FALSE; 3690 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3691 baij->free_a = PETSC_FALSE; 3692 baij->free_ij = PETSC_FALSE; 3693 3694 for (ii=0; ii<m; ii++) { 3695 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3696 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3697 } 3698 if (PetscDefined(USE_DEBUG)) { 3699 for (ii=0; ii<baij->i[m]; ii++) { 3700 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3701 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3702 } 3703 } 3704 3705 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3706 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3707 PetscFunctionReturn(0); 3708 } 3709 3710 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3711 { 3712 PetscErrorCode ierr; 3713 PetscMPIInt size; 3714 3715 PetscFunctionBegin; 3716 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3717 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3718 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3719 } else { 3720 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3721 } 3722 PetscFunctionReturn(0); 3723 } 3724