1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions) 21 { 22 PetscErrorCode ierr; 23 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 24 PetscInt m,n,i; 25 PetscInt ib,jb,bs = A->rmap->bs; 26 MatScalar *a_val = a_aij->a; 27 28 PetscFunctionBegin; 29 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 30 for (i=0; i<n; i++) reductions[i] = 0.0; 31 if (type == NORM_2) { 32 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 33 for (jb=0; jb<bs; jb++) { 34 for (ib=0; ib<bs; ib++) { 35 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 36 a_val++; 37 } 38 } 39 } 40 } else if (type == NORM_1) { 41 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 42 for (jb=0; jb<bs; jb++) { 43 for (ib=0; ib<bs; ib++) { 44 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 45 a_val++; 46 } 47 } 48 } 49 } else if (type == NORM_INFINITY) { 50 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 51 for (jb=0; jb<bs; jb++) { 52 for (ib=0; ib<bs; ib++) { 53 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 54 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 55 a_val++; 56 } 57 } 58 } 59 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 60 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 61 for (jb=0; jb<bs; jb++) { 62 for (ib=0; ib<bs; ib++) { 63 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 64 a_val++; 65 } 66 } 67 } 68 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 69 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 70 for (jb=0; jb<bs; jb++) { 71 for (ib=0; ib<bs; ib++) { 72 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 73 a_val++; 74 } 75 } 76 } 77 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 78 if (type == NORM_2) { 79 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 80 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 81 for (i=0; i<n; i++) reductions[i] /= m; 82 } 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 87 { 88 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 89 PetscErrorCode ierr; 90 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 91 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 92 PetscReal shift = 0.0; 93 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 94 95 PetscFunctionBegin; 96 allowzeropivot = PetscNot(A->erroriffailure); 97 98 if (a->idiagvalid) { 99 if (values) *values = a->idiag; 100 PetscFunctionReturn(0); 101 } 102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 103 diag_offset = a->diag; 104 if (!a->idiag) { 105 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 106 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 107 } 108 diag = a->idiag; 109 if (values) *values = a->idiag; 110 /* factor and invert each block */ 111 switch (bs) { 112 case 1: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 1*diag_offset[i]; 115 diag[0] = odiag[0]; 116 117 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 118 if (allowzeropivot) { 119 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 120 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 121 A->factorerror_zeropivot_row = i; 122 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 123 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 124 } 125 126 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 127 diag += 1; 128 } 129 break; 130 case 2: 131 for (i=0; i<mbs; i++) { 132 odiag = v + 4*diag_offset[i]; 133 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 134 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 135 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136 diag += 4; 137 } 138 break; 139 case 3: 140 for (i=0; i<mbs; i++) { 141 odiag = v + 9*diag_offset[i]; 142 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 143 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 144 diag[8] = odiag[8]; 145 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 146 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 147 diag += 9; 148 } 149 break; 150 case 4: 151 for (i=0; i<mbs; i++) { 152 odiag = v + 16*diag_offset[i]; 153 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 154 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 155 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 156 diag += 16; 157 } 158 break; 159 case 5: 160 for (i=0; i<mbs; i++) { 161 odiag = v + 25*diag_offset[i]; 162 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 163 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 164 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 165 diag += 25; 166 } 167 break; 168 case 6: 169 for (i=0; i<mbs; i++) { 170 odiag = v + 36*diag_offset[i]; 171 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 172 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 173 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 174 diag += 36; 175 } 176 break; 177 case 7: 178 for (i=0; i<mbs; i++) { 179 odiag = v + 49*diag_offset[i]; 180 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 181 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 182 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 183 diag += 49; 184 } 185 break; 186 default: 187 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 188 for (i=0; i<mbs; i++) { 189 odiag = v + bs2*diag_offset[i]; 190 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 191 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 192 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 193 diag += bs2; 194 } 195 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 196 } 197 a->idiagvalid = PETSC_TRUE; 198 PetscFunctionReturn(0); 199 } 200 201 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 202 { 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 204 PetscScalar *x,*work,*w,*workt,*t; 205 const MatScalar *v,*aa = a->a, *idiag; 206 const PetscScalar *b,*xb; 207 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 208 PetscErrorCode ierr; 209 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 210 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 211 212 PetscFunctionBegin; 213 its = its*lits; 214 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 215 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 216 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 217 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 218 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 219 220 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 221 222 if (!m) PetscFunctionReturn(0); 223 diag = a->diag; 224 idiag = a->idiag; 225 k = PetscMax(A->rmap->n,A->cmap->n); 226 if (!a->mult_work) { 227 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 228 } 229 if (!a->sor_workt) { 230 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 231 } 232 if (!a->sor_work) { 233 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 234 } 235 work = a->mult_work; 236 t = a->sor_workt; 237 w = a->sor_work; 238 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 244 switch (bs) { 245 case 1: 246 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 247 t[0] = b[0]; 248 i2 = 1; 249 idiag += 1; 250 for (i=1; i<m; i++) { 251 v = aa + ai[i]; 252 vi = aj + ai[i]; 253 nz = diag[i] - ai[i]; 254 s[0] = b[i2]; 255 for (j=0; j<nz; j++) { 256 xw[0] = x[vi[j]]; 257 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 258 } 259 t[i2] = s[0]; 260 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 261 x[i2] = xw[0]; 262 idiag += 1; 263 i2 += 1; 264 } 265 break; 266 case 2: 267 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 268 t[0] = b[0]; t[1] = b[1]; 269 i2 = 2; 270 idiag += 4; 271 for (i=1; i<m; i++) { 272 v = aa + 4*ai[i]; 273 vi = aj + ai[i]; 274 nz = diag[i] - ai[i]; 275 s[0] = b[i2]; s[1] = b[i2+1]; 276 for (j=0; j<nz; j++) { 277 idx = 2*vi[j]; 278 it = 4*j; 279 xw[0] = x[idx]; xw[1] = x[1+idx]; 280 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 281 } 282 t[i2] = s[0]; t[i2+1] = s[1]; 283 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 284 x[i2] = xw[0]; x[i2+1] = xw[1]; 285 idiag += 4; 286 i2 += 2; 287 } 288 break; 289 case 3: 290 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 291 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 292 i2 = 3; 293 idiag += 9; 294 for (i=1; i<m; i++) { 295 v = aa + 9*ai[i]; 296 vi = aj + ai[i]; 297 nz = diag[i] - ai[i]; 298 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 299 while (nz--) { 300 idx = 3*(*vi++); 301 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 302 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 303 v += 9; 304 } 305 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 306 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 307 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 308 idiag += 9; 309 i2 += 3; 310 } 311 break; 312 case 4: 313 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 314 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 315 i2 = 4; 316 idiag += 16; 317 for (i=1; i<m; i++) { 318 v = aa + 16*ai[i]; 319 vi = aj + ai[i]; 320 nz = diag[i] - ai[i]; 321 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 322 while (nz--) { 323 idx = 4*(*vi++); 324 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 325 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 326 v += 16; 327 } 328 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 329 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 331 idiag += 16; 332 i2 += 4; 333 } 334 break; 335 case 5: 336 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 337 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 338 i2 = 5; 339 idiag += 25; 340 for (i=1; i<m; i++) { 341 v = aa + 25*ai[i]; 342 vi = aj + ai[i]; 343 nz = diag[i] - ai[i]; 344 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 345 while (nz--) { 346 idx = 5*(*vi++); 347 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 349 v += 25; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 352 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 353 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 354 idiag += 25; 355 i2 += 5; 356 } 357 break; 358 case 6: 359 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 360 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 361 i2 = 6; 362 idiag += 36; 363 for (i=1; i<m; i++) { 364 v = aa + 36*ai[i]; 365 vi = aj + ai[i]; 366 nz = diag[i] - ai[i]; 367 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 368 while (nz--) { 369 idx = 6*(*vi++); 370 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 371 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 372 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 373 v += 36; 374 } 375 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 376 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 377 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 378 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 379 idiag += 36; 380 i2 += 6; 381 } 382 break; 383 case 7: 384 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 385 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 386 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 387 i2 = 7; 388 idiag += 49; 389 for (i=1; i<m; i++) { 390 v = aa + 49*ai[i]; 391 vi = aj + ai[i]; 392 nz = diag[i] - ai[i]; 393 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 394 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 395 while (nz--) { 396 idx = 7*(*vi++); 397 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 398 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 400 v += 49; 401 } 402 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 403 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 404 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 405 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 406 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 407 idiag += 49; 408 i2 += 7; 409 } 410 break; 411 default: 412 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 413 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 414 i2 = bs; 415 idiag += bs2; 416 for (i=1; i<m; i++) { 417 v = aa + bs2*ai[i]; 418 vi = aj + ai[i]; 419 nz = diag[i] - ai[i]; 420 421 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 422 /* copy all rows of x that are needed into contiguous space */ 423 workt = work; 424 for (j=0; j<nz; j++) { 425 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 426 workt += bs; 427 } 428 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 429 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 430 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 431 432 idiag += bs2; 433 i2 += bs; 434 } 435 break; 436 } 437 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 438 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 439 xb = t; 440 } 441 else xb = b; 442 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 443 idiag = a->idiag+bs2*(a->mbs-1); 444 i2 = bs * (m-1); 445 switch (bs) { 446 case 1: 447 s[0] = xb[i2]; 448 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 449 x[i2] = xw[0]; 450 i2 -= 1; 451 for (i=m-2; i>=0; i--) { 452 v = aa + (diag[i]+1); 453 vi = aj + diag[i] + 1; 454 nz = ai[i+1] - diag[i] - 1; 455 s[0] = xb[i2]; 456 for (j=0; j<nz; j++) { 457 xw[0] = x[vi[j]]; 458 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 459 } 460 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 461 x[i2] = xw[0]; 462 idiag -= 1; 463 i2 -= 1; 464 } 465 break; 466 case 2: 467 s[0] = xb[i2]; s[1] = xb[i2+1]; 468 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 469 x[i2] = xw[0]; x[i2+1] = xw[1]; 470 i2 -= 2; 471 idiag -= 4; 472 for (i=m-2; i>=0; i--) { 473 v = aa + 4*(diag[i] + 1); 474 vi = aj + diag[i] + 1; 475 nz = ai[i+1] - diag[i] - 1; 476 s[0] = xb[i2]; s[1] = xb[i2+1]; 477 for (j=0; j<nz; j++) { 478 idx = 2*vi[j]; 479 it = 4*j; 480 xw[0] = x[idx]; xw[1] = x[1+idx]; 481 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 482 } 483 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 484 x[i2] = xw[0]; x[i2+1] = xw[1]; 485 idiag -= 4; 486 i2 -= 2; 487 } 488 break; 489 case 3: 490 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 491 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 492 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 493 i2 -= 3; 494 idiag -= 9; 495 for (i=m-2; i>=0; i--) { 496 v = aa + 9*(diag[i]+1); 497 vi = aj + diag[i] + 1; 498 nz = ai[i+1] - diag[i] - 1; 499 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 500 while (nz--) { 501 idx = 3*(*vi++); 502 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 503 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 504 v += 9; 505 } 506 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 507 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 508 idiag -= 9; 509 i2 -= 3; 510 } 511 break; 512 case 4: 513 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 514 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 515 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 516 i2 -= 4; 517 idiag -= 16; 518 for (i=m-2; i>=0; i--) { 519 v = aa + 16*(diag[i]+1); 520 vi = aj + diag[i] + 1; 521 nz = ai[i+1] - diag[i] - 1; 522 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 523 while (nz--) { 524 idx = 4*(*vi++); 525 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 526 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 527 v += 16; 528 } 529 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 530 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 531 idiag -= 16; 532 i2 -= 4; 533 } 534 break; 535 case 5: 536 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 537 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 539 i2 -= 5; 540 idiag -= 25; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 25*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 546 while (nz--) { 547 idx = 5*(*vi++); 548 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 550 v += 25; 551 } 552 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 break; 558 case 6: 559 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 560 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 561 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 562 i2 -= 6; 563 idiag -= 36; 564 for (i=m-2; i>=0; i--) { 565 v = aa + 36*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 569 while (nz--) { 570 idx = 6*(*vi++); 571 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 572 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 573 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 574 v += 36; 575 } 576 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 577 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 578 idiag -= 36; 579 i2 -= 6; 580 } 581 break; 582 case 7: 583 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 584 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 585 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 586 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 587 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 588 i2 -= 7; 589 idiag -= 49; 590 for (i=m-2; i>=0; i--) { 591 v = aa + 49*(diag[i]+1); 592 vi = aj + diag[i] + 1; 593 nz = ai[i+1] - diag[i] - 1; 594 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 595 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 596 while (nz--) { 597 idx = 7*(*vi++); 598 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 599 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 600 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 601 v += 49; 602 } 603 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 604 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 605 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 606 idiag -= 49; 607 i2 -= 7; 608 } 609 break; 610 default: 611 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 612 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 613 i2 -= bs; 614 idiag -= bs2; 615 for (i=m-2; i>=0; i--) { 616 v = aa + bs2*(diag[i]+1); 617 vi = aj + diag[i] + 1; 618 nz = ai[i+1] - diag[i] - 1; 619 620 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 621 /* copy all rows of x that are needed into contiguous space */ 622 workt = work; 623 for (j=0; j<nz; j++) { 624 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 625 workt += bs; 626 } 627 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 628 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 629 630 idiag -= bs2; 631 i2 -= bs; 632 } 633 break; 634 } 635 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 636 } 637 its--; 638 } 639 while (its--) { 640 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 641 idiag = a->idiag; 642 i2 = 0; 643 switch (bs) { 644 case 1: 645 for (i=0; i<m; i++) { 646 v = aa + ai[i]; 647 vi = aj + ai[i]; 648 nz = ai[i+1] - ai[i]; 649 s[0] = b[i2]; 650 for (j=0; j<nz; j++) { 651 xw[0] = x[vi[j]]; 652 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 653 } 654 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 655 x[i2] += xw[0]; 656 idiag += 1; 657 i2 += 1; 658 } 659 break; 660 case 2: 661 for (i=0; i<m; i++) { 662 v = aa + 4*ai[i]; 663 vi = aj + ai[i]; 664 nz = ai[i+1] - ai[i]; 665 s[0] = b[i2]; s[1] = b[i2+1]; 666 for (j=0; j<nz; j++) { 667 idx = 2*vi[j]; 668 it = 4*j; 669 xw[0] = x[idx]; xw[1] = x[1+idx]; 670 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 671 } 672 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 673 x[i2] += xw[0]; x[i2+1] += xw[1]; 674 idiag += 4; 675 i2 += 2; 676 } 677 break; 678 case 3: 679 for (i=0; i<m; i++) { 680 v = aa + 9*ai[i]; 681 vi = aj + ai[i]; 682 nz = ai[i+1] - ai[i]; 683 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 684 while (nz--) { 685 idx = 3*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 688 v += 9; 689 } 690 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 691 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 692 idiag += 9; 693 i2 += 3; 694 } 695 break; 696 case 4: 697 for (i=0; i<m; i++) { 698 v = aa + 16*ai[i]; 699 vi = aj + ai[i]; 700 nz = ai[i+1] - ai[i]; 701 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 702 while (nz--) { 703 idx = 4*(*vi++); 704 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 705 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 706 v += 16; 707 } 708 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 709 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 710 idiag += 16; 711 i2 += 4; 712 } 713 break; 714 case 5: 715 for (i=0; i<m; i++) { 716 v = aa + 25*ai[i]; 717 vi = aj + ai[i]; 718 nz = ai[i+1] - ai[i]; 719 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 720 while (nz--) { 721 idx = 5*(*vi++); 722 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 723 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 724 v += 25; 725 } 726 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 727 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 728 idiag += 25; 729 i2 += 5; 730 } 731 break; 732 case 6: 733 for (i=0; i<m; i++) { 734 v = aa + 36*ai[i]; 735 vi = aj + ai[i]; 736 nz = ai[i+1] - ai[i]; 737 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 738 while (nz--) { 739 idx = 6*(*vi++); 740 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 741 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 742 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 743 v += 36; 744 } 745 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 746 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 747 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 748 idiag += 36; 749 i2 += 6; 750 } 751 break; 752 case 7: 753 for (i=0; i<m; i++) { 754 v = aa + 49*ai[i]; 755 vi = aj + ai[i]; 756 nz = ai[i+1] - ai[i]; 757 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 758 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 759 while (nz--) { 760 idx = 7*(*vi++); 761 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 762 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 763 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 764 v += 49; 765 } 766 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 767 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 768 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 769 idiag += 49; 770 i2 += 7; 771 } 772 break; 773 default: 774 for (i=0; i<m; i++) { 775 v = aa + bs2*ai[i]; 776 vi = aj + ai[i]; 777 nz = ai[i+1] - ai[i]; 778 779 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 780 /* copy all rows of x that are needed into contiguous space */ 781 workt = work; 782 for (j=0; j<nz; j++) { 783 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 784 workt += bs; 785 } 786 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 787 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 788 789 idiag += bs2; 790 i2 += bs; 791 } 792 break; 793 } 794 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 795 } 796 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 797 idiag = a->idiag+bs2*(a->mbs-1); 798 i2 = bs * (m-1); 799 switch (bs) { 800 case 1: 801 for (i=m-1; i>=0; i--) { 802 v = aa + ai[i]; 803 vi = aj + ai[i]; 804 nz = ai[i+1] - ai[i]; 805 s[0] = b[i2]; 806 for (j=0; j<nz; j++) { 807 xw[0] = x[vi[j]]; 808 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 809 } 810 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 811 x[i2] += xw[0]; 812 idiag -= 1; 813 i2 -= 1; 814 } 815 break; 816 case 2: 817 for (i=m-1; i>=0; i--) { 818 v = aa + 4*ai[i]; 819 vi = aj + ai[i]; 820 nz = ai[i+1] - ai[i]; 821 s[0] = b[i2]; s[1] = b[i2+1]; 822 for (j=0; j<nz; j++) { 823 idx = 2*vi[j]; 824 it = 4*j; 825 xw[0] = x[idx]; xw[1] = x[1+idx]; 826 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 827 } 828 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 829 x[i2] += xw[0]; x[i2+1] += xw[1]; 830 idiag -= 4; 831 i2 -= 2; 832 } 833 break; 834 case 3: 835 for (i=m-1; i>=0; i--) { 836 v = aa + 9*ai[i]; 837 vi = aj + ai[i]; 838 nz = ai[i+1] - ai[i]; 839 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 840 while (nz--) { 841 idx = 3*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 844 v += 9; 845 } 846 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 847 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 848 idiag -= 9; 849 i2 -= 3; 850 } 851 break; 852 case 4: 853 for (i=m-1; i>=0; i--) { 854 v = aa + 16*ai[i]; 855 vi = aj + ai[i]; 856 nz = ai[i+1] - ai[i]; 857 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 858 while (nz--) { 859 idx = 4*(*vi++); 860 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 861 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 862 v += 16; 863 } 864 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 865 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 866 idiag -= 16; 867 i2 -= 4; 868 } 869 break; 870 case 5: 871 for (i=m-1; i>=0; i--) { 872 v = aa + 25*ai[i]; 873 vi = aj + ai[i]; 874 nz = ai[i+1] - ai[i]; 875 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 876 while (nz--) { 877 idx = 5*(*vi++); 878 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 879 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 880 v += 25; 881 } 882 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 883 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 884 idiag -= 25; 885 i2 -= 5; 886 } 887 break; 888 case 6: 889 for (i=m-1; i>=0; i--) { 890 v = aa + 36*ai[i]; 891 vi = aj + ai[i]; 892 nz = ai[i+1] - ai[i]; 893 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 894 while (nz--) { 895 idx = 6*(*vi++); 896 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 897 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 898 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 899 v += 36; 900 } 901 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 902 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 903 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 904 idiag -= 36; 905 i2 -= 6; 906 } 907 break; 908 case 7: 909 for (i=m-1; i>=0; i--) { 910 v = aa + 49*ai[i]; 911 vi = aj + ai[i]; 912 nz = ai[i+1] - ai[i]; 913 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 914 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 918 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 919 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 920 v += 49; 921 } 922 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 923 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 924 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 925 idiag -= 49; 926 i2 -= 7; 927 } 928 break; 929 default: 930 for (i=m-1; i>=0; i--) { 931 v = aa + bs2*ai[i]; 932 vi = aj + ai[i]; 933 nz = ai[i+1] - ai[i]; 934 935 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 936 /* copy all rows of x that are needed into contiguous space */ 937 workt = work; 938 for (j=0; j<nz; j++) { 939 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 940 workt += bs; 941 } 942 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 943 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 944 945 idiag -= bs2; 946 i2 -= bs; 947 } 948 break; 949 } 950 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 951 } 952 } 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 /* 959 Special version for direct calls from Fortran (Used in PETSc-fun3d) 960 */ 961 #if defined(PETSC_HAVE_FORTRAN_CAPS) 962 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 963 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 964 #define matsetvaluesblocked4_ matsetvaluesblocked4 965 #endif 966 967 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 968 { 969 Mat A = *AA; 970 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 971 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 972 PetscInt *ai =a->i,*ailen=a->ilen; 973 PetscInt *aj =a->j,stepval,lastcol = -1; 974 const PetscScalar *value = v; 975 MatScalar *ap,*aa = a->a,*bap; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 980 stepval = (n-1)*4; 981 for (k=0; k<m; k++) { /* loop over added rows */ 982 row = im[k]; 983 rp = aj + ai[row]; 984 ap = aa + 16*ai[row]; 985 nrow = ailen[row]; 986 low = 0; 987 high = nrow; 988 for (l=0; l<n; l++) { /* loop over added columns */ 989 col = in[l]; 990 if (col <= lastcol) low = 0; 991 else high = nrow; 992 lastcol = col; 993 value = v + k*(stepval+4 + l)*4; 994 while (high-low > 7) { 995 t = (low+high)/2; 996 if (rp[t] > col) high = t; 997 else low = t; 998 } 999 for (i=low; i<high; i++) { 1000 if (rp[i] > col) break; 1001 if (rp[i] == col) { 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] += *value++; 1006 } 1007 } 1008 goto noinsert2; 1009 } 1010 } 1011 N = nrow++ - 1; 1012 high++; /* added new column index thus must search to one higher than before */ 1013 /* shift up all the later entries in this row */ 1014 for (ii=N; ii>=i; ii--) { 1015 rp[ii+1] = rp[ii]; 1016 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 1017 } 1018 if (N >= i) { 1019 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1020 } 1021 rp[i] = col; 1022 bap = ap + 16*i; 1023 for (ii=0; ii<4; ii++,value+=stepval) { 1024 for (jj=ii; jj<16; jj+=4) { 1025 bap[jj] = *value++; 1026 } 1027 } 1028 noinsert2:; 1029 low = i; 1030 } 1031 ailen[row] = nrow; 1032 } 1033 PetscFunctionReturnVoid(); 1034 } 1035 1036 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1037 #define matsetvalues4_ MATSETVALUES4 1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1039 #define matsetvalues4_ matsetvalues4 1040 #endif 1041 1042 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1043 { 1044 Mat A = *AA; 1045 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1046 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1047 PetscInt *ai=a->i,*ailen=a->ilen; 1048 PetscInt *aj=a->j,brow,bcol; 1049 PetscInt ridx,cidx,lastcol = -1; 1050 MatScalar *ap,value,*aa=a->a,*bap; 1051 PetscErrorCode ierr; 1052 1053 PetscFunctionBegin; 1054 for (k=0; k<m; k++) { /* loop over added rows */ 1055 row = im[k]; brow = row/4; 1056 rp = aj + ai[brow]; 1057 ap = aa + 16*ai[brow]; 1058 nrow = ailen[brow]; 1059 low = 0; 1060 high = nrow; 1061 for (l=0; l<n; l++) { /* loop over added columns */ 1062 col = in[l]; bcol = col/4; 1063 ridx = row % 4; cidx = col % 4; 1064 value = v[l + k*n]; 1065 if (col <= lastcol) low = 0; 1066 else high = nrow; 1067 lastcol = col; 1068 while (high-low > 7) { 1069 t = (low+high)/2; 1070 if (rp[t] > bcol) high = t; 1071 else low = t; 1072 } 1073 for (i=low; i<high; i++) { 1074 if (rp[i] > bcol) break; 1075 if (rp[i] == bcol) { 1076 bap = ap + 16*i + 4*cidx + ridx; 1077 *bap += value; 1078 goto noinsert1; 1079 } 1080 } 1081 N = nrow++ - 1; 1082 high++; /* added new column thus must search to one higher than before */ 1083 /* shift up all the later entries in this row */ 1084 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1085 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1086 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1087 rp[i] = bcol; 1088 ap[16*i + 4*cidx + ridx] = value; 1089 noinsert1:; 1090 low = i; 1091 } 1092 ailen[brow] = nrow; 1093 } 1094 PetscFunctionReturnVoid(); 1095 } 1096 1097 /* 1098 Checks for missing diagonals 1099 */ 1100 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1101 { 1102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1103 PetscErrorCode ierr; 1104 PetscInt *diag,*ii = a->i,i; 1105 1106 PetscFunctionBegin; 1107 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1108 *missing = PETSC_FALSE; 1109 if (A->rmap->n > 0 && !ii) { 1110 *missing = PETSC_TRUE; 1111 if (d) *d = 0; 1112 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1113 } else { 1114 PetscInt n; 1115 n = PetscMin(a->mbs, a->nbs); 1116 diag = a->diag; 1117 for (i=0; i<n; i++) { 1118 if (diag[i] >= ii[i+1]) { 1119 *missing = PETSC_TRUE; 1120 if (d) *d = i; 1121 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1122 break; 1123 } 1124 } 1125 } 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1130 { 1131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 PetscInt i,j,m = a->mbs; 1134 1135 PetscFunctionBegin; 1136 if (!a->diag) { 1137 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1138 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1139 a->free_diag = PETSC_TRUE; 1140 } 1141 for (i=0; i<m; i++) { 1142 a->diag[i] = a->i[i+1]; 1143 for (j=a->i[i]; j<a->i[i+1]; j++) { 1144 if (a->j[j] == i) { 1145 a->diag[i] = j; 1146 break; 1147 } 1148 } 1149 } 1150 PetscFunctionReturn(0); 1151 } 1152 1153 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1154 { 1155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1156 PetscErrorCode ierr; 1157 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1158 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1159 1160 PetscFunctionBegin; 1161 *nn = n; 1162 if (!ia) PetscFunctionReturn(0); 1163 if (symmetric) { 1164 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1165 nz = tia[n]; 1166 } else { 1167 tia = a->i; tja = a->j; 1168 } 1169 1170 if (!blockcompressed && bs > 1) { 1171 (*nn) *= bs; 1172 /* malloc & create the natural set of indices */ 1173 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1174 if (n) { 1175 (*ia)[0] = oshift; 1176 for (j=1; j<bs; j++) { 1177 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1178 } 1179 } 1180 1181 for (i=1; i<n; i++) { 1182 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1183 for (j=1; j<bs; j++) { 1184 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1185 } 1186 } 1187 if (n) { 1188 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1189 } 1190 1191 if (inja) { 1192 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1193 cnt = 0; 1194 for (i=0; i<n; i++) { 1195 for (j=0; j<bs; j++) { 1196 for (k=tia[i]; k<tia[i+1]; k++) { 1197 for (l=0; l<bs; l++) { 1198 (*ja)[cnt++] = bs*tja[k] + l; 1199 } 1200 } 1201 } 1202 } 1203 } 1204 1205 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1206 ierr = PetscFree(tia);CHKERRQ(ierr); 1207 ierr = PetscFree(tja);CHKERRQ(ierr); 1208 } 1209 } else if (oshift == 1) { 1210 if (symmetric) { 1211 nz = tia[A->rmap->n/bs]; 1212 /* add 1 to i and j indices */ 1213 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1214 *ia = tia; 1215 if (ja) { 1216 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1217 *ja = tja; 1218 } 1219 } else { 1220 nz = a->i[A->rmap->n/bs]; 1221 /* malloc space and add 1 to i and j indices */ 1222 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1223 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1224 if (ja) { 1225 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1226 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1227 } 1228 } 1229 } else { 1230 *ia = tia; 1231 if (ja) *ja = tja; 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1237 { 1238 PetscErrorCode ierr; 1239 1240 PetscFunctionBegin; 1241 if (!ia) PetscFunctionReturn(0); 1242 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1243 ierr = PetscFree(*ia);CHKERRQ(ierr); 1244 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 #if defined(PETSC_USE_LOG) 1256 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1257 #endif 1258 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1259 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1260 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1261 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1262 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1263 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1264 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1265 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1266 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1267 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1268 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1269 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1270 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1271 1272 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1273 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1274 ierr = PetscFree(A->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1288 #if defined(PETSC_HAVE_HYPRE) 1289 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1290 #endif 1291 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1296 { 1297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1298 PetscErrorCode ierr; 1299 1300 PetscFunctionBegin; 1301 switch (op) { 1302 case MAT_ROW_ORIENTED: 1303 a->roworiented = flg; 1304 break; 1305 case MAT_KEEP_NONZERO_PATTERN: 1306 a->keepnonzeropattern = flg; 1307 break; 1308 case MAT_NEW_NONZERO_LOCATIONS: 1309 a->nonew = (flg ? 0 : 1); 1310 break; 1311 case MAT_NEW_NONZERO_LOCATION_ERR: 1312 a->nonew = (flg ? -1 : 0); 1313 break; 1314 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1315 a->nonew = (flg ? -2 : 0); 1316 break; 1317 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1318 a->nounused = (flg ? -1 : 0); 1319 break; 1320 case MAT_FORCE_DIAGONAL_ENTRIES: 1321 case MAT_IGNORE_OFF_PROC_ENTRIES: 1322 case MAT_USE_HASH_TABLE: 1323 case MAT_SORTED_FULL: 1324 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1325 break; 1326 case MAT_SPD: 1327 case MAT_SYMMETRIC: 1328 case MAT_STRUCTURALLY_SYMMETRIC: 1329 case MAT_HERMITIAN: 1330 case MAT_SYMMETRY_ETERNAL: 1331 case MAT_SUBMAT_SINGLEIS: 1332 case MAT_STRUCTURE_ONLY: 1333 /* These options are handled directly by MatSetOption() */ 1334 break; 1335 default: 1336 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1337 } 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1342 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1343 { 1344 PetscErrorCode ierr; 1345 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1346 MatScalar *aa_i; 1347 PetscScalar *v_i; 1348 1349 PetscFunctionBegin; 1350 bs = A->rmap->bs; 1351 bs2 = bs*bs; 1352 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1353 1354 bn = row/bs; /* Block number */ 1355 bp = row % bs; /* Block Position */ 1356 M = ai[bn+1] - ai[bn]; 1357 *nz = bs*M; 1358 1359 if (v) { 1360 *v = NULL; 1361 if (*nz) { 1362 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1363 for (i=0; i<M; i++) { /* for each block in the block row */ 1364 v_i = *v + i*bs; 1365 aa_i = aa + bs2*(ai[bn] + i); 1366 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1367 } 1368 } 1369 } 1370 1371 if (idx) { 1372 *idx = NULL; 1373 if (*nz) { 1374 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1375 for (i=0; i<M; i++) { /* for each block in the block row */ 1376 idx_i = *idx + i*bs; 1377 itmp = bs*aj[ai[bn] + i]; 1378 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1379 } 1380 } 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1386 { 1387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1396 { 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 if (nz) *nz = 0; 1401 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1402 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1407 { 1408 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1409 Mat C; 1410 PetscErrorCode ierr; 1411 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1412 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1413 MatScalar *ata,*aa=a->a; 1414 1415 PetscFunctionBegin; 1416 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1417 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1418 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1419 1420 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1421 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1422 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1423 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1424 1425 at = (Mat_SeqBAIJ*)C->data; 1426 ati = at->i; 1427 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1428 } else { 1429 C = *B; 1430 at = (Mat_SeqBAIJ*)C->data; 1431 ati = at->i; 1432 } 1433 1434 atj = at->j; 1435 ata = at->a; 1436 1437 /* Copy ati into atfill so we have locations of the next free space in atj */ 1438 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1439 1440 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1441 for (i=0; i<mbs; i++) { 1442 anzj = ai[i+1] - ai[i]; 1443 for (j=0; j<anzj; j++) { 1444 atj[atfill[*aj]] = i; 1445 for (kr=0; kr<bs; kr++) { 1446 for (k=0; k<bs; k++) { 1447 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1448 } 1449 } 1450 atfill[*aj++] += 1; 1451 } 1452 } 1453 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1454 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 1456 /* Clean up temporary space and complete requests. */ 1457 ierr = PetscFree(atfill);CHKERRQ(ierr); 1458 1459 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1460 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1461 *B = C; 1462 } else { 1463 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1469 { 1470 PetscErrorCode ierr; 1471 Mat Btrans; 1472 1473 PetscFunctionBegin; 1474 *f = PETSC_FALSE; 1475 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1476 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1477 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1478 PetscFunctionReturn(0); 1479 } 1480 1481 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1482 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1483 { 1484 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1485 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1486 PetscInt *rowlens,*colidxs; 1487 PetscScalar *matvals; 1488 PetscErrorCode ierr; 1489 1490 PetscFunctionBegin; 1491 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1492 1493 M = mat->rmap->N; 1494 N = mat->cmap->N; 1495 m = mat->rmap->n; 1496 bs = mat->rmap->bs; 1497 nz = bs*bs*A->nz; 1498 1499 /* write matrix header */ 1500 header[0] = MAT_FILE_CLASSID; 1501 header[1] = M; header[2] = N; header[3] = nz; 1502 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1503 1504 /* store row lengths */ 1505 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1506 for (cnt=0, i=0; i<A->mbs; i++) 1507 for (j=0; j<bs; j++) 1508 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1509 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1510 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1511 1512 /* store column indices */ 1513 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1514 for (cnt=0, i=0; i<A->mbs; i++) 1515 for (k=0; k<bs; k++) 1516 for (j=A->i[i]; j<A->i[i+1]; j++) 1517 for (l=0; l<bs; l++) 1518 colidxs[cnt++] = bs*A->j[j] + l; 1519 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1520 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1521 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1522 1523 /* store nonzero values */ 1524 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1525 for (cnt=0, i=0; i<A->mbs; i++) 1526 for (k=0; k<bs; k++) 1527 for (j=A->i[i]; j<A->i[i+1]; j++) 1528 for (l=0; l<bs; l++) 1529 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1530 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1531 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1532 ierr = PetscFree(matvals);CHKERRQ(ierr); 1533 1534 /* write block size option to the viewer's .info file */ 1535 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1540 { 1541 PetscErrorCode ierr; 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1543 PetscInt i,bs = A->rmap->bs,k; 1544 1545 PetscFunctionBegin; 1546 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1547 for (i=0; i<a->mbs; i++) { 1548 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1549 for (k=a->i[i]; k<a->i[i+1]; k++) { 1550 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1551 } 1552 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1553 } 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1559 { 1560 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1561 PetscErrorCode ierr; 1562 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1563 PetscViewerFormat format; 1564 1565 PetscFunctionBegin; 1566 if (A->structure_only) { 1567 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1572 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1573 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1574 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1575 const char *matname; 1576 Mat aij; 1577 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1578 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1579 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1580 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1581 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1582 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1583 PetscFunctionReturn(0); 1584 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1586 for (i=0; i<a->mbs; i++) { 1587 for (j=0; j<bs; j++) { 1588 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1589 for (k=a->i[i]; k<a->i[i+1]; k++) { 1590 for (l=0; l<bs; l++) { 1591 #if defined(PETSC_USE_COMPLEX) 1592 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1593 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1594 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1595 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1596 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1597 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1598 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1599 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1600 } 1601 #else 1602 if (a->a[bs2*k + l*bs + j] != 0.0) { 1603 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1604 } 1605 #endif 1606 } 1607 } 1608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1609 } 1610 } 1611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1612 } else { 1613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1614 for (i=0; i<a->mbs; i++) { 1615 for (j=0; j<bs; j++) { 1616 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1617 for (k=a->i[i]; k<a->i[i+1]; k++) { 1618 for (l=0; l<bs; l++) { 1619 #if defined(PETSC_USE_COMPLEX) 1620 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1621 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1622 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1623 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1624 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1625 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1626 } else { 1627 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1628 } 1629 #else 1630 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1631 #endif 1632 } 1633 } 1634 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1635 } 1636 } 1637 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1638 } 1639 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1640 PetscFunctionReturn(0); 1641 } 1642 1643 #include <petscdraw.h> 1644 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1645 { 1646 Mat A = (Mat) Aa; 1647 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1648 PetscErrorCode ierr; 1649 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1650 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1651 MatScalar *aa; 1652 PetscViewer viewer; 1653 PetscViewerFormat format; 1654 1655 PetscFunctionBegin; 1656 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1657 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1658 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1659 1660 /* loop over matrix elements drawing boxes */ 1661 1662 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1663 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1664 /* Blue for negative, Cyan for zero and Red for positive */ 1665 color = PETSC_DRAW_BLUE; 1666 for (i=0,row=0; i<mbs; i++,row+=bs) { 1667 for (j=a->i[i]; j<a->i[i+1]; j++) { 1668 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1669 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1670 aa = a->a + j*bs2; 1671 for (k=0; k<bs; k++) { 1672 for (l=0; l<bs; l++) { 1673 if (PetscRealPart(*aa++) >= 0.) continue; 1674 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1675 } 1676 } 1677 } 1678 } 1679 color = PETSC_DRAW_CYAN; 1680 for (i=0,row=0; i<mbs; i++,row+=bs) { 1681 for (j=a->i[i]; j<a->i[i+1]; j++) { 1682 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1683 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1684 aa = a->a + j*bs2; 1685 for (k=0; k<bs; k++) { 1686 for (l=0; l<bs; l++) { 1687 if (PetscRealPart(*aa++) != 0.) continue; 1688 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1689 } 1690 } 1691 } 1692 } 1693 color = PETSC_DRAW_RED; 1694 for (i=0,row=0; i<mbs; i++,row+=bs) { 1695 for (j=a->i[i]; j<a->i[i+1]; j++) { 1696 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1697 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1698 aa = a->a + j*bs2; 1699 for (k=0; k<bs; k++) { 1700 for (l=0; l<bs; l++) { 1701 if (PetscRealPart(*aa++) <= 0.) continue; 1702 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1703 } 1704 } 1705 } 1706 } 1707 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1708 } else { 1709 /* use contour shading to indicate magnitude of values */ 1710 /* first determine max of all nonzero values */ 1711 PetscReal minv = 0.0, maxv = 0.0; 1712 PetscDraw popup; 1713 1714 for (i=0; i<a->nz*a->bs2; i++) { 1715 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1716 } 1717 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1718 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1719 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1720 1721 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1722 for (i=0,row=0; i<mbs; i++,row+=bs) { 1723 for (j=a->i[i]; j<a->i[i+1]; j++) { 1724 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1725 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1726 aa = a->a + j*bs2; 1727 for (k=0; k<bs; k++) { 1728 for (l=0; l<bs; l++) { 1729 MatScalar v = *aa++; 1730 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1731 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1732 } 1733 } 1734 } 1735 } 1736 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1742 { 1743 PetscErrorCode ierr; 1744 PetscReal xl,yl,xr,yr,w,h; 1745 PetscDraw draw; 1746 PetscBool isnull; 1747 1748 PetscFunctionBegin; 1749 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1750 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1751 if (isnull) PetscFunctionReturn(0); 1752 1753 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1754 xr += w; yr += h; xl = -w; yl = -h; 1755 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1756 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1757 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1758 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1759 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1764 { 1765 PetscErrorCode ierr; 1766 PetscBool iascii,isbinary,isdraw; 1767 1768 PetscFunctionBegin; 1769 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1770 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1771 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1772 if (iascii) { 1773 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1774 } else if (isbinary) { 1775 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1776 } else if (isdraw) { 1777 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1778 } else { 1779 Mat B; 1780 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1781 ierr = MatView(B,viewer);CHKERRQ(ierr); 1782 ierr = MatDestroy(&B);CHKERRQ(ierr); 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1788 { 1789 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1790 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1791 PetscInt *ai = a->i,*ailen = a->ilen; 1792 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1793 MatScalar *ap,*aa = a->a; 1794 1795 PetscFunctionBegin; 1796 for (k=0; k<m; k++) { /* loop over rows */ 1797 row = im[k]; brow = row/bs; 1798 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1799 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1800 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1801 nrow = ailen[brow]; 1802 for (l=0; l<n; l++) { /* loop over columns */ 1803 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1804 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1805 col = in[l]; 1806 bcol = col/bs; 1807 cidx = col%bs; 1808 ridx = row%bs; 1809 high = nrow; 1810 low = 0; /* assume unsorted */ 1811 while (high-low > 5) { 1812 t = (low+high)/2; 1813 if (rp[t] > bcol) high = t; 1814 else low = t; 1815 } 1816 for (i=low; i<high; i++) { 1817 if (rp[i] > bcol) break; 1818 if (rp[i] == bcol) { 1819 *v++ = ap[bs2*i+bs*cidx+ridx]; 1820 goto finished; 1821 } 1822 } 1823 *v++ = 0.0; 1824 finished:; 1825 } 1826 } 1827 PetscFunctionReturn(0); 1828 } 1829 1830 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1831 { 1832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1833 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1834 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1835 PetscErrorCode ierr; 1836 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1837 PetscBool roworiented=a->roworiented; 1838 const PetscScalar *value = v; 1839 MatScalar *ap=NULL,*aa = a->a,*bap; 1840 1841 PetscFunctionBegin; 1842 if (roworiented) { 1843 stepval = (n-1)*bs; 1844 } else { 1845 stepval = (m-1)*bs; 1846 } 1847 for (k=0; k<m; k++) { /* loop over added rows */ 1848 row = im[k]; 1849 if (row < 0) continue; 1850 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1851 rp = aj + ai[row]; 1852 if (!A->structure_only) ap = aa + bs2*ai[row]; 1853 rmax = imax[row]; 1854 nrow = ailen[row]; 1855 low = 0; 1856 high = nrow; 1857 for (l=0; l<n; l++) { /* loop over added columns */ 1858 if (in[l] < 0) continue; 1859 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1860 col = in[l]; 1861 if (!A->structure_only) { 1862 if (roworiented) { 1863 value = v + (k*(stepval+bs) + l)*bs; 1864 } else { 1865 value = v + (l*(stepval+bs) + k)*bs; 1866 } 1867 } 1868 if (col <= lastcol) low = 0; 1869 else high = nrow; 1870 lastcol = col; 1871 while (high-low > 7) { 1872 t = (low+high)/2; 1873 if (rp[t] > col) high = t; 1874 else low = t; 1875 } 1876 for (i=low; i<high; i++) { 1877 if (rp[i] > col) break; 1878 if (rp[i] == col) { 1879 if (A->structure_only) goto noinsert2; 1880 bap = ap + bs2*i; 1881 if (roworiented) { 1882 if (is == ADD_VALUES) { 1883 for (ii=0; ii<bs; ii++,value+=stepval) { 1884 for (jj=ii; jj<bs2; jj+=bs) { 1885 bap[jj] += *value++; 1886 } 1887 } 1888 } else { 1889 for (ii=0; ii<bs; ii++,value+=stepval) { 1890 for (jj=ii; jj<bs2; jj+=bs) { 1891 bap[jj] = *value++; 1892 } 1893 } 1894 } 1895 } else { 1896 if (is == ADD_VALUES) { 1897 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1898 for (jj=0; jj<bs; jj++) { 1899 bap[jj] += value[jj]; 1900 } 1901 bap += bs; 1902 } 1903 } else { 1904 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1905 for (jj=0; jj<bs; jj++) { 1906 bap[jj] = value[jj]; 1907 } 1908 bap += bs; 1909 } 1910 } 1911 } 1912 goto noinsert2; 1913 } 1914 } 1915 if (nonew == 1) goto noinsert2; 1916 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1917 if (A->structure_only) { 1918 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1919 } else { 1920 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1921 } 1922 N = nrow++ - 1; high++; 1923 /* shift up all the later entries in this row */ 1924 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1925 rp[i] = col; 1926 if (!A->structure_only) { 1927 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1928 bap = ap + bs2*i; 1929 if (roworiented) { 1930 for (ii=0; ii<bs; ii++,value+=stepval) { 1931 for (jj=ii; jj<bs2; jj+=bs) { 1932 bap[jj] = *value++; 1933 } 1934 } 1935 } else { 1936 for (ii=0; ii<bs; ii++,value+=stepval) { 1937 for (jj=0; jj<bs; jj++) { 1938 *bap++ = *value++; 1939 } 1940 } 1941 } 1942 } 1943 noinsert2:; 1944 low = i; 1945 } 1946 ailen[row] = nrow; 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1952 { 1953 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1954 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1955 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1956 PetscErrorCode ierr; 1957 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1958 MatScalar *aa = a->a,*ap; 1959 PetscReal ratio=0.6; 1960 1961 PetscFunctionBegin; 1962 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1963 1964 if (m) rmax = ailen[0]; 1965 for (i=1; i<mbs; i++) { 1966 /* move each row back by the amount of empty slots (fshift) before it*/ 1967 fshift += imax[i-1] - ailen[i-1]; 1968 rmax = PetscMax(rmax,ailen[i]); 1969 if (fshift) { 1970 ip = aj + ai[i]; 1971 ap = aa + bs2*ai[i]; 1972 N = ailen[i]; 1973 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1974 if (!A->structure_only) { 1975 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1976 } 1977 } 1978 ai[i] = ai[i-1] + ailen[i-1]; 1979 } 1980 if (mbs) { 1981 fshift += imax[mbs-1] - ailen[mbs-1]; 1982 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1983 } 1984 1985 /* reset ilen and imax for each row */ 1986 a->nonzerorowcnt = 0; 1987 if (A->structure_only) { 1988 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1989 } else { /* !A->structure_only */ 1990 for (i=0; i<mbs; i++) { 1991 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1992 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1993 } 1994 } 1995 a->nz = ai[mbs]; 1996 1997 /* diagonals may have moved, so kill the diagonal pointers */ 1998 a->idiagvalid = PETSC_FALSE; 1999 if (fshift && a->diag) { 2000 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2001 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2002 a->diag = NULL; 2003 } 2004 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2005 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2006 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2007 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2008 2009 A->info.mallocs += a->reallocs; 2010 a->reallocs = 0; 2011 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2012 a->rmax = rmax; 2013 2014 if (!A->structure_only) { 2015 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 /* 2021 This function returns an array of flags which indicate the locations of contiguous 2022 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2023 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2024 Assume: sizes should be long enough to hold all the values. 2025 */ 2026 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2027 { 2028 PetscInt i,j,k,row; 2029 PetscBool flg; 2030 2031 PetscFunctionBegin; 2032 for (i=0,j=0; i<n; j++) { 2033 row = idx[i]; 2034 if (row%bs!=0) { /* Not the beginning of a block */ 2035 sizes[j] = 1; 2036 i++; 2037 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2038 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2039 i++; 2040 } else { /* Begining of the block, so check if the complete block exists */ 2041 flg = PETSC_TRUE; 2042 for (k=1; k<bs; k++) { 2043 if (row+k != idx[i+k]) { /* break in the block */ 2044 flg = PETSC_FALSE; 2045 break; 2046 } 2047 } 2048 if (flg) { /* No break in the bs */ 2049 sizes[j] = bs; 2050 i += bs; 2051 } else { 2052 sizes[j] = 1; 2053 i++; 2054 } 2055 } 2056 } 2057 *bs_max = j; 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2062 { 2063 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2064 PetscErrorCode ierr; 2065 PetscInt i,j,k,count,*rows; 2066 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2067 PetscScalar zero = 0.0; 2068 MatScalar *aa; 2069 const PetscScalar *xx; 2070 PetscScalar *bb; 2071 2072 PetscFunctionBegin; 2073 /* fix right hand side if needed */ 2074 if (x && b) { 2075 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2076 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2077 for (i=0; i<is_n; i++) { 2078 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2079 } 2080 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2082 } 2083 2084 /* Make a copy of the IS and sort it */ 2085 /* allocate memory for rows,sizes */ 2086 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2087 2088 /* copy IS values to rows, and sort them */ 2089 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2090 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2091 2092 if (baij->keepnonzeropattern) { 2093 for (i=0; i<is_n; i++) sizes[i] = 1; 2094 bs_max = is_n; 2095 } else { 2096 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2097 A->nonzerostate++; 2098 } 2099 2100 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2101 row = rows[j]; 2102 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2103 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2104 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2105 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2106 if (diag != (PetscScalar)0.0) { 2107 if (baij->ilen[row/bs] > 0) { 2108 baij->ilen[row/bs] = 1; 2109 baij->j[baij->i[row/bs]] = row/bs; 2110 2111 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2112 } 2113 /* Now insert all the diagonal values for this bs */ 2114 for (k=0; k<bs; k++) { 2115 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2116 } 2117 } else { /* (diag == 0.0) */ 2118 baij->ilen[row/bs] = 0; 2119 } /* end (diag == 0.0) */ 2120 } else { /* (sizes[i] != bs) */ 2121 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2122 for (k=0; k<count; k++) { 2123 aa[0] = zero; 2124 aa += bs; 2125 } 2126 if (diag != (PetscScalar)0.0) { 2127 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2128 } 2129 } 2130 } 2131 2132 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2133 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2138 { 2139 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2140 PetscErrorCode ierr; 2141 PetscInt i,j,k,count; 2142 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2143 PetscScalar zero = 0.0; 2144 MatScalar *aa; 2145 const PetscScalar *xx; 2146 PetscScalar *bb; 2147 PetscBool *zeroed,vecs = PETSC_FALSE; 2148 2149 PetscFunctionBegin; 2150 /* fix right hand side if needed */ 2151 if (x && b) { 2152 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2153 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2154 vecs = PETSC_TRUE; 2155 } 2156 2157 /* zero the columns */ 2158 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2159 for (i=0; i<is_n; i++) { 2160 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2161 zeroed[is_idx[i]] = PETSC_TRUE; 2162 } 2163 for (i=0; i<A->rmap->N; i++) { 2164 if (!zeroed[i]) { 2165 row = i/bs; 2166 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2167 for (k=0; k<bs; k++) { 2168 col = bs*baij->j[j] + k; 2169 if (zeroed[col]) { 2170 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2171 if (vecs) bb[i] -= aa[0]*xx[col]; 2172 aa[0] = 0.0; 2173 } 2174 } 2175 } 2176 } else if (vecs) bb[i] = diag*xx[i]; 2177 } 2178 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2179 if (vecs) { 2180 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2181 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2182 } 2183 2184 /* zero the rows */ 2185 for (i=0; i<is_n; i++) { 2186 row = is_idx[i]; 2187 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2188 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2189 for (k=0; k<count; k++) { 2190 aa[0] = zero; 2191 aa += bs; 2192 } 2193 if (diag != (PetscScalar)0.0) { 2194 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2195 } 2196 } 2197 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2202 { 2203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2204 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2205 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2206 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2207 PetscErrorCode ierr; 2208 PetscInt ridx,cidx,bs2=a->bs2; 2209 PetscBool roworiented=a->roworiented; 2210 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2211 2212 PetscFunctionBegin; 2213 for (k=0; k<m; k++) { /* loop over added rows */ 2214 row = im[k]; 2215 brow = row/bs; 2216 if (row < 0) continue; 2217 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2218 rp = aj + ai[brow]; 2219 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2220 rmax = imax[brow]; 2221 nrow = ailen[brow]; 2222 low = 0; 2223 high = nrow; 2224 for (l=0; l<n; l++) { /* loop over added columns */ 2225 if (in[l] < 0) continue; 2226 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2227 col = in[l]; bcol = col/bs; 2228 ridx = row % bs; cidx = col % bs; 2229 if (!A->structure_only) { 2230 if (roworiented) { 2231 value = v[l + k*n]; 2232 } else { 2233 value = v[k + l*m]; 2234 } 2235 } 2236 if (col <= lastcol) low = 0; else high = nrow; 2237 lastcol = col; 2238 while (high-low > 7) { 2239 t = (low+high)/2; 2240 if (rp[t] > bcol) high = t; 2241 else low = t; 2242 } 2243 for (i=low; i<high; i++) { 2244 if (rp[i] > bcol) break; 2245 if (rp[i] == bcol) { 2246 bap = ap + bs2*i + bs*cidx + ridx; 2247 if (!A->structure_only) { 2248 if (is == ADD_VALUES) *bap += value; 2249 else *bap = value; 2250 } 2251 goto noinsert1; 2252 } 2253 } 2254 if (nonew == 1) goto noinsert1; 2255 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2256 if (A->structure_only) { 2257 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2258 } else { 2259 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2260 } 2261 N = nrow++ - 1; high++; 2262 /* shift up all the later entries in this row */ 2263 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2264 rp[i] = bcol; 2265 if (!A->structure_only) { 2266 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2267 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2268 ap[bs2*i + bs*cidx + ridx] = value; 2269 } 2270 a->nz++; 2271 A->nonzerostate++; 2272 noinsert1:; 2273 low = i; 2274 } 2275 ailen[brow] = nrow; 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 2280 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2281 { 2282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2283 Mat outA; 2284 PetscErrorCode ierr; 2285 PetscBool row_identity,col_identity; 2286 2287 PetscFunctionBegin; 2288 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2289 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2290 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2291 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2292 2293 outA = inA; 2294 inA->factortype = MAT_FACTOR_LU; 2295 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2296 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2297 2298 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2299 2300 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2301 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2302 a->row = row; 2303 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2304 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2305 a->col = col; 2306 2307 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2308 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2309 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2310 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2311 2312 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2313 if (!a->solve_work) { 2314 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2315 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2316 } 2317 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2322 { 2323 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2324 PetscInt i,nz,mbs; 2325 2326 PetscFunctionBegin; 2327 nz = baij->maxnz; 2328 mbs = baij->mbs; 2329 for (i=0; i<nz; i++) { 2330 baij->j[i] = indices[i]; 2331 } 2332 baij->nz = nz; 2333 for (i=0; i<mbs; i++) { 2334 baij->ilen[i] = baij->imax[i]; 2335 } 2336 PetscFunctionReturn(0); 2337 } 2338 2339 /*@ 2340 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2341 in the matrix. 2342 2343 Input Parameters: 2344 + mat - the SeqBAIJ matrix 2345 - indices - the column indices 2346 2347 Level: advanced 2348 2349 Notes: 2350 This can be called if you have precomputed the nonzero structure of the 2351 matrix and want to provide it to the matrix object to improve the performance 2352 of the MatSetValues() operation. 2353 2354 You MUST have set the correct numbers of nonzeros per row in the call to 2355 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2356 2357 MUST be called before any calls to MatSetValues(); 2358 2359 @*/ 2360 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2361 { 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2366 PetscValidPointer(indices,2); 2367 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2368 PetscFunctionReturn(0); 2369 } 2370 2371 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2372 { 2373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2374 PetscErrorCode ierr; 2375 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2376 PetscReal atmp; 2377 PetscScalar *x,zero = 0.0; 2378 MatScalar *aa; 2379 PetscInt ncols,brow,krow,kcol; 2380 2381 PetscFunctionBegin; 2382 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2383 bs = A->rmap->bs; 2384 aa = a->a; 2385 ai = a->i; 2386 aj = a->j; 2387 mbs = a->mbs; 2388 2389 ierr = VecSet(v,zero);CHKERRQ(ierr); 2390 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2391 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2392 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2393 for (i=0; i<mbs; i++) { 2394 ncols = ai[1] - ai[0]; ai++; 2395 brow = bs*i; 2396 for (j=0; j<ncols; j++) { 2397 for (kcol=0; kcol<bs; kcol++) { 2398 for (krow=0; krow<bs; krow++) { 2399 atmp = PetscAbsScalar(*aa);aa++; 2400 row = brow + krow; /* row index */ 2401 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2402 } 2403 } 2404 aj++; 2405 } 2406 } 2407 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2412 { 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 /* If the two matrices have the same copy implementation, use fast copy. */ 2417 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2419 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2420 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2421 2422 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2423 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2424 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2425 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2426 } else { 2427 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2428 } 2429 PetscFunctionReturn(0); 2430 } 2431 2432 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2433 { 2434 PetscErrorCode ierr; 2435 2436 PetscFunctionBegin; 2437 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2442 { 2443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2444 2445 PetscFunctionBegin; 2446 *array = a->a; 2447 PetscFunctionReturn(0); 2448 } 2449 2450 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2451 { 2452 PetscFunctionBegin; 2453 *array = NULL; 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2458 { 2459 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2460 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2461 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 /* Set the number of nonzeros in the new matrix */ 2466 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2471 { 2472 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2473 PetscErrorCode ierr; 2474 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2475 PetscBLASInt one=1; 2476 2477 PetscFunctionBegin; 2478 if (str == SAME_NONZERO_PATTERN) { 2479 PetscScalar alpha = a; 2480 PetscBLASInt bnz; 2481 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2482 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2483 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2484 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2485 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2486 } else { 2487 Mat B; 2488 PetscInt *nnz; 2489 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2490 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2491 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2492 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2493 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2494 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2495 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2496 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2497 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2498 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2499 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2500 ierr = PetscFree(nnz);CHKERRQ(ierr); 2501 } 2502 PetscFunctionReturn(0); 2503 } 2504 2505 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2506 { 2507 #if defined(PETSC_USE_COMPLEX) 2508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2509 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2510 MatScalar *aa = a->a; 2511 2512 PetscFunctionBegin; 2513 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2514 #else 2515 PetscFunctionBegin; 2516 #endif 2517 PetscFunctionReturn(0); 2518 } 2519 2520 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2521 { 2522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2523 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2524 MatScalar *aa = a->a; 2525 2526 PetscFunctionBegin; 2527 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2532 { 2533 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2534 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2535 MatScalar *aa = a->a; 2536 2537 PetscFunctionBegin; 2538 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 /* 2543 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2544 */ 2545 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2546 { 2547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2548 PetscErrorCode ierr; 2549 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2550 PetscInt nz = a->i[m],row,*jj,mr,col; 2551 2552 PetscFunctionBegin; 2553 *nn = n; 2554 if (!ia) PetscFunctionReturn(0); 2555 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2556 else { 2557 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2558 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2559 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2560 jj = a->j; 2561 for (i=0; i<nz; i++) { 2562 collengths[jj[i]]++; 2563 } 2564 cia[0] = oshift; 2565 for (i=0; i<n; i++) { 2566 cia[i+1] = cia[i] + collengths[i]; 2567 } 2568 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2569 jj = a->j; 2570 for (row=0; row<m; row++) { 2571 mr = a->i[row+1] - a->i[row]; 2572 for (i=0; i<mr; i++) { 2573 col = *jj++; 2574 2575 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2576 } 2577 } 2578 ierr = PetscFree(collengths);CHKERRQ(ierr); 2579 *ia = cia; *ja = cja; 2580 } 2581 PetscFunctionReturn(0); 2582 } 2583 2584 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2585 { 2586 PetscErrorCode ierr; 2587 2588 PetscFunctionBegin; 2589 if (!ia) PetscFunctionReturn(0); 2590 ierr = PetscFree(*ia);CHKERRQ(ierr); 2591 ierr = PetscFree(*ja);CHKERRQ(ierr); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 /* 2596 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2597 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2598 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2599 */ 2600 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2601 { 2602 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2603 PetscErrorCode ierr; 2604 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2605 PetscInt nz = a->i[m],row,*jj,mr,col; 2606 PetscInt *cspidx; 2607 2608 PetscFunctionBegin; 2609 *nn = n; 2610 if (!ia) PetscFunctionReturn(0); 2611 2612 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2613 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2614 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2615 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2616 jj = a->j; 2617 for (i=0; i<nz; i++) { 2618 collengths[jj[i]]++; 2619 } 2620 cia[0] = oshift; 2621 for (i=0; i<n; i++) { 2622 cia[i+1] = cia[i] + collengths[i]; 2623 } 2624 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2625 jj = a->j; 2626 for (row=0; row<m; row++) { 2627 mr = a->i[row+1] - a->i[row]; 2628 for (i=0; i<mr; i++) { 2629 col = *jj++; 2630 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2631 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2632 } 2633 } 2634 ierr = PetscFree(collengths);CHKERRQ(ierr); 2635 *ia = cia; 2636 *ja = cja; 2637 *spidx = cspidx; 2638 PetscFunctionReturn(0); 2639 } 2640 2641 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2642 { 2643 PetscErrorCode ierr; 2644 2645 PetscFunctionBegin; 2646 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2647 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2648 PetscFunctionReturn(0); 2649 } 2650 2651 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2652 { 2653 PetscErrorCode ierr; 2654 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2655 2656 PetscFunctionBegin; 2657 if (!Y->preallocated || !aij->nz) { 2658 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2659 } 2660 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 /* -------------------------------------------------------------------*/ 2665 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2666 MatGetRow_SeqBAIJ, 2667 MatRestoreRow_SeqBAIJ, 2668 MatMult_SeqBAIJ_N, 2669 /* 4*/ MatMultAdd_SeqBAIJ_N, 2670 MatMultTranspose_SeqBAIJ, 2671 MatMultTransposeAdd_SeqBAIJ, 2672 NULL, 2673 NULL, 2674 NULL, 2675 /* 10*/ NULL, 2676 MatLUFactor_SeqBAIJ, 2677 NULL, 2678 NULL, 2679 MatTranspose_SeqBAIJ, 2680 /* 15*/ MatGetInfo_SeqBAIJ, 2681 MatEqual_SeqBAIJ, 2682 MatGetDiagonal_SeqBAIJ, 2683 MatDiagonalScale_SeqBAIJ, 2684 MatNorm_SeqBAIJ, 2685 /* 20*/ NULL, 2686 MatAssemblyEnd_SeqBAIJ, 2687 MatSetOption_SeqBAIJ, 2688 MatZeroEntries_SeqBAIJ, 2689 /* 24*/ MatZeroRows_SeqBAIJ, 2690 NULL, 2691 NULL, 2692 NULL, 2693 NULL, 2694 /* 29*/ MatSetUp_SeqBAIJ, 2695 NULL, 2696 NULL, 2697 NULL, 2698 NULL, 2699 /* 34*/ MatDuplicate_SeqBAIJ, 2700 NULL, 2701 NULL, 2702 MatILUFactor_SeqBAIJ, 2703 NULL, 2704 /* 39*/ MatAXPY_SeqBAIJ, 2705 MatCreateSubMatrices_SeqBAIJ, 2706 MatIncreaseOverlap_SeqBAIJ, 2707 MatGetValues_SeqBAIJ, 2708 MatCopy_SeqBAIJ, 2709 /* 44*/ NULL, 2710 MatScale_SeqBAIJ, 2711 MatShift_SeqBAIJ, 2712 NULL, 2713 MatZeroRowsColumns_SeqBAIJ, 2714 /* 49*/ NULL, 2715 MatGetRowIJ_SeqBAIJ, 2716 MatRestoreRowIJ_SeqBAIJ, 2717 MatGetColumnIJ_SeqBAIJ, 2718 MatRestoreColumnIJ_SeqBAIJ, 2719 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2720 NULL, 2721 NULL, 2722 NULL, 2723 MatSetValuesBlocked_SeqBAIJ, 2724 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2725 MatDestroy_SeqBAIJ, 2726 MatView_SeqBAIJ, 2727 NULL, 2728 NULL, 2729 /* 64*/ NULL, 2730 NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2735 NULL, 2736 MatConvert_Basic, 2737 NULL, 2738 NULL, 2739 /* 74*/ NULL, 2740 MatFDColoringApply_BAIJ, 2741 NULL, 2742 NULL, 2743 NULL, 2744 /* 79*/ NULL, 2745 NULL, 2746 NULL, 2747 NULL, 2748 MatLoad_SeqBAIJ, 2749 /* 84*/ NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 NULL, 2754 /* 89*/ NULL, 2755 NULL, 2756 NULL, 2757 NULL, 2758 NULL, 2759 /* 94*/ NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 NULL, 2764 /* 99*/ NULL, 2765 NULL, 2766 NULL, 2767 MatConjugate_SeqBAIJ, 2768 NULL, 2769 /*104*/ NULL, 2770 MatRealPart_SeqBAIJ, 2771 MatImaginaryPart_SeqBAIJ, 2772 NULL, 2773 NULL, 2774 /*109*/ NULL, 2775 NULL, 2776 NULL, 2777 NULL, 2778 MatMissingDiagonal_SeqBAIJ, 2779 /*114*/ NULL, 2780 NULL, 2781 NULL, 2782 NULL, 2783 NULL, 2784 /*119*/ NULL, 2785 NULL, 2786 MatMultHermitianTranspose_SeqBAIJ, 2787 MatMultHermitianTransposeAdd_SeqBAIJ, 2788 NULL, 2789 /*124*/ NULL, 2790 MatGetColumnReductions_SeqBAIJ, 2791 MatInvertBlockDiagonal_SeqBAIJ, 2792 NULL, 2793 NULL, 2794 /*129*/ NULL, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*134*/ NULL, 2800 NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 /*139*/ MatSetBlockSizes_Default, 2805 NULL, 2806 NULL, 2807 MatFDColoringSetUp_SeqXAIJ, 2808 NULL, 2809 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2810 MatDestroySubMatrices_SeqBAIJ 2811 }; 2812 2813 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2814 { 2815 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2816 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2817 PetscErrorCode ierr; 2818 2819 PetscFunctionBegin; 2820 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2821 2822 /* allocate space for values if not already there */ 2823 if (!aij->saved_values) { 2824 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2825 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2826 } 2827 2828 /* copy values over */ 2829 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2834 { 2835 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2836 PetscErrorCode ierr; 2837 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2838 2839 PetscFunctionBegin; 2840 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2841 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2842 2843 /* copy values over */ 2844 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2849 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2850 2851 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2852 { 2853 Mat_SeqBAIJ *b; 2854 PetscErrorCode ierr; 2855 PetscInt i,mbs,nbs,bs2; 2856 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2857 2858 PetscFunctionBegin; 2859 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2860 if (nz == MAT_SKIP_ALLOCATION) { 2861 skipallocation = PETSC_TRUE; 2862 nz = 0; 2863 } 2864 2865 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2866 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2867 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2868 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2869 2870 B->preallocated = PETSC_TRUE; 2871 2872 mbs = B->rmap->n/bs; 2873 nbs = B->cmap->n/bs; 2874 bs2 = bs*bs; 2875 2876 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2877 2878 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2879 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2880 if (nnz) { 2881 for (i=0; i<mbs; i++) { 2882 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2883 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2884 } 2885 } 2886 2887 b = (Mat_SeqBAIJ*)B->data; 2888 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2889 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2890 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2891 2892 if (!flg) { 2893 switch (bs) { 2894 case 1: 2895 B->ops->mult = MatMult_SeqBAIJ_1; 2896 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2897 break; 2898 case 2: 2899 B->ops->mult = MatMult_SeqBAIJ_2; 2900 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2901 break; 2902 case 3: 2903 B->ops->mult = MatMult_SeqBAIJ_3; 2904 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2905 break; 2906 case 4: 2907 B->ops->mult = MatMult_SeqBAIJ_4; 2908 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2909 break; 2910 case 5: 2911 B->ops->mult = MatMult_SeqBAIJ_5; 2912 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2913 break; 2914 case 6: 2915 B->ops->mult = MatMult_SeqBAIJ_6; 2916 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2917 break; 2918 case 7: 2919 B->ops->mult = MatMult_SeqBAIJ_7; 2920 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2921 break; 2922 case 9: 2923 { 2924 PetscInt version = 1; 2925 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2926 switch (version) { 2927 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2928 case 1: 2929 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2930 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2931 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2932 break; 2933 #endif 2934 default: 2935 B->ops->mult = MatMult_SeqBAIJ_N; 2936 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2937 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2938 break; 2939 } 2940 break; 2941 } 2942 case 11: 2943 B->ops->mult = MatMult_SeqBAIJ_11; 2944 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2945 break; 2946 case 12: 2947 { 2948 PetscInt version = 1; 2949 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2950 switch (version) { 2951 case 1: 2952 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2953 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2954 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2955 break; 2956 case 2: 2957 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2958 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2959 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2960 break; 2961 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2962 case 3: 2963 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2964 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2965 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2966 break; 2967 #endif 2968 default: 2969 B->ops->mult = MatMult_SeqBAIJ_N; 2970 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2971 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2972 break; 2973 } 2974 break; 2975 } 2976 case 15: 2977 { 2978 PetscInt version = 1; 2979 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2980 switch (version) { 2981 case 1: 2982 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2983 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2984 break; 2985 case 2: 2986 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2987 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2988 break; 2989 case 3: 2990 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2991 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2992 break; 2993 case 4: 2994 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2995 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2996 break; 2997 default: 2998 B->ops->mult = MatMult_SeqBAIJ_N; 2999 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 3000 break; 3001 } 3002 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3003 break; 3004 } 3005 default: 3006 B->ops->mult = MatMult_SeqBAIJ_N; 3007 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3008 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 3009 break; 3010 } 3011 } 3012 B->ops->sor = MatSOR_SeqBAIJ; 3013 b->mbs = mbs; 3014 b->nbs = nbs; 3015 if (!skipallocation) { 3016 if (!b->imax) { 3017 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 3018 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3019 3020 b->free_imax_ilen = PETSC_TRUE; 3021 } 3022 /* b->ilen will count nonzeros in each block row so far. */ 3023 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3024 if (!nnz) { 3025 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3026 else if (nz < 0) nz = 1; 3027 nz = PetscMin(nz,nbs); 3028 for (i=0; i<mbs; i++) b->imax[i] = nz; 3029 ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 3030 } else { 3031 PetscInt64 nz64 = 0; 3032 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3033 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 3034 } 3035 3036 /* allocate the matrix space */ 3037 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3038 if (B->structure_only) { 3039 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 3040 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 3041 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 3042 } else { 3043 PetscInt nzbs2 = 0; 3044 ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 3045 ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 3046 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3047 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 3048 } 3049 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 3050 3051 if (B->structure_only) { 3052 b->singlemalloc = PETSC_FALSE; 3053 b->free_a = PETSC_FALSE; 3054 } else { 3055 b->singlemalloc = PETSC_TRUE; 3056 b->free_a = PETSC_TRUE; 3057 } 3058 b->free_ij = PETSC_TRUE; 3059 3060 b->i[0] = 0; 3061 for (i=1; i<mbs+1; i++) { 3062 b->i[i] = b->i[i-1] + b->imax[i-1]; 3063 } 3064 3065 } else { 3066 b->free_a = PETSC_FALSE; 3067 b->free_ij = PETSC_FALSE; 3068 } 3069 3070 b->bs2 = bs2; 3071 b->mbs = mbs; 3072 b->nz = 0; 3073 b->maxnz = nz; 3074 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3075 B->was_assembled = PETSC_FALSE; 3076 B->assembled = PETSC_FALSE; 3077 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3078 PetscFunctionReturn(0); 3079 } 3080 3081 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3082 { 3083 PetscInt i,m,nz,nz_max=0,*nnz; 3084 PetscScalar *values=NULL; 3085 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3086 PetscErrorCode ierr; 3087 3088 PetscFunctionBegin; 3089 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3090 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3091 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3092 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3093 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3094 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3095 m = B->rmap->n/bs; 3096 3097 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3098 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3099 for (i=0; i<m; i++) { 3100 nz = ii[i+1]- ii[i]; 3101 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3102 nz_max = PetscMax(nz_max, nz); 3103 nnz[i] = nz; 3104 } 3105 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3106 ierr = PetscFree(nnz);CHKERRQ(ierr); 3107 3108 values = (PetscScalar*)V; 3109 if (!values) { 3110 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3111 } 3112 for (i=0; i<m; i++) { 3113 PetscInt ncols = ii[i+1] - ii[i]; 3114 const PetscInt *icols = jj + ii[i]; 3115 if (bs == 1 || !roworiented) { 3116 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3117 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3118 } else { 3119 PetscInt j; 3120 for (j=0; j<ncols; j++) { 3121 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3122 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3123 } 3124 } 3125 } 3126 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3127 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3128 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3129 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 /*@C 3134 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3135 3136 Not Collective 3137 3138 Input Parameter: 3139 . mat - a MATSEQBAIJ matrix 3140 3141 Output Parameter: 3142 . array - pointer to the data 3143 3144 Level: intermediate 3145 3146 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3147 @*/ 3148 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3149 { 3150 PetscErrorCode ierr; 3151 3152 PetscFunctionBegin; 3153 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3154 PetscFunctionReturn(0); 3155 } 3156 3157 /*@C 3158 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3159 3160 Not Collective 3161 3162 Input Parameters: 3163 + mat - a MATSEQBAIJ matrix 3164 - array - pointer to the data 3165 3166 Level: intermediate 3167 3168 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3169 @*/ 3170 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3171 { 3172 PetscErrorCode ierr; 3173 3174 PetscFunctionBegin; 3175 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3176 PetscFunctionReturn(0); 3177 } 3178 3179 /*MC 3180 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3181 block sparse compressed row format. 3182 3183 Options Database Keys: 3184 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3185 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3186 3187 Level: beginner 3188 3189 Notes: 3190 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3191 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3192 3193 Run with -info to see what version of the matrix-vector product is being used 3194 3195 .seealso: MatCreateSeqBAIJ() 3196 M*/ 3197 3198 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3199 3200 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3201 { 3202 PetscErrorCode ierr; 3203 PetscMPIInt size; 3204 Mat_SeqBAIJ *b; 3205 3206 PetscFunctionBegin; 3207 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3208 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3209 3210 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3211 B->data = (void*)b; 3212 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3213 3214 b->row = NULL; 3215 b->col = NULL; 3216 b->icol = NULL; 3217 b->reallocs = 0; 3218 b->saved_values = NULL; 3219 3220 b->roworiented = PETSC_TRUE; 3221 b->nonew = 0; 3222 b->diag = NULL; 3223 B->spptr = NULL; 3224 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3225 b->keepnonzeropattern = PETSC_FALSE; 3226 3227 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3228 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3229 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3230 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3231 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3232 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3233 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3234 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3235 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3236 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3237 #if defined(PETSC_HAVE_HYPRE) 3238 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3239 #endif 3240 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3241 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3242 PetscFunctionReturn(0); 3243 } 3244 3245 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3246 { 3247 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3248 PetscErrorCode ierr; 3249 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3250 3251 PetscFunctionBegin; 3252 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3253 3254 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3255 c->imax = a->imax; 3256 c->ilen = a->ilen; 3257 c->free_imax_ilen = PETSC_FALSE; 3258 } else { 3259 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3260 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3261 for (i=0; i<mbs; i++) { 3262 c->imax[i] = a->imax[i]; 3263 c->ilen[i] = a->ilen[i]; 3264 } 3265 c->free_imax_ilen = PETSC_TRUE; 3266 } 3267 3268 /* allocate the matrix space */ 3269 if (mallocmatspace) { 3270 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3271 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3272 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3273 3274 c->i = a->i; 3275 c->j = a->j; 3276 c->singlemalloc = PETSC_FALSE; 3277 c->free_a = PETSC_TRUE; 3278 c->free_ij = PETSC_FALSE; 3279 c->parent = A; 3280 C->preallocated = PETSC_TRUE; 3281 C->assembled = PETSC_TRUE; 3282 3283 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3284 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3285 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3286 } else { 3287 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3288 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3289 3290 c->singlemalloc = PETSC_TRUE; 3291 c->free_a = PETSC_TRUE; 3292 c->free_ij = PETSC_TRUE; 3293 3294 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3295 if (mbs > 0) { 3296 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3297 if (cpvalues == MAT_COPY_VALUES) { 3298 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3299 } else { 3300 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3301 } 3302 } 3303 C->preallocated = PETSC_TRUE; 3304 C->assembled = PETSC_TRUE; 3305 } 3306 } 3307 3308 c->roworiented = a->roworiented; 3309 c->nonew = a->nonew; 3310 3311 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3312 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3313 3314 c->bs2 = a->bs2; 3315 c->mbs = a->mbs; 3316 c->nbs = a->nbs; 3317 3318 if (a->diag) { 3319 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3320 c->diag = a->diag; 3321 c->free_diag = PETSC_FALSE; 3322 } else { 3323 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3324 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3325 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3326 c->free_diag = PETSC_TRUE; 3327 } 3328 } else c->diag = NULL; 3329 3330 c->nz = a->nz; 3331 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3332 c->solve_work = NULL; 3333 c->mult_work = NULL; 3334 c->sor_workt = NULL; 3335 c->sor_work = NULL; 3336 3337 c->compressedrow.use = a->compressedrow.use; 3338 c->compressedrow.nrows = a->compressedrow.nrows; 3339 if (a->compressedrow.use) { 3340 i = a->compressedrow.nrows; 3341 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3342 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3343 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3344 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3345 } else { 3346 c->compressedrow.use = PETSC_FALSE; 3347 c->compressedrow.i = NULL; 3348 c->compressedrow.rindex = NULL; 3349 } 3350 C->nonzerostate = A->nonzerostate; 3351 3352 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3353 PetscFunctionReturn(0); 3354 } 3355 3356 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3357 { 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3362 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3363 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3364 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3365 PetscFunctionReturn(0); 3366 } 3367 3368 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3369 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3370 { 3371 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3372 PetscInt *rowidxs,*colidxs; 3373 PetscScalar *matvals; 3374 PetscErrorCode ierr; 3375 3376 PetscFunctionBegin; 3377 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3378 3379 /* read matrix header */ 3380 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3381 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3382 M = header[1]; N = header[2]; nz = header[3]; 3383 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3384 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3385 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3386 3387 /* set block sizes from the viewer's .info file */ 3388 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3389 /* set local and global sizes if not set already */ 3390 if (mat->rmap->n < 0) mat->rmap->n = M; 3391 if (mat->cmap->n < 0) mat->cmap->n = N; 3392 if (mat->rmap->N < 0) mat->rmap->N = M; 3393 if (mat->cmap->N < 0) mat->cmap->N = N; 3394 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3395 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3396 3397 /* check if the matrix sizes are correct */ 3398 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3399 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3400 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3401 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3402 mbs = m/bs; nbs = n/bs; 3403 3404 /* read in row lengths, column indices and nonzero values */ 3405 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3406 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3407 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3408 sum = rowidxs[m]; 3409 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3410 3411 /* read in column indices and nonzero values */ 3412 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3413 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3414 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3415 3416 { /* preallocate matrix storage */ 3417 PetscBT bt; /* helper bit set to count nonzeros */ 3418 PetscInt *nnz; 3419 PetscBool sbaij; 3420 3421 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3422 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3423 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3424 for (i=0; i<mbs; i++) { 3425 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3426 for (k=0; k<bs; k++) { 3427 PetscInt row = bs*i + k; 3428 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3429 PetscInt col = colidxs[j]; 3430 if (!sbaij || col >= row) 3431 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3432 } 3433 } 3434 } 3435 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3436 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3437 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3438 ierr = PetscFree(nnz);CHKERRQ(ierr); 3439 } 3440 3441 /* store matrix values */ 3442 for (i=0; i<m; i++) { 3443 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3444 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3445 } 3446 3447 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3448 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3449 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3450 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3451 PetscFunctionReturn(0); 3452 } 3453 3454 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3455 { 3456 PetscErrorCode ierr; 3457 PetscBool isbinary; 3458 3459 PetscFunctionBegin; 3460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3461 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3462 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 3466 /*@C 3467 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3468 compressed row) format. For good matrix assembly performance the 3469 user should preallocate the matrix storage by setting the parameter nz 3470 (or the array nnz). By setting these parameters accurately, performance 3471 during matrix assembly can be increased by more than a factor of 50. 3472 3473 Collective 3474 3475 Input Parameters: 3476 + comm - MPI communicator, set to PETSC_COMM_SELF 3477 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3478 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3479 . m - number of rows 3480 . n - number of columns 3481 . nz - number of nonzero blocks per block row (same for all rows) 3482 - nnz - array containing the number of nonzero blocks in the various block rows 3483 (possibly different for each block row) or NULL 3484 3485 Output Parameter: 3486 . A - the matrix 3487 3488 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3489 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3490 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3491 3492 Options Database Keys: 3493 + -mat_no_unroll - uses code that does not unroll the loops in the 3494 block calculations (much slower) 3495 - -mat_block_size - size of the blocks to use 3496 3497 Level: intermediate 3498 3499 Notes: 3500 The number of rows and columns must be divisible by blocksize. 3501 3502 If the nnz parameter is given then the nz parameter is ignored 3503 3504 A nonzero block is any block that as 1 or more nonzeros in it 3505 3506 The block AIJ format is fully compatible with standard Fortran 77 3507 storage. That is, the stored row and column indices can begin at 3508 either one (as in Fortran) or zero. See the users' manual for details. 3509 3510 Specify the preallocated storage with either nz or nnz (not both). 3511 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3512 allocation. See Users-Manual: ch_mat for details. 3513 matrices. 3514 3515 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3516 @*/ 3517 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3518 { 3519 PetscErrorCode ierr; 3520 3521 PetscFunctionBegin; 3522 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3523 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3524 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3525 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3526 PetscFunctionReturn(0); 3527 } 3528 3529 /*@C 3530 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3531 per row in the matrix. For good matrix assembly performance the 3532 user should preallocate the matrix storage by setting the parameter nz 3533 (or the array nnz). By setting these parameters accurately, performance 3534 during matrix assembly can be increased by more than a factor of 50. 3535 3536 Collective 3537 3538 Input Parameters: 3539 + B - the matrix 3540 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3541 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3542 . nz - number of block nonzeros per block row (same for all rows) 3543 - nnz - array containing the number of block nonzeros in the various block rows 3544 (possibly different for each block row) or NULL 3545 3546 Options Database Keys: 3547 + -mat_no_unroll - uses code that does not unroll the loops in the 3548 block calculations (much slower) 3549 - -mat_block_size - size of the blocks to use 3550 3551 Level: intermediate 3552 3553 Notes: 3554 If the nnz parameter is given then the nz parameter is ignored 3555 3556 You can call MatGetInfo() to get information on how effective the preallocation was; 3557 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3558 You can also run with the option -info and look for messages with the string 3559 malloc in them to see if additional memory allocation was needed. 3560 3561 The block AIJ format is fully compatible with standard Fortran 77 3562 storage. That is, the stored row and column indices can begin at 3563 either one (as in Fortran) or zero. See the users' manual for details. 3564 3565 Specify the preallocated storage with either nz or nnz (not both). 3566 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3567 allocation. See Users-Manual: ch_mat for details. 3568 3569 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3570 @*/ 3571 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3572 { 3573 PetscErrorCode ierr; 3574 3575 PetscFunctionBegin; 3576 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3577 PetscValidType(B,1); 3578 PetscValidLogicalCollectiveInt(B,bs,2); 3579 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3580 PetscFunctionReturn(0); 3581 } 3582 3583 /*@C 3584 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3585 3586 Collective 3587 3588 Input Parameters: 3589 + B - the matrix 3590 . i - the indices into j for the start of each local row (starts with zero) 3591 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3592 - v - optional values in the matrix 3593 3594 Level: advanced 3595 3596 Notes: 3597 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3598 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3599 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3600 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3601 block column and the second index is over columns within a block. 3602 3603 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3604 3605 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3606 @*/ 3607 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3608 { 3609 PetscErrorCode ierr; 3610 3611 PetscFunctionBegin; 3612 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3613 PetscValidType(B,1); 3614 PetscValidLogicalCollectiveInt(B,bs,2); 3615 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3616 PetscFunctionReturn(0); 3617 } 3618 3619 /*@ 3620 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3621 3622 Collective 3623 3624 Input Parameters: 3625 + comm - must be an MPI communicator of size 1 3626 . bs - size of block 3627 . m - number of rows 3628 . n - number of columns 3629 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3630 . j - column indices 3631 - a - matrix values 3632 3633 Output Parameter: 3634 . mat - the matrix 3635 3636 Level: advanced 3637 3638 Notes: 3639 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3640 once the matrix is destroyed 3641 3642 You cannot set new nonzero locations into this matrix, that will generate an error. 3643 3644 The i and j indices are 0 based 3645 3646 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3647 3648 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3649 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3650 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3651 with column-major ordering within blocks. 3652 3653 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3654 3655 @*/ 3656 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3657 { 3658 PetscErrorCode ierr; 3659 PetscInt ii; 3660 Mat_SeqBAIJ *baij; 3661 3662 PetscFunctionBegin; 3663 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3664 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3665 3666 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3667 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3668 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3669 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3670 baij = (Mat_SeqBAIJ*)(*mat)->data; 3671 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3672 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3673 3674 baij->i = i; 3675 baij->j = j; 3676 baij->a = a; 3677 3678 baij->singlemalloc = PETSC_FALSE; 3679 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3680 baij->free_a = PETSC_FALSE; 3681 baij->free_ij = PETSC_FALSE; 3682 3683 for (ii=0; ii<m; ii++) { 3684 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3685 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3686 } 3687 if (PetscDefined(USE_DEBUG)) { 3688 for (ii=0; ii<baij->i[m]; ii++) { 3689 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3690 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3691 } 3692 } 3693 3694 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3695 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3696 PetscFunctionReturn(0); 3697 } 3698 3699 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3700 { 3701 PetscErrorCode ierr; 3702 PetscMPIInt size; 3703 3704 PetscFunctionBegin; 3705 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3706 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3707 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3708 } else { 3709 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3710 } 3711 PetscFunctionReturn(0); 3712 } 3713