1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions) 21 { 22 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 23 PetscInt m,n,i; 24 PetscInt ib,jb,bs = A->rmap->bs; 25 MatScalar *a_val = a_aij->a; 26 27 PetscFunctionBegin; 28 PetscCall(MatGetSize(A,&m,&n)); 29 for (i=0; i<n; i++) reductions[i] = 0.0; 30 if (type == NORM_2) { 31 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 32 for (jb=0; jb<bs; jb++) { 33 for (ib=0; ib<bs; ib++) { 34 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 35 a_val++; 36 } 37 } 38 } 39 } else if (type == NORM_1) { 40 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 41 for (jb=0; jb<bs; jb++) { 42 for (ib=0; ib<bs; ib++) { 43 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 44 a_val++; 45 } 46 } 47 } 48 } else if (type == NORM_INFINITY) { 49 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 50 for (jb=0; jb<bs; jb++) { 51 for (ib=0; ib<bs; ib++) { 52 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 53 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 54 a_val++; 55 } 56 } 57 } 58 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 59 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 60 for (jb=0; jb<bs; jb++) { 61 for (ib=0; ib<bs; ib++) { 62 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 63 a_val++; 64 } 65 } 66 } 67 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 68 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 69 for (jb=0; jb<bs; jb++) { 70 for (ib=0; ib<bs; ib++) { 71 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 72 a_val++; 73 } 74 } 75 } 76 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 77 if (type == NORM_2) { 78 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 79 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (i=0; i<n; i++) reductions[i] /= m; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 88 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 89 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 90 PetscReal shift = 0.0; 91 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 92 93 PetscFunctionBegin; 94 allowzeropivot = PetscNot(A->erroriffailure); 95 96 if (a->idiagvalid) { 97 if (values) *values = a->idiag; 98 PetscFunctionReturn(0); 99 } 100 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 101 diag_offset = a->diag; 102 if (!a->idiag) { 103 PetscCall(PetscMalloc1(bs2*mbs,&a->idiag)); 104 PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar))); 105 } 106 diag = a->idiag; 107 if (values) *values = a->idiag; 108 /* factor and invert each block */ 109 switch (bs) { 110 case 1: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 1*diag_offset[i]; 113 diag[0] = odiag[0]; 114 115 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 116 if (allowzeropivot) { 117 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 119 A->factorerror_zeropivot_row = i; 120 PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i)); 121 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 122 } 123 124 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 125 diag += 1; 126 } 127 break; 128 case 2: 129 for (i=0; i<mbs; i++) { 130 odiag = v + 4*diag_offset[i]; 131 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 132 PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected)); 133 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 134 diag += 4; 135 } 136 break; 137 case 3: 138 for (i=0; i<mbs; i++) { 139 odiag = v + 9*diag_offset[i]; 140 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 141 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 142 diag[8] = odiag[8]; 143 PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected)); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 9; 146 } 147 break; 148 case 4: 149 for (i=0; i<mbs; i++) { 150 odiag = v + 16*diag_offset[i]; 151 PetscCall(PetscArraycpy(diag,odiag,16)); 152 PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected)); 153 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 154 diag += 16; 155 } 156 break; 157 case 5: 158 for (i=0; i<mbs; i++) { 159 odiag = v + 25*diag_offset[i]; 160 PetscCall(PetscArraycpy(diag,odiag,25)); 161 PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 25; 164 } 165 break; 166 case 6: 167 for (i=0; i<mbs; i++) { 168 odiag = v + 36*diag_offset[i]; 169 PetscCall(PetscArraycpy(diag,odiag,36)); 170 PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 36; 173 } 174 break; 175 case 7: 176 for (i=0; i<mbs; i++) { 177 odiag = v + 49*diag_offset[i]; 178 PetscCall(PetscArraycpy(diag,odiag,49)); 179 PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 49; 182 } 183 break; 184 default: 185 PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots)); 186 for (i=0; i<mbs; i++) { 187 odiag = v + bs2*diag_offset[i]; 188 PetscCall(PetscArraycpy(diag,odiag,bs2)); 189 PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected)); 190 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 191 diag += bs2; 192 } 193 PetscCall(PetscFree2(v_work,v_pivots)); 194 } 195 a->idiagvalid = PETSC_TRUE; 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 202 PetscScalar *x,*work,*w,*workt,*t; 203 const MatScalar *v,*aa = a->a, *idiag; 204 const PetscScalar *b,*xb; 205 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 206 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 207 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 208 209 PetscFunctionBegin; 210 its = its*lits; 211 PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 212 PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 213 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 214 PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 215 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 216 217 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL)); 218 219 if (!m) PetscFunctionReturn(0); 220 diag = a->diag; 221 idiag = a->idiag; 222 k = PetscMax(A->rmap->n,A->cmap->n); 223 if (!a->mult_work) { 224 PetscCall(PetscMalloc1(k+1,&a->mult_work)); 225 } 226 if (!a->sor_workt) { 227 PetscCall(PetscMalloc1(k,&a->sor_workt)); 228 } 229 if (!a->sor_work) { 230 PetscCall(PetscMalloc1(bs,&a->sor_work)); 231 } 232 work = a->mult_work; 233 t = a->sor_workt; 234 w = a->sor_work; 235 236 PetscCall(VecGetArray(xx,&x)); 237 PetscCall(VecGetArrayRead(bb,&b)); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241 switch (bs) { 242 case 1: 243 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 244 t[0] = b[0]; 245 i2 = 1; 246 idiag += 1; 247 for (i=1; i<m; i++) { 248 v = aa + ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; 252 for (j=0; j<nz; j++) { 253 xw[0] = x[vi[j]]; 254 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 255 } 256 t[i2] = s[0]; 257 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 258 x[i2] = xw[0]; 259 idiag += 1; 260 i2 += 1; 261 } 262 break; 263 case 2: 264 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 265 t[0] = b[0]; t[1] = b[1]; 266 i2 = 2; 267 idiag += 4; 268 for (i=1; i<m; i++) { 269 v = aa + 4*ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s[0] = b[i2]; s[1] = b[i2+1]; 273 for (j=0; j<nz; j++) { 274 idx = 2*vi[j]; 275 it = 4*j; 276 xw[0] = x[idx]; xw[1] = x[1+idx]; 277 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 278 } 279 t[i2] = s[0]; t[i2+1] = s[1]; 280 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 281 x[i2] = xw[0]; x[i2+1] = xw[1]; 282 idiag += 4; 283 i2 += 2; 284 } 285 break; 286 case 3: 287 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 288 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 289 i2 = 3; 290 idiag += 9; 291 for (i=1; i<m; i++) { 292 v = aa + 9*ai[i]; 293 vi = aj + ai[i]; 294 nz = diag[i] - ai[i]; 295 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 296 while (nz--) { 297 idx = 3*(*vi++); 298 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 299 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 300 v += 9; 301 } 302 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 303 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 304 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 305 idiag += 9; 306 i2 += 3; 307 } 308 break; 309 case 4: 310 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 311 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 312 i2 = 4; 313 idiag += 16; 314 for (i=1; i<m; i++) { 315 v = aa + 16*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 319 while (nz--) { 320 idx = 4*(*vi++); 321 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 323 v += 16; 324 } 325 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 326 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 328 idiag += 16; 329 i2 += 4; 330 } 331 break; 332 case 5: 333 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 335 i2 = 5; 336 idiag += 25; 337 for (i=1; i<m; i++) { 338 v = aa + 25*ai[i]; 339 vi = aj + ai[i]; 340 nz = diag[i] - ai[i]; 341 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 342 while (nz--) { 343 idx = 5*(*vi++); 344 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 345 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 346 v += 25; 347 } 348 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 349 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 350 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 351 idiag += 25; 352 i2 += 5; 353 } 354 break; 355 case 6: 356 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 357 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 358 i2 = 6; 359 idiag += 36; 360 for (i=1; i<m; i++) { 361 v = aa + 36*ai[i]; 362 vi = aj + ai[i]; 363 nz = diag[i] - ai[i]; 364 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 365 while (nz--) { 366 idx = 6*(*vi++); 367 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 368 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 369 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 370 v += 36; 371 } 372 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 373 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 374 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 375 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 376 idiag += 36; 377 i2 += 6; 378 } 379 break; 380 case 7: 381 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 382 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 383 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 384 i2 = 7; 385 idiag += 49; 386 for (i=1; i<m; i++) { 387 v = aa + 49*ai[i]; 388 vi = aj + ai[i]; 389 nz = diag[i] - ai[i]; 390 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 391 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 392 while (nz--) { 393 idx = 7*(*vi++); 394 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 395 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 396 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 397 v += 49; 398 } 399 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 400 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 401 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 402 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 403 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 404 idiag += 49; 405 i2 += 7; 406 } 407 break; 408 default: 409 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 410 PetscCall(PetscArraycpy(t,b,bs)); 411 i2 = bs; 412 idiag += bs2; 413 for (i=1; i<m; i++) { 414 v = aa + bs2*ai[i]; 415 vi = aj + ai[i]; 416 nz = diag[i] - ai[i]; 417 418 PetscCall(PetscArraycpy(w,b+i2,bs)); 419 /* copy all rows of x that are needed into contiguous space */ 420 workt = work; 421 for (j=0; j<nz; j++) { 422 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 423 workt += bs; 424 } 425 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 426 PetscCall(PetscArraycpy(t+i2,w,bs)); 427 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 428 429 idiag += bs2; 430 i2 += bs; 431 } 432 break; 433 } 434 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 435 PetscCall(PetscLogFlops(1.0*bs2*a->nz)); 436 xb = t; 437 } 438 else xb = b; 439 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 440 idiag = a->idiag+bs2*(a->mbs-1); 441 i2 = bs * (m-1); 442 switch (bs) { 443 case 1: 444 s[0] = xb[i2]; 445 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 446 x[i2] = xw[0]; 447 i2 -= 1; 448 for (i=m-2; i>=0; i--) { 449 v = aa + (diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; 453 for (j=0; j<nz; j++) { 454 xw[0] = x[vi[j]]; 455 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 456 } 457 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 458 x[i2] = xw[0]; 459 idiag -= 1; 460 i2 -= 1; 461 } 462 break; 463 case 2: 464 s[0] = xb[i2]; s[1] = xb[i2+1]; 465 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 466 x[i2] = xw[0]; x[i2+1] = xw[1]; 467 i2 -= 2; 468 idiag -= 4; 469 for (i=m-2; i>=0; i--) { 470 v = aa + 4*(diag[i] + 1); 471 vi = aj + diag[i] + 1; 472 nz = ai[i+1] - diag[i] - 1; 473 s[0] = xb[i2]; s[1] = xb[i2+1]; 474 for (j=0; j<nz; j++) { 475 idx = 2*vi[j]; 476 it = 4*j; 477 xw[0] = x[idx]; xw[1] = x[1+idx]; 478 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 479 } 480 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 481 x[i2] = xw[0]; x[i2+1] = xw[1]; 482 idiag -= 4; 483 i2 -= 2; 484 } 485 break; 486 case 3: 487 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 488 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 489 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 490 i2 -= 3; 491 idiag -= 9; 492 for (i=m-2; i>=0; i--) { 493 v = aa + 9*(diag[i]+1); 494 vi = aj + diag[i] + 1; 495 nz = ai[i+1] - diag[i] - 1; 496 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 497 while (nz--) { 498 idx = 3*(*vi++); 499 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 500 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 501 v += 9; 502 } 503 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 504 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 505 idiag -= 9; 506 i2 -= 3; 507 } 508 break; 509 case 4: 510 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 511 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 513 i2 -= 4; 514 idiag -= 16; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 16*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 520 while (nz--) { 521 idx = 4*(*vi++); 522 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 523 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 524 v += 16; 525 } 526 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 527 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 break; 532 case 5: 533 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 534 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 536 i2 -= 5; 537 idiag -= 25; 538 for (i=m-2; i>=0; i--) { 539 v = aa + 25*(diag[i]+1); 540 vi = aj + diag[i] + 1; 541 nz = ai[i+1] - diag[i] - 1; 542 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 543 while (nz--) { 544 idx = 5*(*vi++); 545 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 546 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 547 v += 25; 548 } 549 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 550 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 551 idiag -= 25; 552 i2 -= 5; 553 } 554 break; 555 case 6: 556 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 557 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 558 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 559 i2 -= 6; 560 idiag -= 36; 561 for (i=m-2; i>=0; i--) { 562 v = aa + 36*(diag[i]+1); 563 vi = aj + diag[i] + 1; 564 nz = ai[i+1] - diag[i] - 1; 565 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 566 while (nz--) { 567 idx = 6*(*vi++); 568 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 569 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 570 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 571 v += 36; 572 } 573 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 574 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 575 idiag -= 36; 576 i2 -= 6; 577 } 578 break; 579 case 7: 580 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 581 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 582 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 583 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 584 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 585 i2 -= 7; 586 idiag -= 49; 587 for (i=m-2; i>=0; i--) { 588 v = aa + 49*(diag[i]+1); 589 vi = aj + diag[i] + 1; 590 nz = ai[i+1] - diag[i] - 1; 591 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 592 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 593 while (nz--) { 594 idx = 7*(*vi++); 595 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 596 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 597 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 598 v += 49; 599 } 600 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 601 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 602 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 603 idiag -= 49; 604 i2 -= 7; 605 } 606 break; 607 default: 608 PetscCall(PetscArraycpy(w,xb+i2,bs)); 609 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 610 i2 -= bs; 611 idiag -= bs2; 612 for (i=m-2; i>=0; i--) { 613 v = aa + bs2*(diag[i]+1); 614 vi = aj + diag[i] + 1; 615 nz = ai[i+1] - diag[i] - 1; 616 617 PetscCall(PetscArraycpy(w,xb+i2,bs)); 618 /* copy all rows of x that are needed into contiguous space */ 619 workt = work; 620 for (j=0; j<nz; j++) { 621 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 622 workt += bs; 623 } 624 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 625 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 626 627 idiag -= bs2; 628 i2 -= bs; 629 } 630 break; 631 } 632 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 633 } 634 its--; 635 } 636 while (its--) { 637 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 638 idiag = a->idiag; 639 i2 = 0; 640 switch (bs) { 641 case 1: 642 for (i=0; i<m; i++) { 643 v = aa + ai[i]; 644 vi = aj + ai[i]; 645 nz = ai[i+1] - ai[i]; 646 s[0] = b[i2]; 647 for (j=0; j<nz; j++) { 648 xw[0] = x[vi[j]]; 649 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 650 } 651 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 652 x[i2] += xw[0]; 653 idiag += 1; 654 i2 += 1; 655 } 656 break; 657 case 2: 658 for (i=0; i<m; i++) { 659 v = aa + 4*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; 663 for (j=0; j<nz; j++) { 664 idx = 2*vi[j]; 665 it = 4*j; 666 xw[0] = x[idx]; xw[1] = x[1+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 668 } 669 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 670 x[i2] += xw[0]; x[i2+1] += xw[1]; 671 idiag += 4; 672 i2 += 2; 673 } 674 break; 675 case 3: 676 for (i=0; i<m; i++) { 677 v = aa + 9*ai[i]; 678 vi = aj + ai[i]; 679 nz = ai[i+1] - ai[i]; 680 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 681 while (nz--) { 682 idx = 3*(*vi++); 683 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 684 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 685 v += 9; 686 } 687 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 688 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 689 idiag += 9; 690 i2 += 3; 691 } 692 break; 693 case 4: 694 for (i=0; i<m; i++) { 695 v = aa + 16*ai[i]; 696 vi = aj + ai[i]; 697 nz = ai[i+1] - ai[i]; 698 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 699 while (nz--) { 700 idx = 4*(*vi++); 701 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 702 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 703 v += 16; 704 } 705 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 706 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 707 idiag += 16; 708 i2 += 4; 709 } 710 break; 711 case 5: 712 for (i=0; i<m; i++) { 713 v = aa + 25*ai[i]; 714 vi = aj + ai[i]; 715 nz = ai[i+1] - ai[i]; 716 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 717 while (nz--) { 718 idx = 5*(*vi++); 719 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 720 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 721 v += 25; 722 } 723 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 724 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 725 idiag += 25; 726 i2 += 5; 727 } 728 break; 729 case 6: 730 for (i=0; i<m; i++) { 731 v = aa + 36*ai[i]; 732 vi = aj + ai[i]; 733 nz = ai[i+1] - ai[i]; 734 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 735 while (nz--) { 736 idx = 6*(*vi++); 737 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 738 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 739 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 740 v += 36; 741 } 742 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 743 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 744 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 745 idiag += 36; 746 i2 += 6; 747 } 748 break; 749 case 7: 750 for (i=0; i<m; i++) { 751 v = aa + 49*ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 755 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 756 while (nz--) { 757 idx = 7*(*vi++); 758 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 759 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 761 v += 49; 762 } 763 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 765 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 766 idiag += 49; 767 i2 += 7; 768 } 769 break; 770 default: 771 for (i=0; i<m; i++) { 772 v = aa + bs2*ai[i]; 773 vi = aj + ai[i]; 774 nz = ai[i+1] - ai[i]; 775 776 PetscCall(PetscArraycpy(w,b+i2,bs)); 777 /* copy all rows of x that are needed into contiguous space */ 778 workt = work; 779 for (j=0; j<nz; j++) { 780 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 781 workt += bs; 782 } 783 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 784 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 785 786 idiag += bs2; 787 i2 += bs; 788 } 789 break; 790 } 791 PetscCall(PetscLogFlops(2.0*bs2*a->nz)); 792 } 793 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 794 idiag = a->idiag+bs2*(a->mbs-1); 795 i2 = bs * (m-1); 796 switch (bs) { 797 case 1: 798 for (i=m-1; i>=0; i--) { 799 v = aa + ai[i]; 800 vi = aj + ai[i]; 801 nz = ai[i+1] - ai[i]; 802 s[0] = b[i2]; 803 for (j=0; j<nz; j++) { 804 xw[0] = x[vi[j]]; 805 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 806 } 807 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 808 x[i2] += xw[0]; 809 idiag -= 1; 810 i2 -= 1; 811 } 812 break; 813 case 2: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 4*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; 819 for (j=0; j<nz; j++) { 820 idx = 2*vi[j]; 821 it = 4*j; 822 xw[0] = x[idx]; xw[1] = x[1+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 824 } 825 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 826 x[i2] += xw[0]; x[i2+1] += xw[1]; 827 idiag -= 4; 828 i2 -= 2; 829 } 830 break; 831 case 3: 832 for (i=m-1; i>=0; i--) { 833 v = aa + 9*ai[i]; 834 vi = aj + ai[i]; 835 nz = ai[i+1] - ai[i]; 836 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 837 while (nz--) { 838 idx = 3*(*vi++); 839 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 840 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 841 v += 9; 842 } 843 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 844 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 845 idiag -= 9; 846 i2 -= 3; 847 } 848 break; 849 case 4: 850 for (i=m-1; i>=0; i--) { 851 v = aa + 16*ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i+1] - ai[i]; 854 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 855 while (nz--) { 856 idx = 4*(*vi++); 857 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 858 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 859 v += 16; 860 } 861 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 862 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 863 idiag -= 16; 864 i2 -= 4; 865 } 866 break; 867 case 5: 868 for (i=m-1; i>=0; i--) { 869 v = aa + 25*ai[i]; 870 vi = aj + ai[i]; 871 nz = ai[i+1] - ai[i]; 872 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 873 while (nz--) { 874 idx = 5*(*vi++); 875 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 876 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 877 v += 25; 878 } 879 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 880 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 881 idiag -= 25; 882 i2 -= 5; 883 } 884 break; 885 case 6: 886 for (i=m-1; i>=0; i--) { 887 v = aa + 36*ai[i]; 888 vi = aj + ai[i]; 889 nz = ai[i+1] - ai[i]; 890 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 891 while (nz--) { 892 idx = 6*(*vi++); 893 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 894 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 895 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 896 v += 36; 897 } 898 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 899 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 900 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 901 idiag -= 36; 902 i2 -= 6; 903 } 904 break; 905 case 7: 906 for (i=m-1; i>=0; i--) { 907 v = aa + 49*ai[i]; 908 vi = aj + ai[i]; 909 nz = ai[i+1] - ai[i]; 910 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 911 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 912 while (nz--) { 913 idx = 7*(*vi++); 914 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 915 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 916 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 917 v += 49; 918 } 919 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 920 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 921 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 break; 926 default: 927 for (i=m-1; i>=0; i--) { 928 v = aa + bs2*ai[i]; 929 vi = aj + ai[i]; 930 nz = ai[i+1] - ai[i]; 931 932 PetscCall(PetscArraycpy(w,b+i2,bs)); 933 /* copy all rows of x that are needed into contiguous space */ 934 workt = work; 935 for (j=0; j<nz; j++) { 936 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 937 workt += bs; 938 } 939 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 940 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 941 942 idiag -= bs2; 943 i2 -= bs; 944 } 945 break; 946 } 947 PetscCall(PetscLogFlops(2.0*bs2*(a->nz))); 948 } 949 } 950 PetscCall(VecRestoreArray(xx,&x)); 951 PetscCall(VecRestoreArrayRead(bb,&b)); 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 Special version for direct calls from Fortran (Used in PETSc-fun3d) 957 */ 958 #if defined(PETSC_HAVE_FORTRAN_CAPS) 959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 961 #define matsetvaluesblocked4_ matsetvaluesblocked4 962 #endif 963 964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 965 { 966 Mat A = *AA; 967 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 968 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 969 PetscInt *ai =a->i,*ailen=a->ilen; 970 PetscInt *aj =a->j,stepval,lastcol = -1; 971 const PetscScalar *value = v; 972 MatScalar *ap,*aa = a->a,*bap; 973 974 PetscFunctionBegin; 975 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 976 stepval = (n-1)*4; 977 for (k=0; k<m; k++) { /* loop over added rows */ 978 row = im[k]; 979 rp = aj + ai[row]; 980 ap = aa + 16*ai[row]; 981 nrow = ailen[row]; 982 low = 0; 983 high = nrow; 984 for (l=0; l<n; l++) { /* loop over added columns */ 985 col = in[l]; 986 if (col <= lastcol) low = 0; 987 else high = nrow; 988 lastcol = col; 989 value = v + k*(stepval+4 + l)*4; 990 while (high-low > 7) { 991 t = (low+high)/2; 992 if (rp[t] > col) high = t; 993 else low = t; 994 } 995 for (i=low; i<high; i++) { 996 if (rp[i] > col) break; 997 if (rp[i] == col) { 998 bap = ap + 16*i; 999 for (ii=0; ii<4; ii++,value+=stepval) { 1000 for (jj=ii; jj<16; jj+=4) { 1001 bap[jj] += *value++; 1002 } 1003 } 1004 goto noinsert2; 1005 } 1006 } 1007 N = nrow++ - 1; 1008 high++; /* added new column index thus must search to one higher than before */ 1009 /* shift up all the later entries in this row */ 1010 for (ii=N; ii>=i; ii--) { 1011 rp[ii+1] = rp[ii]; 1012 PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16)); 1013 } 1014 if (N >= i) { 1015 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1016 } 1017 rp[i] = col; 1018 bap = ap + 16*i; 1019 for (ii=0; ii<4; ii++,value+=stepval) { 1020 for (jj=ii; jj<16; jj+=4) { 1021 bap[jj] = *value++; 1022 } 1023 } 1024 noinsert2:; 1025 low = i; 1026 } 1027 ailen[row] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1033 #define matsetvalues4_ MATSETVALUES4 1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1035 #define matsetvalues4_ matsetvalues4 1036 #endif 1037 1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1039 { 1040 Mat A = *AA; 1041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1042 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1043 PetscInt *ai=a->i,*ailen=a->ilen; 1044 PetscInt *aj=a->j,brow,bcol; 1045 PetscInt ridx,cidx,lastcol = -1; 1046 MatScalar *ap,value,*aa=a->a,*bap; 1047 1048 PetscFunctionBegin; 1049 for (k=0; k<m; k++) { /* loop over added rows */ 1050 row = im[k]; brow = row/4; 1051 rp = aj + ai[brow]; 1052 ap = aa + 16*ai[brow]; 1053 nrow = ailen[brow]; 1054 low = 0; 1055 high = nrow; 1056 for (l=0; l<n; l++) { /* loop over added columns */ 1057 col = in[l]; bcol = col/4; 1058 ridx = row % 4; cidx = col % 4; 1059 value = v[l + k*n]; 1060 if (col <= lastcol) low = 0; 1061 else high = nrow; 1062 lastcol = col; 1063 while (high-low > 7) { 1064 t = (low+high)/2; 1065 if (rp[t] > bcol) high = t; 1066 else low = t; 1067 } 1068 for (i=low; i<high; i++) { 1069 if (rp[i] > bcol) break; 1070 if (rp[i] == bcol) { 1071 bap = ap + 16*i + 4*cidx + ridx; 1072 *bap += value; 1073 goto noinsert1; 1074 } 1075 } 1076 N = nrow++ - 1; 1077 high++; /* added new column thus must search to one higher than before */ 1078 /* shift up all the later entries in this row */ 1079 PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1080 PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1))); 1081 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1082 rp[i] = bcol; 1083 ap[16*i + 4*cidx + ridx] = value; 1084 noinsert1:; 1085 low = i; 1086 } 1087 ailen[brow] = nrow; 1088 } 1089 PetscFunctionReturnVoid(); 1090 } 1091 1092 /* 1093 Checks for missing diagonals 1094 */ 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscInt *diag,*ii = a->i,i; 1099 1100 PetscFunctionBegin; 1101 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1102 *missing = PETSC_FALSE; 1103 if (A->rmap->n > 0 && !ii) { 1104 *missing = PETSC_TRUE; 1105 if (d) *d = 0; 1106 PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n")); 1107 } else { 1108 PetscInt n; 1109 n = PetscMin(a->mbs, a->nbs); 1110 diag = a->diag; 1111 for (i=0; i<n; i++) { 1112 if (diag[i] >= ii[i+1]) { 1113 *missing = PETSC_TRUE; 1114 if (d) *d = i; 1115 PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i)); 1116 break; 1117 } 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1124 { 1125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1126 PetscInt i,j,m = a->mbs; 1127 1128 PetscFunctionBegin; 1129 if (!a->diag) { 1130 PetscCall(PetscMalloc1(m,&a->diag)); 1131 PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt))); 1132 a->free_diag = PETSC_TRUE; 1133 } 1134 for (i=0; i<m; i++) { 1135 a->diag[i] = a->i[i+1]; 1136 for (j=a->i[i]; j<a->i[i+1]; j++) { 1137 if (a->j[j] == i) { 1138 a->diag[i] = j; 1139 break; 1140 } 1141 } 1142 } 1143 PetscFunctionReturn(0); 1144 } 1145 1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1147 { 1148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1149 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1150 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1151 1152 PetscFunctionBegin; 1153 *nn = n; 1154 if (!ia) PetscFunctionReturn(0); 1155 if (symmetric) { 1156 PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja)); 1157 nz = tia[n]; 1158 } else { 1159 tia = a->i; tja = a->j; 1160 } 1161 1162 if (!blockcompressed && bs > 1) { 1163 (*nn) *= bs; 1164 /* malloc & create the natural set of indices */ 1165 PetscCall(PetscMalloc1((n+1)*bs,ia)); 1166 if (n) { 1167 (*ia)[0] = oshift; 1168 for (j=1; j<bs; j++) { 1169 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1170 } 1171 } 1172 1173 for (i=1; i<n; i++) { 1174 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1175 for (j=1; j<bs; j++) { 1176 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1177 } 1178 } 1179 if (n) { 1180 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1181 } 1182 1183 if (inja) { 1184 PetscCall(PetscMalloc1(nz*bs*bs,ja)); 1185 cnt = 0; 1186 for (i=0; i<n; i++) { 1187 for (j=0; j<bs; j++) { 1188 for (k=tia[i]; k<tia[i+1]; k++) { 1189 for (l=0; l<bs; l++) { 1190 (*ja)[cnt++] = bs*tja[k] + l; 1191 } 1192 } 1193 } 1194 } 1195 } 1196 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 PetscCall(PetscFree(tia)); 1199 PetscCall(PetscFree(tja)); 1200 } 1201 } else if (oshift == 1) { 1202 if (symmetric) { 1203 nz = tia[A->rmap->n/bs]; 1204 /* add 1 to i and j indices */ 1205 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1206 *ia = tia; 1207 if (ja) { 1208 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1209 *ja = tja; 1210 } 1211 } else { 1212 nz = a->i[A->rmap->n/bs]; 1213 /* malloc space and add 1 to i and j indices */ 1214 PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia)); 1215 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1216 if (ja) { 1217 PetscCall(PetscMalloc1(nz,ja)); 1218 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1219 } 1220 } 1221 } else { 1222 *ia = tia; 1223 if (ja) *ja = tja; 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1229 { 1230 PetscFunctionBegin; 1231 if (!ia) PetscFunctionReturn(0); 1232 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1233 PetscCall(PetscFree(*ia)); 1234 if (ja) PetscCall(PetscFree(*ja)); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1240 { 1241 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1242 1243 PetscFunctionBegin; 1244 #if defined(PETSC_USE_LOG) 1245 PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz); 1246 #endif 1247 PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i)); 1248 PetscCall(ISDestroy(&a->row)); 1249 PetscCall(ISDestroy(&a->col)); 1250 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1251 PetscCall(PetscFree(a->idiag)); 1252 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen)); 1253 PetscCall(PetscFree(a->solve_work)); 1254 PetscCall(PetscFree(a->mult_work)); 1255 PetscCall(PetscFree(a->sor_workt)); 1256 PetscCall(PetscFree(a->sor_work)); 1257 PetscCall(ISDestroy(&a->icol)); 1258 PetscCall(PetscFree(a->saved_values)); 1259 PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex)); 1260 1261 PetscCall(MatDestroy(&a->sbaijMat)); 1262 PetscCall(MatDestroy(&a->parent)); 1263 PetscCall(PetscFree(A->data)); 1264 1265 PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 1266 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL)); 1267 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL)); 1268 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL)); 1269 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL)); 1270 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL)); 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL)); 1277 #if defined(PETSC_HAVE_HYPRE) 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL)); 1279 #endif 1280 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL)); 1281 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1286 { 1287 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1288 1289 PetscFunctionBegin; 1290 switch (op) { 1291 case MAT_ROW_ORIENTED: 1292 a->roworiented = flg; 1293 break; 1294 case MAT_KEEP_NONZERO_PATTERN: 1295 a->keepnonzeropattern = flg; 1296 break; 1297 case MAT_NEW_NONZERO_LOCATIONS: 1298 a->nonew = (flg ? 0 : 1); 1299 break; 1300 case MAT_NEW_NONZERO_LOCATION_ERR: 1301 a->nonew = (flg ? -1 : 0); 1302 break; 1303 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1304 a->nonew = (flg ? -2 : 0); 1305 break; 1306 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1307 a->nounused = (flg ? -1 : 0); 1308 break; 1309 case MAT_FORCE_DIAGONAL_ENTRIES: 1310 case MAT_IGNORE_OFF_PROC_ENTRIES: 1311 case MAT_USE_HASH_TABLE: 1312 case MAT_SORTED_FULL: 1313 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1314 break; 1315 case MAT_SPD: 1316 case MAT_SYMMETRIC: 1317 case MAT_STRUCTURALLY_SYMMETRIC: 1318 case MAT_HERMITIAN: 1319 case MAT_SYMMETRY_ETERNAL: 1320 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1321 case MAT_SUBMAT_SINGLEIS: 1322 case MAT_STRUCTURE_ONLY: 1323 case MAT_SPD_ETERNAL: 1324 /* if the diagonal matrix is square it inherits some of the properties above */ 1325 break; 1326 default: 1327 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1333 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1334 { 1335 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1336 MatScalar *aa_i; 1337 PetscScalar *v_i; 1338 1339 PetscFunctionBegin; 1340 bs = A->rmap->bs; 1341 bs2 = bs*bs; 1342 PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row); 1343 1344 bn = row/bs; /* Block number */ 1345 bp = row % bs; /* Block Position */ 1346 M = ai[bn+1] - ai[bn]; 1347 *nz = bs*M; 1348 1349 if (v) { 1350 *v = NULL; 1351 if (*nz) { 1352 PetscCall(PetscMalloc1(*nz,v)); 1353 for (i=0; i<M; i++) { /* for each block in the block row */ 1354 v_i = *v + i*bs; 1355 aa_i = aa + bs2*(ai[bn] + i); 1356 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1357 } 1358 } 1359 } 1360 1361 if (idx) { 1362 *idx = NULL; 1363 if (*nz) { 1364 PetscCall(PetscMalloc1(*nz,idx)); 1365 for (i=0; i<M; i++) { /* for each block in the block row */ 1366 idx_i = *idx + i*bs; 1367 itmp = bs*aj[ai[bn] + i]; 1368 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1369 } 1370 } 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1376 { 1377 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1378 1379 PetscFunctionBegin; 1380 PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1385 { 1386 PetscFunctionBegin; 1387 if (nz) *nz = 0; 1388 if (idx) PetscCall(PetscFree(*idx)); 1389 if (v) PetscCall(PetscFree(*v)); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1394 { 1395 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1396 Mat C; 1397 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1398 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1399 MatScalar *ata,*aa=a->a; 1400 1401 PetscFunctionBegin; 1402 PetscCall(PetscCalloc1(1+nbs,&atfill)); 1403 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1404 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1405 1406 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C)); 1407 PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N)); 1408 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 1409 PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill)); 1410 1411 at = (Mat_SeqBAIJ*)C->data; 1412 ati = at->i; 1413 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1414 } else { 1415 C = *B; 1416 at = (Mat_SeqBAIJ*)C->data; 1417 ati = at->i; 1418 } 1419 1420 atj = at->j; 1421 ata = at->a; 1422 1423 /* Copy ati into atfill so we have locations of the next free space in atj */ 1424 PetscCall(PetscArraycpy(atfill,ati,nbs)); 1425 1426 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1427 for (i=0; i<mbs; i++) { 1428 anzj = ai[i+1] - ai[i]; 1429 for (j=0; j<anzj; j++) { 1430 atj[atfill[*aj]] = i; 1431 for (kr=0; kr<bs; kr++) { 1432 for (k=0; k<bs; k++) { 1433 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1434 } 1435 } 1436 atfill[*aj++] += 1; 1437 } 1438 } 1439 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1440 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1441 1442 /* Clean up temporary space and complete requests. */ 1443 PetscCall(PetscFree(atfill)); 1444 1445 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1446 PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1447 *B = C; 1448 } else { 1449 PetscCall(MatHeaderMerge(A,&C)); 1450 } 1451 PetscFunctionReturn(0); 1452 } 1453 1454 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1455 { 1456 Mat Btrans; 1457 1458 PetscFunctionBegin; 1459 *f = PETSC_FALSE; 1460 PetscCall(MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans)); 1461 PetscCall(MatEqual_SeqBAIJ(B,Btrans,f)); 1462 PetscCall(MatDestroy(&Btrans)); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1467 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1468 { 1469 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1470 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1471 PetscInt *rowlens,*colidxs; 1472 PetscScalar *matvals; 1473 1474 PetscFunctionBegin; 1475 PetscCall(PetscViewerSetUp(viewer)); 1476 1477 M = mat->rmap->N; 1478 N = mat->cmap->N; 1479 m = mat->rmap->n; 1480 bs = mat->rmap->bs; 1481 nz = bs*bs*A->nz; 1482 1483 /* write matrix header */ 1484 header[0] = MAT_FILE_CLASSID; 1485 header[1] = M; header[2] = N; header[3] = nz; 1486 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1487 1488 /* store row lengths */ 1489 PetscCall(PetscMalloc1(m,&rowlens)); 1490 for (cnt=0, i=0; i<A->mbs; i++) 1491 for (j=0; j<bs; j++) 1492 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1493 PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT)); 1494 PetscCall(PetscFree(rowlens)); 1495 1496 /* store column indices */ 1497 PetscCall(PetscMalloc1(nz,&colidxs)); 1498 for (cnt=0, i=0; i<A->mbs; i++) 1499 for (k=0; k<bs; k++) 1500 for (j=A->i[i]; j<A->i[i+1]; j++) 1501 for (l=0; l<bs; l++) 1502 colidxs[cnt++] = bs*A->j[j] + l; 1503 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1504 PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT)); 1505 PetscCall(PetscFree(colidxs)); 1506 1507 /* store nonzero values */ 1508 PetscCall(PetscMalloc1(nz,&matvals)); 1509 for (cnt=0, i=0; i<A->mbs; i++) 1510 for (k=0; k<bs; k++) 1511 for (j=A->i[i]; j<A->i[i+1]; j++) 1512 for (l=0; l<bs; l++) 1513 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1514 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1515 PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR)); 1516 PetscCall(PetscFree(matvals)); 1517 1518 /* write block size option to the viewer's .info file */ 1519 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1524 { 1525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1526 PetscInt i,bs = A->rmap->bs,k; 1527 1528 PetscFunctionBegin; 1529 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1530 for (i=0; i<a->mbs; i++) { 1531 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1)); 1532 for (k=a->i[i]; k<a->i[i+1]; k++) { 1533 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1)); 1534 } 1535 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1536 } 1537 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1542 { 1543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1544 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1545 PetscViewerFormat format; 1546 1547 PetscFunctionBegin; 1548 if (A->structure_only) { 1549 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer)); 1550 PetscFunctionReturn(0); 1551 } 1552 1553 PetscCall(PetscViewerGetFormat(viewer,&format)); 1554 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1555 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1556 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1557 const char *matname; 1558 Mat aij; 1559 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij)); 1560 PetscCall(PetscObjectGetName((PetscObject)A,&matname)); 1561 PetscCall(PetscObjectSetName((PetscObject)aij,matname)); 1562 PetscCall(MatView(aij,viewer)); 1563 PetscCall(MatDestroy(&aij)); 1564 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1565 PetscFunctionReturn(0); 1566 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1567 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1568 for (i=0; i<a->mbs; i++) { 1569 for (j=0; j<bs; j++) { 1570 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1571 for (k=a->i[i]; k<a->i[i+1]; k++) { 1572 for (l=0; l<bs; l++) { 1573 #if defined(PETSC_USE_COMPLEX) 1574 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1575 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l, 1576 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1577 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1578 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l, 1579 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1580 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1581 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1582 } 1583 #else 1584 if (a->a[bs2*k + l*bs + j] != 0.0) { 1585 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1586 } 1587 #endif 1588 } 1589 } 1590 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1591 } 1592 } 1593 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1594 } else { 1595 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1596 for (i=0; i<a->mbs; i++) { 1597 for (j=0; j<bs; j++) { 1598 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1599 for (k=a->i[i]; k<a->i[i+1]; k++) { 1600 for (l=0; l<bs; l++) { 1601 #if defined(PETSC_USE_COMPLEX) 1602 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1603 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l, 1604 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1605 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1606 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l, 1607 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1608 } else { 1609 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1610 } 1611 #else 1612 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1613 #endif 1614 } 1615 } 1616 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1617 } 1618 } 1619 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1620 } 1621 PetscCall(PetscViewerFlush(viewer)); 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #include <petscdraw.h> 1626 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1627 { 1628 Mat A = (Mat) Aa; 1629 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1630 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1631 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1632 MatScalar *aa; 1633 PetscViewer viewer; 1634 PetscViewerFormat format; 1635 1636 PetscFunctionBegin; 1637 PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer)); 1638 PetscCall(PetscViewerGetFormat(viewer,&format)); 1639 PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr)); 1640 1641 /* loop over matrix elements drawing boxes */ 1642 1643 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1644 PetscDrawCollectiveBegin(draw); 1645 /* Blue for negative, Cyan for zero and Red for positive */ 1646 color = PETSC_DRAW_BLUE; 1647 for (i=0,row=0; i<mbs; i++,row+=bs) { 1648 for (j=a->i[i]; j<a->i[i+1]; j++) { 1649 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1650 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1651 aa = a->a + j*bs2; 1652 for (k=0; k<bs; k++) { 1653 for (l=0; l<bs; l++) { 1654 if (PetscRealPart(*aa++) >= 0.) continue; 1655 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1656 } 1657 } 1658 } 1659 } 1660 color = PETSC_DRAW_CYAN; 1661 for (i=0,row=0; i<mbs; i++,row+=bs) { 1662 for (j=a->i[i]; j<a->i[i+1]; j++) { 1663 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1664 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1665 aa = a->a + j*bs2; 1666 for (k=0; k<bs; k++) { 1667 for (l=0; l<bs; l++) { 1668 if (PetscRealPart(*aa++) != 0.) continue; 1669 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1670 } 1671 } 1672 } 1673 } 1674 color = PETSC_DRAW_RED; 1675 for (i=0,row=0; i<mbs; i++,row+=bs) { 1676 for (j=a->i[i]; j<a->i[i+1]; j++) { 1677 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1678 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1679 aa = a->a + j*bs2; 1680 for (k=0; k<bs; k++) { 1681 for (l=0; l<bs; l++) { 1682 if (PetscRealPart(*aa++) <= 0.) continue; 1683 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1684 } 1685 } 1686 } 1687 } 1688 PetscDrawCollectiveEnd(draw); 1689 } else { 1690 /* use contour shading to indicate magnitude of values */ 1691 /* first determine max of all nonzero values */ 1692 PetscReal minv = 0.0, maxv = 0.0; 1693 PetscDraw popup; 1694 1695 for (i=0; i<a->nz*a->bs2; i++) { 1696 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1697 } 1698 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1699 PetscCall(PetscDrawGetPopup(draw,&popup)); 1700 PetscCall(PetscDrawScalePopup(popup,0.0,maxv)); 1701 1702 PetscDrawCollectiveBegin(draw); 1703 for (i=0,row=0; i<mbs; i++,row+=bs) { 1704 for (j=a->i[i]; j<a->i[i+1]; j++) { 1705 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1706 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1707 aa = a->a + j*bs2; 1708 for (k=0; k<bs; k++) { 1709 for (l=0; l<bs; l++) { 1710 MatScalar v = *aa++; 1711 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1712 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1713 } 1714 } 1715 } 1716 } 1717 PetscDrawCollectiveEnd(draw); 1718 } 1719 PetscFunctionReturn(0); 1720 } 1721 1722 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1723 { 1724 PetscReal xl,yl,xr,yr,w,h; 1725 PetscDraw draw; 1726 PetscBool isnull; 1727 1728 PetscFunctionBegin; 1729 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1730 PetscCall(PetscDrawIsNull(draw,&isnull)); 1731 if (isnull) PetscFunctionReturn(0); 1732 1733 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1734 xr += w; yr += h; xl = -w; yl = -h; 1735 PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr)); 1736 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer)); 1737 PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A)); 1738 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL)); 1739 PetscCall(PetscDrawSave(draw)); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1744 { 1745 PetscBool iascii,isbinary,isdraw; 1746 1747 PetscFunctionBegin; 1748 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1749 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1750 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1751 if (iascii) { 1752 PetscCall(MatView_SeqBAIJ_ASCII(A,viewer)); 1753 } else if (isbinary) { 1754 PetscCall(MatView_SeqBAIJ_Binary(A,viewer)); 1755 } else if (isdraw) { 1756 PetscCall(MatView_SeqBAIJ_Draw(A,viewer)); 1757 } else { 1758 Mat B; 1759 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1760 PetscCall(MatView(B,viewer)); 1761 PetscCall(MatDestroy(&B)); 1762 } 1763 PetscFunctionReturn(0); 1764 } 1765 1766 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1767 { 1768 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1769 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1770 PetscInt *ai = a->i,*ailen = a->ilen; 1771 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1772 MatScalar *ap,*aa = a->a; 1773 1774 PetscFunctionBegin; 1775 for (k=0; k<m; k++) { /* loop over rows */ 1776 row = im[k]; brow = row/bs; 1777 if (row < 0) {v += n; continue;} /* negative row */ 1778 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row); 1779 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1780 ap = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1781 nrow = ailen[brow]; 1782 for (l=0; l<n; l++) { /* loop over columns */ 1783 if (in[l] < 0) {v++; continue;} /* negative column */ 1784 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]); 1785 col = in[l]; 1786 bcol = col/bs; 1787 cidx = col%bs; 1788 ridx = row%bs; 1789 high = nrow; 1790 low = 0; /* assume unsorted */ 1791 while (high-low > 5) { 1792 t = (low+high)/2; 1793 if (rp[t] > bcol) high = t; 1794 else low = t; 1795 } 1796 for (i=low; i<high; i++) { 1797 if (rp[i] > bcol) break; 1798 if (rp[i] == bcol) { 1799 *v++ = ap[bs2*i+bs*cidx+ridx]; 1800 goto finished; 1801 } 1802 } 1803 *v++ = 0.0; 1804 finished:; 1805 } 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1811 { 1812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1813 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1814 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1815 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1816 PetscBool roworiented=a->roworiented; 1817 const PetscScalar *value = v; 1818 MatScalar *ap=NULL,*aa = a->a,*bap; 1819 1820 PetscFunctionBegin; 1821 if (roworiented) { 1822 stepval = (n-1)*bs; 1823 } else { 1824 stepval = (m-1)*bs; 1825 } 1826 for (k=0; k<m; k++) { /* loop over added rows */ 1827 row = im[k]; 1828 if (row < 0) continue; 1829 PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1); 1830 rp = aj + ai[row]; 1831 if (!A->structure_only) ap = aa + bs2*ai[row]; 1832 rmax = imax[row]; 1833 nrow = ailen[row]; 1834 low = 0; 1835 high = nrow; 1836 for (l=0; l<n; l++) { /* loop over added columns */ 1837 if (in[l] < 0) continue; 1838 PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1); 1839 col = in[l]; 1840 if (!A->structure_only) { 1841 if (roworiented) { 1842 value = v + (k*(stepval+bs) + l)*bs; 1843 } else { 1844 value = v + (l*(stepval+bs) + k)*bs; 1845 } 1846 } 1847 if (col <= lastcol) low = 0; 1848 else high = nrow; 1849 lastcol = col; 1850 while (high-low > 7) { 1851 t = (low+high)/2; 1852 if (rp[t] > col) high = t; 1853 else low = t; 1854 } 1855 for (i=low; i<high; i++) { 1856 if (rp[i] > col) break; 1857 if (rp[i] == col) { 1858 if (A->structure_only) goto noinsert2; 1859 bap = ap + bs2*i; 1860 if (roworiented) { 1861 if (is == ADD_VALUES) { 1862 for (ii=0; ii<bs; ii++,value+=stepval) { 1863 for (jj=ii; jj<bs2; jj+=bs) { 1864 bap[jj] += *value++; 1865 } 1866 } 1867 } else { 1868 for (ii=0; ii<bs; ii++,value+=stepval) { 1869 for (jj=ii; jj<bs2; jj+=bs) { 1870 bap[jj] = *value++; 1871 } 1872 } 1873 } 1874 } else { 1875 if (is == ADD_VALUES) { 1876 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1877 for (jj=0; jj<bs; jj++) { 1878 bap[jj] += value[jj]; 1879 } 1880 bap += bs; 1881 } 1882 } else { 1883 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1884 for (jj=0; jj<bs; jj++) { 1885 bap[jj] = value[jj]; 1886 } 1887 bap += bs; 1888 } 1889 } 1890 } 1891 goto noinsert2; 1892 } 1893 } 1894 if (nonew == 1) goto noinsert2; 1895 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 1896 if (A->structure_only) { 1897 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1898 } else { 1899 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1900 } 1901 N = nrow++ - 1; high++; 1902 /* shift up all the later entries in this row */ 1903 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1904 rp[i] = col; 1905 if (!A->structure_only) { 1906 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 1907 bap = ap + bs2*i; 1908 if (roworiented) { 1909 for (ii=0; ii<bs; ii++,value+=stepval) { 1910 for (jj=ii; jj<bs2; jj+=bs) { 1911 bap[jj] = *value++; 1912 } 1913 } 1914 } else { 1915 for (ii=0; ii<bs; ii++,value+=stepval) { 1916 for (jj=0; jj<bs; jj++) { 1917 *bap++ = *value++; 1918 } 1919 } 1920 } 1921 } 1922 noinsert2:; 1923 low = i; 1924 } 1925 ailen[row] = nrow; 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1931 { 1932 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1933 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1934 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1935 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1936 MatScalar *aa = a->a,*ap; 1937 PetscReal ratio=0.6; 1938 1939 PetscFunctionBegin; 1940 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1941 1942 if (m) rmax = ailen[0]; 1943 for (i=1; i<mbs; i++) { 1944 /* move each row back by the amount of empty slots (fshift) before it*/ 1945 fshift += imax[i-1] - ailen[i-1]; 1946 rmax = PetscMax(rmax,ailen[i]); 1947 if (fshift) { 1948 ip = aj + ai[i]; 1949 ap = aa + bs2*ai[i]; 1950 N = ailen[i]; 1951 PetscCall(PetscArraymove(ip-fshift,ip,N)); 1952 if (!A->structure_only) { 1953 PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N)); 1954 } 1955 } 1956 ai[i] = ai[i-1] + ailen[i-1]; 1957 } 1958 if (mbs) { 1959 fshift += imax[mbs-1] - ailen[mbs-1]; 1960 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1961 } 1962 1963 /* reset ilen and imax for each row */ 1964 a->nonzerorowcnt = 0; 1965 if (A->structure_only) { 1966 PetscCall(PetscFree2(a->imax,a->ilen)); 1967 } else { /* !A->structure_only */ 1968 for (i=0; i<mbs; i++) { 1969 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1970 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1971 } 1972 } 1973 a->nz = ai[mbs]; 1974 1975 /* diagonals may have moved, so kill the diagonal pointers */ 1976 a->idiagvalid = PETSC_FALSE; 1977 if (fshift && a->diag) { 1978 PetscCall(PetscFree(a->diag)); 1979 PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt))); 1980 a->diag = NULL; 1981 } 1982 if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1983 PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2)); 1984 PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs)); 1985 PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax)); 1986 1987 A->info.mallocs += a->reallocs; 1988 a->reallocs = 0; 1989 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1990 a->rmax = rmax; 1991 1992 if (!A->structure_only) { 1993 PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio)); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 /* 1999 This function returns an array of flags which indicate the locations of contiguous 2000 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2001 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2002 Assume: sizes should be long enough to hold all the values. 2003 */ 2004 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2005 { 2006 PetscInt i,j,k,row; 2007 PetscBool flg; 2008 2009 PetscFunctionBegin; 2010 for (i=0,j=0; i<n; j++) { 2011 row = idx[i]; 2012 if (row%bs!=0) { /* Not the beginning of a block */ 2013 sizes[j] = 1; 2014 i++; 2015 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2016 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2017 i++; 2018 } else { /* Beginning of the block, so check if the complete block exists */ 2019 flg = PETSC_TRUE; 2020 for (k=1; k<bs; k++) { 2021 if (row+k != idx[i+k]) { /* break in the block */ 2022 flg = PETSC_FALSE; 2023 break; 2024 } 2025 } 2026 if (flg) { /* No break in the bs */ 2027 sizes[j] = bs; 2028 i += bs; 2029 } else { 2030 sizes[j] = 1; 2031 i++; 2032 } 2033 } 2034 } 2035 *bs_max = j; 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2040 { 2041 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2042 PetscInt i,j,k,count,*rows; 2043 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2044 PetscScalar zero = 0.0; 2045 MatScalar *aa; 2046 const PetscScalar *xx; 2047 PetscScalar *bb; 2048 2049 PetscFunctionBegin; 2050 /* fix right hand side if needed */ 2051 if (x && b) { 2052 PetscCall(VecGetArrayRead(x,&xx)); 2053 PetscCall(VecGetArray(b,&bb)); 2054 for (i=0; i<is_n; i++) { 2055 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2056 } 2057 PetscCall(VecRestoreArrayRead(x,&xx)); 2058 PetscCall(VecRestoreArray(b,&bb)); 2059 } 2060 2061 /* Make a copy of the IS and sort it */ 2062 /* allocate memory for rows,sizes */ 2063 PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes)); 2064 2065 /* copy IS values to rows, and sort them */ 2066 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2067 PetscCall(PetscSortInt(is_n,rows)); 2068 2069 if (baij->keepnonzeropattern) { 2070 for (i=0; i<is_n; i++) sizes[i] = 1; 2071 bs_max = is_n; 2072 } else { 2073 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max)); 2074 A->nonzerostate++; 2075 } 2076 2077 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2078 row = rows[j]; 2079 PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row); 2080 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2081 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2082 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2083 if (diag != (PetscScalar)0.0) { 2084 if (baij->ilen[row/bs] > 0) { 2085 baij->ilen[row/bs] = 1; 2086 baij->j[baij->i[row/bs]] = row/bs; 2087 2088 PetscCall(PetscArrayzero(aa,count*bs)); 2089 } 2090 /* Now insert all the diagonal values for this bs */ 2091 for (k=0; k<bs; k++) { 2092 PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES)); 2093 } 2094 } else { /* (diag == 0.0) */ 2095 baij->ilen[row/bs] = 0; 2096 } /* end (diag == 0.0) */ 2097 } else { /* (sizes[i] != bs) */ 2098 PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2099 for (k=0; k<count; k++) { 2100 aa[0] = zero; 2101 aa += bs; 2102 } 2103 if (diag != (PetscScalar)0.0) { 2104 PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES)); 2105 } 2106 } 2107 } 2108 2109 PetscCall(PetscFree2(rows,sizes)); 2110 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2115 { 2116 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2117 PetscInt i,j,k,count; 2118 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2119 PetscScalar zero = 0.0; 2120 MatScalar *aa; 2121 const PetscScalar *xx; 2122 PetscScalar *bb; 2123 PetscBool *zeroed,vecs = PETSC_FALSE; 2124 2125 PetscFunctionBegin; 2126 /* fix right hand side if needed */ 2127 if (x && b) { 2128 PetscCall(VecGetArrayRead(x,&xx)); 2129 PetscCall(VecGetArray(b,&bb)); 2130 vecs = PETSC_TRUE; 2131 } 2132 2133 /* zero the columns */ 2134 PetscCall(PetscCalloc1(A->rmap->n,&zeroed)); 2135 for (i=0; i<is_n; i++) { 2136 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]); 2137 zeroed[is_idx[i]] = PETSC_TRUE; 2138 } 2139 for (i=0; i<A->rmap->N; i++) { 2140 if (!zeroed[i]) { 2141 row = i/bs; 2142 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2143 for (k=0; k<bs; k++) { 2144 col = bs*baij->j[j] + k; 2145 if (zeroed[col]) { 2146 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2147 if (vecs) bb[i] -= aa[0]*xx[col]; 2148 aa[0] = 0.0; 2149 } 2150 } 2151 } 2152 } else if (vecs) bb[i] = diag*xx[i]; 2153 } 2154 PetscCall(PetscFree(zeroed)); 2155 if (vecs) { 2156 PetscCall(VecRestoreArrayRead(x,&xx)); 2157 PetscCall(VecRestoreArray(b,&bb)); 2158 } 2159 2160 /* zero the rows */ 2161 for (i=0; i<is_n; i++) { 2162 row = is_idx[i]; 2163 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2164 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2165 for (k=0; k<count; k++) { 2166 aa[0] = zero; 2167 aa += bs; 2168 } 2169 if (diag != (PetscScalar)0.0) { 2170 PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES)); 2171 } 2172 } 2173 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2178 { 2179 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2180 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2181 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2182 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2183 PetscInt ridx,cidx,bs2=a->bs2; 2184 PetscBool roworiented=a->roworiented; 2185 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2186 2187 PetscFunctionBegin; 2188 for (k=0; k<m; k++) { /* loop over added rows */ 2189 row = im[k]; 2190 brow = row/bs; 2191 if (row < 0) continue; 2192 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1); 2193 rp = aj + ai[brow]; 2194 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2195 rmax = imax[brow]; 2196 nrow = ailen[brow]; 2197 low = 0; 2198 high = nrow; 2199 for (l=0; l<n; l++) { /* loop over added columns */ 2200 if (in[l] < 0) continue; 2201 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1); 2202 col = in[l]; bcol = col/bs; 2203 ridx = row % bs; cidx = col % bs; 2204 if (!A->structure_only) { 2205 if (roworiented) { 2206 value = v[l + k*n]; 2207 } else { 2208 value = v[k + l*m]; 2209 } 2210 } 2211 if (col <= lastcol) low = 0; else high = nrow; 2212 lastcol = col; 2213 while (high-low > 7) { 2214 t = (low+high)/2; 2215 if (rp[t] > bcol) high = t; 2216 else low = t; 2217 } 2218 for (i=low; i<high; i++) { 2219 if (rp[i] > bcol) break; 2220 if (rp[i] == bcol) { 2221 bap = ap + bs2*i + bs*cidx + ridx; 2222 if (!A->structure_only) { 2223 if (is == ADD_VALUES) *bap += value; 2224 else *bap = value; 2225 } 2226 goto noinsert1; 2227 } 2228 } 2229 if (nonew == 1) goto noinsert1; 2230 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2231 if (A->structure_only) { 2232 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2233 } else { 2234 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2235 } 2236 N = nrow++ - 1; high++; 2237 /* shift up all the later entries in this row */ 2238 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 2239 rp[i] = bcol; 2240 if (!A->structure_only) { 2241 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 2242 PetscCall(PetscArrayzero(ap+bs2*i,bs2)); 2243 ap[bs2*i + bs*cidx + ridx] = value; 2244 } 2245 a->nz++; 2246 A->nonzerostate++; 2247 noinsert1:; 2248 low = i; 2249 } 2250 ailen[brow] = nrow; 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 2255 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2256 { 2257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2258 Mat outA; 2259 PetscBool row_identity,col_identity; 2260 2261 PetscFunctionBegin; 2262 PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2263 PetscCall(ISIdentity(row,&row_identity)); 2264 PetscCall(ISIdentity(col,&col_identity)); 2265 PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2266 2267 outA = inA; 2268 inA->factortype = MAT_FACTOR_LU; 2269 PetscCall(PetscFree(inA->solvertype)); 2270 PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype)); 2271 2272 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2273 2274 PetscCall(PetscObjectReference((PetscObject)row)); 2275 PetscCall(ISDestroy(&a->row)); 2276 a->row = row; 2277 PetscCall(PetscObjectReference((PetscObject)col)); 2278 PetscCall(ISDestroy(&a->col)); 2279 a->col = col; 2280 2281 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2282 PetscCall(ISDestroy(&a->icol)); 2283 PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol)); 2284 PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol)); 2285 2286 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity))); 2287 if (!a->solve_work) { 2288 PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work)); 2289 PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar))); 2290 } 2291 PetscCall(MatLUFactorNumeric(outA,inA,info)); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2296 { 2297 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2298 PetscInt i,nz,mbs; 2299 2300 PetscFunctionBegin; 2301 nz = baij->maxnz; 2302 mbs = baij->mbs; 2303 for (i=0; i<nz; i++) { 2304 baij->j[i] = indices[i]; 2305 } 2306 baij->nz = nz; 2307 for (i=0; i<mbs; i++) { 2308 baij->ilen[i] = baij->imax[i]; 2309 } 2310 PetscFunctionReturn(0); 2311 } 2312 2313 /*@ 2314 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2315 in the matrix. 2316 2317 Input Parameters: 2318 + mat - the SeqBAIJ matrix 2319 - indices - the column indices 2320 2321 Level: advanced 2322 2323 Notes: 2324 This can be called if you have precomputed the nonzero structure of the 2325 matrix and want to provide it to the matrix object to improve the performance 2326 of the MatSetValues() operation. 2327 2328 You MUST have set the correct numbers of nonzeros per row in the call to 2329 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2330 2331 MUST be called before any calls to MatSetValues(); 2332 2333 @*/ 2334 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2335 { 2336 PetscFunctionBegin; 2337 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2338 PetscValidIntPointer(indices,2); 2339 PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices)); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2344 { 2345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2346 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2347 PetscReal atmp; 2348 PetscScalar *x,zero = 0.0; 2349 MatScalar *aa; 2350 PetscInt ncols,brow,krow,kcol; 2351 2352 PetscFunctionBegin; 2353 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2354 PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2355 bs = A->rmap->bs; 2356 aa = a->a; 2357 ai = a->i; 2358 aj = a->j; 2359 mbs = a->mbs; 2360 2361 PetscCall(VecSet(v,zero)); 2362 PetscCall(VecGetArray(v,&x)); 2363 PetscCall(VecGetLocalSize(v,&n)); 2364 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2365 for (i=0; i<mbs; i++) { 2366 ncols = ai[1] - ai[0]; ai++; 2367 brow = bs*i; 2368 for (j=0; j<ncols; j++) { 2369 for (kcol=0; kcol<bs; kcol++) { 2370 for (krow=0; krow<bs; krow++) { 2371 atmp = PetscAbsScalar(*aa);aa++; 2372 row = brow + krow; /* row index */ 2373 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2374 } 2375 } 2376 aj++; 2377 } 2378 } 2379 PetscCall(VecRestoreArray(v,&x)); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2384 { 2385 PetscFunctionBegin; 2386 /* If the two matrices have the same copy implementation, use fast copy. */ 2387 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2389 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2390 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2391 2392 PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]); 2393 PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs); 2394 PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs])); 2395 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2396 } else { 2397 PetscCall(MatCopy_Basic(A,B,str)); 2398 } 2399 PetscFunctionReturn(0); 2400 } 2401 2402 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2403 { 2404 PetscFunctionBegin; 2405 PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL)); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2410 { 2411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2412 2413 PetscFunctionBegin; 2414 *array = a->a; 2415 PetscFunctionReturn(0); 2416 } 2417 2418 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2419 { 2420 PetscFunctionBegin; 2421 *array = NULL; 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2426 { 2427 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2428 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2429 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2430 2431 PetscFunctionBegin; 2432 /* Set the number of nonzeros in the new matrix */ 2433 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz)); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2438 { 2439 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2440 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2441 PetscBLASInt one=1; 2442 2443 PetscFunctionBegin; 2444 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2445 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2446 if (e) { 2447 PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e)); 2448 if (e) { 2449 PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e)); 2450 if (e) str = SAME_NONZERO_PATTERN; 2451 } 2452 } 2453 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN"); 2454 } 2455 if (str == SAME_NONZERO_PATTERN) { 2456 PetscScalar alpha = a; 2457 PetscBLASInt bnz; 2458 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 2459 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2460 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2461 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2462 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2463 } else { 2464 Mat B; 2465 PetscInt *nnz; 2466 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2467 PetscCall(PetscMalloc1(Y->rmap->N,&nnz)); 2468 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2469 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2470 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 2471 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 2472 PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name)); 2473 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz)); 2474 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 2475 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2476 PetscCall(MatHeaderMerge(Y,&B)); 2477 PetscCall(PetscFree(nnz)); 2478 } 2479 PetscFunctionReturn(0); 2480 } 2481 2482 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2483 { 2484 #if defined(PETSC_USE_COMPLEX) 2485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2486 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2487 MatScalar *aa = a->a; 2488 2489 PetscFunctionBegin; 2490 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2491 #else 2492 PetscFunctionBegin; 2493 #endif 2494 PetscFunctionReturn(0); 2495 } 2496 2497 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2498 { 2499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2500 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2501 MatScalar *aa = a->a; 2502 2503 PetscFunctionBegin; 2504 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2509 { 2510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2511 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2512 MatScalar *aa = a->a; 2513 2514 PetscFunctionBegin; 2515 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 /* 2520 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2521 */ 2522 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2523 { 2524 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2525 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2526 PetscInt nz = a->i[m],row,*jj,mr,col; 2527 2528 PetscFunctionBegin; 2529 *nn = n; 2530 if (!ia) PetscFunctionReturn(0); 2531 PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2532 PetscCall(PetscCalloc1(n,&collengths)); 2533 PetscCall(PetscMalloc1(n+1,&cia)); 2534 PetscCall(PetscMalloc1(nz,&cja)); 2535 jj = a->j; 2536 for (i=0; i<nz; i++) { 2537 collengths[jj[i]]++; 2538 } 2539 cia[0] = oshift; 2540 for (i=0; i<n; i++) { 2541 cia[i+1] = cia[i] + collengths[i]; 2542 } 2543 PetscCall(PetscArrayzero(collengths,n)); 2544 jj = a->j; 2545 for (row=0; row<m; row++) { 2546 mr = a->i[row+1] - a->i[row]; 2547 for (i=0; i<mr; i++) { 2548 col = *jj++; 2549 2550 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2551 } 2552 } 2553 PetscCall(PetscFree(collengths)); 2554 *ia = cia; *ja = cja; 2555 PetscFunctionReturn(0); 2556 } 2557 2558 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2559 { 2560 PetscFunctionBegin; 2561 if (!ia) PetscFunctionReturn(0); 2562 PetscCall(PetscFree(*ia)); 2563 PetscCall(PetscFree(*ja)); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 /* 2568 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2569 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2570 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2571 */ 2572 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2573 { 2574 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2575 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2576 PetscInt nz = a->i[m],row,*jj,mr,col; 2577 PetscInt *cspidx; 2578 2579 PetscFunctionBegin; 2580 *nn = n; 2581 if (!ia) PetscFunctionReturn(0); 2582 2583 PetscCall(PetscCalloc1(n,&collengths)); 2584 PetscCall(PetscMalloc1(n+1,&cia)); 2585 PetscCall(PetscMalloc1(nz,&cja)); 2586 PetscCall(PetscMalloc1(nz,&cspidx)); 2587 jj = a->j; 2588 for (i=0; i<nz; i++) { 2589 collengths[jj[i]]++; 2590 } 2591 cia[0] = oshift; 2592 for (i=0; i<n; i++) { 2593 cia[i+1] = cia[i] + collengths[i]; 2594 } 2595 PetscCall(PetscArrayzero(collengths,n)); 2596 jj = a->j; 2597 for (row=0; row<m; row++) { 2598 mr = a->i[row+1] - a->i[row]; 2599 for (i=0; i<mr; i++) { 2600 col = *jj++; 2601 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2602 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2603 } 2604 } 2605 PetscCall(PetscFree(collengths)); 2606 *ia = cia; 2607 *ja = cja; 2608 *spidx = cspidx; 2609 PetscFunctionReturn(0); 2610 } 2611 2612 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2613 { 2614 PetscFunctionBegin; 2615 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done)); 2616 PetscCall(PetscFree(*spidx)); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2621 { 2622 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2623 2624 PetscFunctionBegin; 2625 if (!Y->preallocated || !aij->nz) { 2626 PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL)); 2627 } 2628 PetscCall(MatShift_Basic(Y,a)); 2629 PetscFunctionReturn(0); 2630 } 2631 2632 /* -------------------------------------------------------------------*/ 2633 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2634 MatGetRow_SeqBAIJ, 2635 MatRestoreRow_SeqBAIJ, 2636 MatMult_SeqBAIJ_N, 2637 /* 4*/ MatMultAdd_SeqBAIJ_N, 2638 MatMultTranspose_SeqBAIJ, 2639 MatMultTransposeAdd_SeqBAIJ, 2640 NULL, 2641 NULL, 2642 NULL, 2643 /* 10*/ NULL, 2644 MatLUFactor_SeqBAIJ, 2645 NULL, 2646 NULL, 2647 MatTranspose_SeqBAIJ, 2648 /* 15*/ MatGetInfo_SeqBAIJ, 2649 MatEqual_SeqBAIJ, 2650 MatGetDiagonal_SeqBAIJ, 2651 MatDiagonalScale_SeqBAIJ, 2652 MatNorm_SeqBAIJ, 2653 /* 20*/ NULL, 2654 MatAssemblyEnd_SeqBAIJ, 2655 MatSetOption_SeqBAIJ, 2656 MatZeroEntries_SeqBAIJ, 2657 /* 24*/ MatZeroRows_SeqBAIJ, 2658 NULL, 2659 NULL, 2660 NULL, 2661 NULL, 2662 /* 29*/ MatSetUp_SeqBAIJ, 2663 NULL, 2664 NULL, 2665 NULL, 2666 NULL, 2667 /* 34*/ MatDuplicate_SeqBAIJ, 2668 NULL, 2669 NULL, 2670 MatILUFactor_SeqBAIJ, 2671 NULL, 2672 /* 39*/ MatAXPY_SeqBAIJ, 2673 MatCreateSubMatrices_SeqBAIJ, 2674 MatIncreaseOverlap_SeqBAIJ, 2675 MatGetValues_SeqBAIJ, 2676 MatCopy_SeqBAIJ, 2677 /* 44*/ NULL, 2678 MatScale_SeqBAIJ, 2679 MatShift_SeqBAIJ, 2680 NULL, 2681 MatZeroRowsColumns_SeqBAIJ, 2682 /* 49*/ NULL, 2683 MatGetRowIJ_SeqBAIJ, 2684 MatRestoreRowIJ_SeqBAIJ, 2685 MatGetColumnIJ_SeqBAIJ, 2686 MatRestoreColumnIJ_SeqBAIJ, 2687 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2688 NULL, 2689 NULL, 2690 NULL, 2691 MatSetValuesBlocked_SeqBAIJ, 2692 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2693 MatDestroy_SeqBAIJ, 2694 MatView_SeqBAIJ, 2695 NULL, 2696 NULL, 2697 /* 64*/ NULL, 2698 NULL, 2699 NULL, 2700 NULL, 2701 NULL, 2702 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2703 NULL, 2704 MatConvert_Basic, 2705 NULL, 2706 NULL, 2707 /* 74*/ NULL, 2708 MatFDColoringApply_BAIJ, 2709 NULL, 2710 NULL, 2711 NULL, 2712 /* 79*/ NULL, 2713 NULL, 2714 NULL, 2715 NULL, 2716 MatLoad_SeqBAIJ, 2717 /* 84*/ NULL, 2718 NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 /* 89*/ NULL, 2723 NULL, 2724 NULL, 2725 NULL, 2726 NULL, 2727 /* 94*/ NULL, 2728 NULL, 2729 NULL, 2730 NULL, 2731 NULL, 2732 /* 99*/ NULL, 2733 NULL, 2734 NULL, 2735 MatConjugate_SeqBAIJ, 2736 NULL, 2737 /*104*/ NULL, 2738 MatRealPart_SeqBAIJ, 2739 MatImaginaryPart_SeqBAIJ, 2740 NULL, 2741 NULL, 2742 /*109*/ NULL, 2743 NULL, 2744 NULL, 2745 NULL, 2746 MatMissingDiagonal_SeqBAIJ, 2747 /*114*/ NULL, 2748 NULL, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*119*/ NULL, 2753 NULL, 2754 MatMultHermitianTranspose_SeqBAIJ, 2755 MatMultHermitianTransposeAdd_SeqBAIJ, 2756 NULL, 2757 /*124*/ NULL, 2758 MatGetColumnReductions_SeqBAIJ, 2759 MatInvertBlockDiagonal_SeqBAIJ, 2760 NULL, 2761 NULL, 2762 /*129*/ NULL, 2763 NULL, 2764 NULL, 2765 NULL, 2766 NULL, 2767 /*134*/ NULL, 2768 NULL, 2769 NULL, 2770 NULL, 2771 NULL, 2772 /*139*/ MatSetBlockSizes_Default, 2773 NULL, 2774 NULL, 2775 MatFDColoringSetUp_SeqXAIJ, 2776 NULL, 2777 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2778 MatDestroySubMatrices_SeqBAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 NULL 2783 }; 2784 2785 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2786 { 2787 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2788 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2789 2790 PetscFunctionBegin; 2791 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2792 2793 /* allocate space for values if not already there */ 2794 if (!aij->saved_values) { 2795 PetscCall(PetscMalloc1(nz+1,&aij->saved_values)); 2796 PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar))); 2797 } 2798 2799 /* copy values over */ 2800 PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz)); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2805 { 2806 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2807 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2808 2809 PetscFunctionBegin; 2810 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2811 PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2812 2813 /* copy values over */ 2814 PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz)); 2815 PetscFunctionReturn(0); 2816 } 2817 2818 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2819 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2820 2821 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2822 { 2823 Mat_SeqBAIJ *b; 2824 PetscInt i,mbs,nbs,bs2; 2825 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2826 2827 PetscFunctionBegin; 2828 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2829 if (nz == MAT_SKIP_ALLOCATION) { 2830 skipallocation = PETSC_TRUE; 2831 nz = 0; 2832 } 2833 2834 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2835 PetscCall(PetscLayoutSetUp(B->rmap)); 2836 PetscCall(PetscLayoutSetUp(B->cmap)); 2837 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2838 2839 B->preallocated = PETSC_TRUE; 2840 2841 mbs = B->rmap->n/bs; 2842 nbs = B->cmap->n/bs; 2843 bs2 = bs*bs; 2844 2845 PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs); 2846 2847 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2848 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz); 2849 if (nnz) { 2850 for (i=0; i<mbs; i++) { 2851 PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]); 2852 PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs); 2853 } 2854 } 2855 2856 b = (Mat_SeqBAIJ*)B->data; 2857 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat"); 2858 PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL)); 2859 PetscOptionsEnd(); 2860 2861 if (!flg) { 2862 switch (bs) { 2863 case 1: 2864 B->ops->mult = MatMult_SeqBAIJ_1; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2866 break; 2867 case 2: 2868 B->ops->mult = MatMult_SeqBAIJ_2; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2870 break; 2871 case 3: 2872 B->ops->mult = MatMult_SeqBAIJ_3; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2874 break; 2875 case 4: 2876 B->ops->mult = MatMult_SeqBAIJ_4; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2878 break; 2879 case 5: 2880 B->ops->mult = MatMult_SeqBAIJ_5; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2882 break; 2883 case 6: 2884 B->ops->mult = MatMult_SeqBAIJ_6; 2885 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2886 break; 2887 case 7: 2888 B->ops->mult = MatMult_SeqBAIJ_7; 2889 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2890 break; 2891 case 9: 2892 { 2893 PetscInt version = 1; 2894 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2895 switch (version) { 2896 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2897 case 1: 2898 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2899 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2900 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2901 break; 2902 #endif 2903 default: 2904 B->ops->mult = MatMult_SeqBAIJ_N; 2905 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2906 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2907 break; 2908 } 2909 break; 2910 } 2911 case 11: 2912 B->ops->mult = MatMult_SeqBAIJ_11; 2913 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2914 break; 2915 case 12: 2916 { 2917 PetscInt version = 1; 2918 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2919 switch (version) { 2920 case 1: 2921 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2922 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2923 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2924 break; 2925 case 2: 2926 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2927 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2928 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2929 break; 2930 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2931 case 3: 2932 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2933 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2934 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2935 break; 2936 #endif 2937 default: 2938 B->ops->mult = MatMult_SeqBAIJ_N; 2939 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2940 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2941 break; 2942 } 2943 break; 2944 } 2945 case 15: 2946 { 2947 PetscInt version = 1; 2948 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2949 switch (version) { 2950 case 1: 2951 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2952 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2953 break; 2954 case 2: 2955 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2956 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2957 break; 2958 case 3: 2959 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2960 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2961 break; 2962 case 4: 2963 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2964 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2965 break; 2966 default: 2967 B->ops->mult = MatMult_SeqBAIJ_N; 2968 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2969 break; 2970 } 2971 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2972 break; 2973 } 2974 default: 2975 B->ops->mult = MatMult_SeqBAIJ_N; 2976 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2977 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2978 break; 2979 } 2980 } 2981 B->ops->sor = MatSOR_SeqBAIJ; 2982 b->mbs = mbs; 2983 b->nbs = nbs; 2984 if (!skipallocation) { 2985 if (!b->imax) { 2986 PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen)); 2987 PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt))); 2988 2989 b->free_imax_ilen = PETSC_TRUE; 2990 } 2991 /* b->ilen will count nonzeros in each block row so far. */ 2992 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2993 if (!nnz) { 2994 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2995 else if (nz < 0) nz = 1; 2996 nz = PetscMin(nz,nbs); 2997 for (i=0; i<mbs; i++) b->imax[i] = nz; 2998 PetscCall(PetscIntMultError(nz,mbs,&nz)); 2999 } else { 3000 PetscInt64 nz64 = 0; 3001 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3002 PetscCall(PetscIntCast(nz64,&nz)); 3003 } 3004 3005 /* allocate the matrix space */ 3006 PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i)); 3007 if (B->structure_only) { 3008 PetscCall(PetscMalloc1(nz,&b->j)); 3009 PetscCall(PetscMalloc1(B->rmap->N+1,&b->i)); 3010 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt))); 3011 } else { 3012 PetscInt nzbs2 = 0; 3013 PetscCall(PetscIntMultError(nz,bs2,&nzbs2)); 3014 PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i)); 3015 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)))); 3016 PetscCall(PetscArrayzero(b->a,nz*bs2)); 3017 } 3018 PetscCall(PetscArrayzero(b->j,nz)); 3019 3020 if (B->structure_only) { 3021 b->singlemalloc = PETSC_FALSE; 3022 b->free_a = PETSC_FALSE; 3023 } else { 3024 b->singlemalloc = PETSC_TRUE; 3025 b->free_a = PETSC_TRUE; 3026 } 3027 b->free_ij = PETSC_TRUE; 3028 3029 b->i[0] = 0; 3030 for (i=1; i<mbs+1; i++) { 3031 b->i[i] = b->i[i-1] + b->imax[i-1]; 3032 } 3033 3034 } else { 3035 b->free_a = PETSC_FALSE; 3036 b->free_ij = PETSC_FALSE; 3037 } 3038 3039 b->bs2 = bs2; 3040 b->mbs = mbs; 3041 b->nz = 0; 3042 b->maxnz = nz; 3043 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3044 B->was_assembled = PETSC_FALSE; 3045 B->assembled = PETSC_FALSE; 3046 if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3047 PetscFunctionReturn(0); 3048 } 3049 3050 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3051 { 3052 PetscInt i,m,nz,nz_max=0,*nnz; 3053 PetscScalar *values=NULL; 3054 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3055 3056 PetscFunctionBegin; 3057 PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 3058 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 3059 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 3060 PetscCall(PetscLayoutSetUp(B->rmap)); 3061 PetscCall(PetscLayoutSetUp(B->cmap)); 3062 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 3063 m = B->rmap->n/bs; 3064 3065 PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 3066 PetscCall(PetscMalloc1(m+1, &nnz)); 3067 for (i=0; i<m; i++) { 3068 nz = ii[i+1]- ii[i]; 3069 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 3070 nz_max = PetscMax(nz_max, nz); 3071 nnz[i] = nz; 3072 } 3073 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 3074 PetscCall(PetscFree(nnz)); 3075 3076 values = (PetscScalar*)V; 3077 if (!values) { 3078 PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values)); 3079 } 3080 for (i=0; i<m; i++) { 3081 PetscInt ncols = ii[i+1] - ii[i]; 3082 const PetscInt *icols = jj + ii[i]; 3083 if (bs == 1 || !roworiented) { 3084 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3085 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES)); 3086 } else { 3087 PetscInt j; 3088 for (j=0; j<ncols; j++) { 3089 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3090 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES)); 3091 } 3092 } 3093 } 3094 if (!V) PetscCall(PetscFree(values)); 3095 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3096 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3097 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3098 PetscFunctionReturn(0); 3099 } 3100 3101 /*@C 3102 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3103 3104 Not Collective 3105 3106 Input Parameter: 3107 . mat - a MATSEQBAIJ matrix 3108 3109 Output Parameter: 3110 . array - pointer to the data 3111 3112 Level: intermediate 3113 3114 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3115 @*/ 3116 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3117 { 3118 PetscFunctionBegin; 3119 PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array)); 3120 PetscFunctionReturn(0); 3121 } 3122 3123 /*@C 3124 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3125 3126 Not Collective 3127 3128 Input Parameters: 3129 + mat - a MATSEQBAIJ matrix 3130 - array - pointer to the data 3131 3132 Level: intermediate 3133 3134 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3135 @*/ 3136 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3137 { 3138 PetscFunctionBegin; 3139 PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array)); 3140 PetscFunctionReturn(0); 3141 } 3142 3143 /*MC 3144 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3145 block sparse compressed row format. 3146 3147 Options Database Keys: 3148 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3149 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3150 3151 Level: beginner 3152 3153 Notes: 3154 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3155 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3156 3157 Run with -info to see what version of the matrix-vector product is being used 3158 3159 .seealso: `MatCreateSeqBAIJ()` 3160 M*/ 3161 3162 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3163 3164 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3165 { 3166 PetscMPIInt size; 3167 Mat_SeqBAIJ *b; 3168 3169 PetscFunctionBegin; 3170 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 3171 PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3172 3173 PetscCall(PetscNewLog(B,&b)); 3174 B->data = (void*)b; 3175 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 3176 3177 b->row = NULL; 3178 b->col = NULL; 3179 b->icol = NULL; 3180 b->reallocs = 0; 3181 b->saved_values = NULL; 3182 3183 b->roworiented = PETSC_TRUE; 3184 b->nonew = 0; 3185 b->diag = NULL; 3186 B->spptr = NULL; 3187 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3188 b->keepnonzeropattern = PETSC_FALSE; 3189 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ)); 3200 #if defined(PETSC_HAVE_HYPRE) 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE)); 3202 #endif 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS)); 3204 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ)); 3205 PetscFunctionReturn(0); 3206 } 3207 3208 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3209 { 3210 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3211 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3212 3213 PetscFunctionBegin; 3214 PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3215 3216 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3217 c->imax = a->imax; 3218 c->ilen = a->ilen; 3219 c->free_imax_ilen = PETSC_FALSE; 3220 } else { 3221 PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen)); 3222 PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt))); 3223 for (i=0; i<mbs; i++) { 3224 c->imax[i] = a->imax[i]; 3225 c->ilen[i] = a->ilen[i]; 3226 } 3227 c->free_imax_ilen = PETSC_TRUE; 3228 } 3229 3230 /* allocate the matrix space */ 3231 if (mallocmatspace) { 3232 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3233 PetscCall(PetscCalloc1(bs2*nz,&c->a)); 3234 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar))); 3235 3236 c->i = a->i; 3237 c->j = a->j; 3238 c->singlemalloc = PETSC_FALSE; 3239 c->free_a = PETSC_TRUE; 3240 c->free_ij = PETSC_FALSE; 3241 c->parent = A; 3242 C->preallocated = PETSC_TRUE; 3243 C->assembled = PETSC_TRUE; 3244 3245 PetscCall(PetscObjectReference((PetscObject)A)); 3246 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3247 PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3248 } else { 3249 PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i)); 3250 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt))); 3251 3252 c->singlemalloc = PETSC_TRUE; 3253 c->free_a = PETSC_TRUE; 3254 c->free_ij = PETSC_TRUE; 3255 3256 PetscCall(PetscArraycpy(c->i,a->i,mbs+1)); 3257 if (mbs > 0) { 3258 PetscCall(PetscArraycpy(c->j,a->j,nz)); 3259 if (cpvalues == MAT_COPY_VALUES) { 3260 PetscCall(PetscArraycpy(c->a,a->a,bs2*nz)); 3261 } else { 3262 PetscCall(PetscArrayzero(c->a,bs2*nz)); 3263 } 3264 } 3265 C->preallocated = PETSC_TRUE; 3266 C->assembled = PETSC_TRUE; 3267 } 3268 } 3269 3270 c->roworiented = a->roworiented; 3271 c->nonew = a->nonew; 3272 3273 PetscCall(PetscLayoutReference(A->rmap,&C->rmap)); 3274 PetscCall(PetscLayoutReference(A->cmap,&C->cmap)); 3275 3276 c->bs2 = a->bs2; 3277 c->mbs = a->mbs; 3278 c->nbs = a->nbs; 3279 3280 if (a->diag) { 3281 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3282 c->diag = a->diag; 3283 c->free_diag = PETSC_FALSE; 3284 } else { 3285 PetscCall(PetscMalloc1(mbs+1,&c->diag)); 3286 PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt))); 3287 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3288 c->free_diag = PETSC_TRUE; 3289 } 3290 } else c->diag = NULL; 3291 3292 c->nz = a->nz; 3293 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3294 c->solve_work = NULL; 3295 c->mult_work = NULL; 3296 c->sor_workt = NULL; 3297 c->sor_work = NULL; 3298 3299 c->compressedrow.use = a->compressedrow.use; 3300 c->compressedrow.nrows = a->compressedrow.nrows; 3301 if (a->compressedrow.use) { 3302 i = a->compressedrow.nrows; 3303 PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex)); 3304 PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt))); 3305 PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1)); 3306 PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i)); 3307 } else { 3308 c->compressedrow.use = PETSC_FALSE; 3309 c->compressedrow.i = NULL; 3310 c->compressedrow.rindex = NULL; 3311 } 3312 C->nonzerostate = A->nonzerostate; 3313 3314 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist)); 3315 PetscFunctionReturn(0); 3316 } 3317 3318 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3319 { 3320 PetscFunctionBegin; 3321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B)); 3322 PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n)); 3323 PetscCall(MatSetType(*B,MATSEQBAIJ)); 3324 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE)); 3325 PetscFunctionReturn(0); 3326 } 3327 3328 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3329 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3330 { 3331 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3332 PetscInt *rowidxs,*colidxs; 3333 PetscScalar *matvals; 3334 3335 PetscFunctionBegin; 3336 PetscCall(PetscViewerSetUp(viewer)); 3337 3338 /* read matrix header */ 3339 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3340 PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3341 M = header[1]; N = header[2]; nz = header[3]; 3342 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3343 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3344 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3345 3346 /* set block sizes from the viewer's .info file */ 3347 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3348 /* set local and global sizes if not set already */ 3349 if (mat->rmap->n < 0) mat->rmap->n = M; 3350 if (mat->cmap->n < 0) mat->cmap->n = N; 3351 if (mat->rmap->N < 0) mat->rmap->N = M; 3352 if (mat->cmap->N < 0) mat->cmap->N = N; 3353 PetscCall(PetscLayoutSetUp(mat->rmap)); 3354 PetscCall(PetscLayoutSetUp(mat->cmap)); 3355 3356 /* check if the matrix sizes are correct */ 3357 PetscCall(MatGetSize(mat,&rows,&cols)); 3358 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3359 PetscCall(MatGetBlockSize(mat,&bs)); 3360 PetscCall(MatGetLocalSize(mat,&m,&n)); 3361 mbs = m/bs; nbs = n/bs; 3362 3363 /* read in row lengths, column indices and nonzero values */ 3364 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3365 PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT)); 3366 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3367 sum = rowidxs[m]; 3368 PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3369 3370 /* read in column indices and nonzero values */ 3371 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals)); 3372 PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT)); 3373 PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR)); 3374 3375 { /* preallocate matrix storage */ 3376 PetscBT bt; /* helper bit set to count nonzeros */ 3377 PetscInt *nnz; 3378 PetscBool sbaij; 3379 3380 PetscCall(PetscBTCreate(nbs,&bt)); 3381 PetscCall(PetscCalloc1(mbs,&nnz)); 3382 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij)); 3383 for (i=0; i<mbs; i++) { 3384 PetscCall(PetscBTMemzero(nbs,bt)); 3385 for (k=0; k<bs; k++) { 3386 PetscInt row = bs*i + k; 3387 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3388 PetscInt col = colidxs[j]; 3389 if (!sbaij || col >= row) 3390 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3391 } 3392 } 3393 } 3394 PetscCall(PetscBTDestroy(&bt)); 3395 PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz)); 3396 PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz)); 3397 PetscCall(PetscFree(nnz)); 3398 } 3399 3400 /* store matrix values */ 3401 for (i=0; i<m; i++) { 3402 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3403 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3404 } 3405 3406 PetscCall(PetscFree(rowidxs)); 3407 PetscCall(PetscFree2(colidxs,matvals)); 3408 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3409 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3410 PetscFunctionReturn(0); 3411 } 3412 3413 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3414 { 3415 PetscBool isbinary; 3416 3417 PetscFunctionBegin; 3418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3419 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3420 PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer)); 3421 PetscFunctionReturn(0); 3422 } 3423 3424 /*@C 3425 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3426 compressed row) format. For good matrix assembly performance the 3427 user should preallocate the matrix storage by setting the parameter nz 3428 (or the array nnz). By setting these parameters accurately, performance 3429 during matrix assembly can be increased by more than a factor of 50. 3430 3431 Collective 3432 3433 Input Parameters: 3434 + comm - MPI communicator, set to PETSC_COMM_SELF 3435 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3436 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3437 . m - number of rows 3438 . n - number of columns 3439 . nz - number of nonzero blocks per block row (same for all rows) 3440 - nnz - array containing the number of nonzero blocks in the various block rows 3441 (possibly different for each block row) or NULL 3442 3443 Output Parameter: 3444 . A - the matrix 3445 3446 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3447 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3448 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3449 3450 Options Database Keys: 3451 + -mat_no_unroll - uses code that does not unroll the loops in the 3452 block calculations (much slower) 3453 - -mat_block_size - size of the blocks to use 3454 3455 Level: intermediate 3456 3457 Notes: 3458 The number of rows and columns must be divisible by blocksize. 3459 3460 If the nnz parameter is given then the nz parameter is ignored 3461 3462 A nonzero block is any block that as 1 or more nonzeros in it 3463 3464 The block AIJ format is fully compatible with standard Fortran 77 3465 storage. That is, the stored row and column indices can begin at 3466 either one (as in Fortran) or zero. See the users' manual for details. 3467 3468 Specify the preallocated storage with either nz or nnz (not both). 3469 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3470 allocation. See Users-Manual: ch_mat for details. 3471 matrices. 3472 3473 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3474 @*/ 3475 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3476 { 3477 PetscFunctionBegin; 3478 PetscCall(MatCreate(comm,A)); 3479 PetscCall(MatSetSizes(*A,m,n,m,n)); 3480 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3481 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz)); 3482 PetscFunctionReturn(0); 3483 } 3484 3485 /*@C 3486 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3487 per row in the matrix. For good matrix assembly performance the 3488 user should preallocate the matrix storage by setting the parameter nz 3489 (or the array nnz). By setting these parameters accurately, performance 3490 during matrix assembly can be increased by more than a factor of 50. 3491 3492 Collective 3493 3494 Input Parameters: 3495 + B - the matrix 3496 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3497 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3498 . nz - number of block nonzeros per block row (same for all rows) 3499 - nnz - array containing the number of block nonzeros in the various block rows 3500 (possibly different for each block row) or NULL 3501 3502 Options Database Keys: 3503 + -mat_no_unroll - uses code that does not unroll the loops in the 3504 block calculations (much slower) 3505 - -mat_block_size - size of the blocks to use 3506 3507 Level: intermediate 3508 3509 Notes: 3510 If the nnz parameter is given then the nz parameter is ignored 3511 3512 You can call MatGetInfo() to get information on how effective the preallocation was; 3513 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3514 You can also run with the option -info and look for messages with the string 3515 malloc in them to see if additional memory allocation was needed. 3516 3517 The block AIJ format is fully compatible with standard Fortran 77 3518 storage. That is, the stored row and column indices can begin at 3519 either one (as in Fortran) or zero. See the users' manual for details. 3520 3521 Specify the preallocated storage with either nz or nnz (not both). 3522 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3523 allocation. See Users-Manual: ch_mat for details. 3524 3525 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3526 @*/ 3527 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3528 { 3529 PetscFunctionBegin; 3530 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3531 PetscValidType(B,1); 3532 PetscValidLogicalCollectiveInt(B,bs,2); 3533 PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz)); 3534 PetscFunctionReturn(0); 3535 } 3536 3537 /*@C 3538 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3539 3540 Collective 3541 3542 Input Parameters: 3543 + B - the matrix 3544 . i - the indices into j for the start of each local row (starts with zero) 3545 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3546 - v - optional values in the matrix 3547 3548 Level: advanced 3549 3550 Notes: 3551 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3552 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3553 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3554 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3555 block column and the second index is over columns within a block. 3556 3557 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3558 3559 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3560 @*/ 3561 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3562 { 3563 PetscFunctionBegin; 3564 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3565 PetscValidType(B,1); 3566 PetscValidLogicalCollectiveInt(B,bs,2); 3567 PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 3568 PetscFunctionReturn(0); 3569 } 3570 3571 /*@ 3572 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3573 3574 Collective 3575 3576 Input Parameters: 3577 + comm - must be an MPI communicator of size 1 3578 . bs - size of block 3579 . m - number of rows 3580 . n - number of columns 3581 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3582 . j - column indices 3583 - a - matrix values 3584 3585 Output Parameter: 3586 . mat - the matrix 3587 3588 Level: advanced 3589 3590 Notes: 3591 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3592 once the matrix is destroyed 3593 3594 You cannot set new nonzero locations into this matrix, that will generate an error. 3595 3596 The i and j indices are 0 based 3597 3598 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3599 3600 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3601 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3602 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3603 with column-major ordering within blocks. 3604 3605 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3606 3607 @*/ 3608 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3609 { 3610 PetscInt ii; 3611 Mat_SeqBAIJ *baij; 3612 3613 PetscFunctionBegin; 3614 PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs); 3615 if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3616 3617 PetscCall(MatCreate(comm,mat)); 3618 PetscCall(MatSetSizes(*mat,m,n,m,n)); 3619 PetscCall(MatSetType(*mat,MATSEQBAIJ)); 3620 PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL)); 3621 baij = (Mat_SeqBAIJ*)(*mat)->data; 3622 PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen)); 3623 PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt))); 3624 3625 baij->i = i; 3626 baij->j = j; 3627 baij->a = a; 3628 3629 baij->singlemalloc = PETSC_FALSE; 3630 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3631 baij->free_a = PETSC_FALSE; 3632 baij->free_ij = PETSC_FALSE; 3633 3634 for (ii=0; ii<m; ii++) { 3635 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3636 PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]); 3637 } 3638 if (PetscDefined(USE_DEBUG)) { 3639 for (ii=0; ii<baij->i[m]; ii++) { 3640 PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3641 PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3642 } 3643 } 3644 3645 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3646 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3647 PetscFunctionReturn(0); 3648 } 3649 3650 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3651 { 3652 PetscFunctionBegin; 3653 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat)); 3654 PetscFunctionReturn(0); 3655 } 3656