1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions) 21 { 22 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 23 PetscInt m,n,i; 24 PetscInt ib,jb,bs = A->rmap->bs; 25 MatScalar *a_val = a_aij->a; 26 27 PetscFunctionBegin; 28 PetscCall(MatGetSize(A,&m,&n)); 29 for (i=0; i<n; i++) reductions[i] = 0.0; 30 if (type == NORM_2) { 31 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 32 for (jb=0; jb<bs; jb++) { 33 for (ib=0; ib<bs; ib++) { 34 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 35 a_val++; 36 } 37 } 38 } 39 } else if (type == NORM_1) { 40 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 41 for (jb=0; jb<bs; jb++) { 42 for (ib=0; ib<bs; ib++) { 43 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 44 a_val++; 45 } 46 } 47 } 48 } else if (type == NORM_INFINITY) { 49 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 50 for (jb=0; jb<bs; jb++) { 51 for (ib=0; ib<bs; ib++) { 52 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 53 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 54 a_val++; 55 } 56 } 57 } 58 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 59 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 60 for (jb=0; jb<bs; jb++) { 61 for (ib=0; ib<bs; ib++) { 62 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 63 a_val++; 64 } 65 } 66 } 67 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 68 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 69 for (jb=0; jb<bs; jb++) { 70 for (ib=0; ib<bs; ib++) { 71 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 72 a_val++; 73 } 74 } 75 } 76 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 77 if (type == NORM_2) { 78 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 79 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (i=0; i<n; i++) reductions[i] /= m; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 88 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 89 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 90 PetscReal shift = 0.0; 91 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 92 93 PetscFunctionBegin; 94 allowzeropivot = PetscNot(A->erroriffailure); 95 96 if (a->idiagvalid) { 97 if (values) *values = a->idiag; 98 PetscFunctionReturn(0); 99 } 100 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 101 diag_offset = a->diag; 102 if (!a->idiag) { 103 PetscCall(PetscMalloc1(bs2*mbs,&a->idiag)); 104 PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar))); 105 } 106 diag = a->idiag; 107 if (values) *values = a->idiag; 108 /* factor and invert each block */ 109 switch (bs) { 110 case 1: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 1*diag_offset[i]; 113 diag[0] = odiag[0]; 114 115 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 116 if (allowzeropivot) { 117 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 119 A->factorerror_zeropivot_row = i; 120 PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i)); 121 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 122 } 123 124 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 125 diag += 1; 126 } 127 break; 128 case 2: 129 for (i=0; i<mbs; i++) { 130 odiag = v + 4*diag_offset[i]; 131 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 132 PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected)); 133 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 134 diag += 4; 135 } 136 break; 137 case 3: 138 for (i=0; i<mbs; i++) { 139 odiag = v + 9*diag_offset[i]; 140 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 141 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 142 diag[8] = odiag[8]; 143 PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected)); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 9; 146 } 147 break; 148 case 4: 149 for (i=0; i<mbs; i++) { 150 odiag = v + 16*diag_offset[i]; 151 PetscCall(PetscArraycpy(diag,odiag,16)); 152 PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected)); 153 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 154 diag += 16; 155 } 156 break; 157 case 5: 158 for (i=0; i<mbs; i++) { 159 odiag = v + 25*diag_offset[i]; 160 PetscCall(PetscArraycpy(diag,odiag,25)); 161 PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 25; 164 } 165 break; 166 case 6: 167 for (i=0; i<mbs; i++) { 168 odiag = v + 36*diag_offset[i]; 169 PetscCall(PetscArraycpy(diag,odiag,36)); 170 PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 36; 173 } 174 break; 175 case 7: 176 for (i=0; i<mbs; i++) { 177 odiag = v + 49*diag_offset[i]; 178 PetscCall(PetscArraycpy(diag,odiag,49)); 179 PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 49; 182 } 183 break; 184 default: 185 PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots)); 186 for (i=0; i<mbs; i++) { 187 odiag = v + bs2*diag_offset[i]; 188 PetscCall(PetscArraycpy(diag,odiag,bs2)); 189 PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected)); 190 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 191 diag += bs2; 192 } 193 PetscCall(PetscFree2(v_work,v_pivots)); 194 } 195 a->idiagvalid = PETSC_TRUE; 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 202 PetscScalar *x,*work,*w,*workt,*t; 203 const MatScalar *v,*aa = a->a, *idiag; 204 const PetscScalar *b,*xb; 205 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 206 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 207 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 208 209 PetscFunctionBegin; 210 its = its*lits; 211 PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 212 PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 213 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 214 PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 215 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 216 217 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL)); 218 219 if (!m) PetscFunctionReturn(0); 220 diag = a->diag; 221 idiag = a->idiag; 222 k = PetscMax(A->rmap->n,A->cmap->n); 223 if (!a->mult_work) { 224 PetscCall(PetscMalloc1(k+1,&a->mult_work)); 225 } 226 if (!a->sor_workt) { 227 PetscCall(PetscMalloc1(k,&a->sor_workt)); 228 } 229 if (!a->sor_work) { 230 PetscCall(PetscMalloc1(bs,&a->sor_work)); 231 } 232 work = a->mult_work; 233 t = a->sor_workt; 234 w = a->sor_work; 235 236 PetscCall(VecGetArray(xx,&x)); 237 PetscCall(VecGetArrayRead(bb,&b)); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241 switch (bs) { 242 case 1: 243 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 244 t[0] = b[0]; 245 i2 = 1; 246 idiag += 1; 247 for (i=1; i<m; i++) { 248 v = aa + ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; 252 for (j=0; j<nz; j++) { 253 xw[0] = x[vi[j]]; 254 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 255 } 256 t[i2] = s[0]; 257 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 258 x[i2] = xw[0]; 259 idiag += 1; 260 i2 += 1; 261 } 262 break; 263 case 2: 264 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 265 t[0] = b[0]; t[1] = b[1]; 266 i2 = 2; 267 idiag += 4; 268 for (i=1; i<m; i++) { 269 v = aa + 4*ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s[0] = b[i2]; s[1] = b[i2+1]; 273 for (j=0; j<nz; j++) { 274 idx = 2*vi[j]; 275 it = 4*j; 276 xw[0] = x[idx]; xw[1] = x[1+idx]; 277 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 278 } 279 t[i2] = s[0]; t[i2+1] = s[1]; 280 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 281 x[i2] = xw[0]; x[i2+1] = xw[1]; 282 idiag += 4; 283 i2 += 2; 284 } 285 break; 286 case 3: 287 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 288 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 289 i2 = 3; 290 idiag += 9; 291 for (i=1; i<m; i++) { 292 v = aa + 9*ai[i]; 293 vi = aj + ai[i]; 294 nz = diag[i] - ai[i]; 295 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 296 while (nz--) { 297 idx = 3*(*vi++); 298 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 299 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 300 v += 9; 301 } 302 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 303 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 304 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 305 idiag += 9; 306 i2 += 3; 307 } 308 break; 309 case 4: 310 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 311 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 312 i2 = 4; 313 idiag += 16; 314 for (i=1; i<m; i++) { 315 v = aa + 16*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 319 while (nz--) { 320 idx = 4*(*vi++); 321 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 323 v += 16; 324 } 325 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 326 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 328 idiag += 16; 329 i2 += 4; 330 } 331 break; 332 case 5: 333 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 335 i2 = 5; 336 idiag += 25; 337 for (i=1; i<m; i++) { 338 v = aa + 25*ai[i]; 339 vi = aj + ai[i]; 340 nz = diag[i] - ai[i]; 341 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 342 while (nz--) { 343 idx = 5*(*vi++); 344 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 345 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 346 v += 25; 347 } 348 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 349 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 350 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 351 idiag += 25; 352 i2 += 5; 353 } 354 break; 355 case 6: 356 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 357 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 358 i2 = 6; 359 idiag += 36; 360 for (i=1; i<m; i++) { 361 v = aa + 36*ai[i]; 362 vi = aj + ai[i]; 363 nz = diag[i] - ai[i]; 364 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 365 while (nz--) { 366 idx = 6*(*vi++); 367 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 368 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 369 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 370 v += 36; 371 } 372 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 373 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 374 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 375 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 376 idiag += 36; 377 i2 += 6; 378 } 379 break; 380 case 7: 381 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 382 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 383 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 384 i2 = 7; 385 idiag += 49; 386 for (i=1; i<m; i++) { 387 v = aa + 49*ai[i]; 388 vi = aj + ai[i]; 389 nz = diag[i] - ai[i]; 390 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 391 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 392 while (nz--) { 393 idx = 7*(*vi++); 394 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 395 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 396 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 397 v += 49; 398 } 399 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 400 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 401 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 402 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 403 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 404 idiag += 49; 405 i2 += 7; 406 } 407 break; 408 default: 409 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 410 PetscCall(PetscArraycpy(t,b,bs)); 411 i2 = bs; 412 idiag += bs2; 413 for (i=1; i<m; i++) { 414 v = aa + bs2*ai[i]; 415 vi = aj + ai[i]; 416 nz = diag[i] - ai[i]; 417 418 PetscCall(PetscArraycpy(w,b+i2,bs)); 419 /* copy all rows of x that are needed into contiguous space */ 420 workt = work; 421 for (j=0; j<nz; j++) { 422 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 423 workt += bs; 424 } 425 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 426 PetscCall(PetscArraycpy(t+i2,w,bs)); 427 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 428 429 idiag += bs2; 430 i2 += bs; 431 } 432 break; 433 } 434 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 435 PetscCall(PetscLogFlops(1.0*bs2*a->nz)); 436 xb = t; 437 } 438 else xb = b; 439 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 440 idiag = a->idiag+bs2*(a->mbs-1); 441 i2 = bs * (m-1); 442 switch (bs) { 443 case 1: 444 s[0] = xb[i2]; 445 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 446 x[i2] = xw[0]; 447 i2 -= 1; 448 for (i=m-2; i>=0; i--) { 449 v = aa + (diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; 453 for (j=0; j<nz; j++) { 454 xw[0] = x[vi[j]]; 455 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 456 } 457 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 458 x[i2] = xw[0]; 459 idiag -= 1; 460 i2 -= 1; 461 } 462 break; 463 case 2: 464 s[0] = xb[i2]; s[1] = xb[i2+1]; 465 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 466 x[i2] = xw[0]; x[i2+1] = xw[1]; 467 i2 -= 2; 468 idiag -= 4; 469 for (i=m-2; i>=0; i--) { 470 v = aa + 4*(diag[i] + 1); 471 vi = aj + diag[i] + 1; 472 nz = ai[i+1] - diag[i] - 1; 473 s[0] = xb[i2]; s[1] = xb[i2+1]; 474 for (j=0; j<nz; j++) { 475 idx = 2*vi[j]; 476 it = 4*j; 477 xw[0] = x[idx]; xw[1] = x[1+idx]; 478 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 479 } 480 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 481 x[i2] = xw[0]; x[i2+1] = xw[1]; 482 idiag -= 4; 483 i2 -= 2; 484 } 485 break; 486 case 3: 487 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 488 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 489 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 490 i2 -= 3; 491 idiag -= 9; 492 for (i=m-2; i>=0; i--) { 493 v = aa + 9*(diag[i]+1); 494 vi = aj + diag[i] + 1; 495 nz = ai[i+1] - diag[i] - 1; 496 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 497 while (nz--) { 498 idx = 3*(*vi++); 499 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 500 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 501 v += 9; 502 } 503 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 504 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 505 idiag -= 9; 506 i2 -= 3; 507 } 508 break; 509 case 4: 510 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 511 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 513 i2 -= 4; 514 idiag -= 16; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 16*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 520 while (nz--) { 521 idx = 4*(*vi++); 522 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 523 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 524 v += 16; 525 } 526 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 527 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 break; 532 case 5: 533 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 534 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 536 i2 -= 5; 537 idiag -= 25; 538 for (i=m-2; i>=0; i--) { 539 v = aa + 25*(diag[i]+1); 540 vi = aj + diag[i] + 1; 541 nz = ai[i+1] - diag[i] - 1; 542 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 543 while (nz--) { 544 idx = 5*(*vi++); 545 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 546 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 547 v += 25; 548 } 549 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 550 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 551 idiag -= 25; 552 i2 -= 5; 553 } 554 break; 555 case 6: 556 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 557 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 558 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 559 i2 -= 6; 560 idiag -= 36; 561 for (i=m-2; i>=0; i--) { 562 v = aa + 36*(diag[i]+1); 563 vi = aj + diag[i] + 1; 564 nz = ai[i+1] - diag[i] - 1; 565 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 566 while (nz--) { 567 idx = 6*(*vi++); 568 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 569 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 570 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 571 v += 36; 572 } 573 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 574 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 575 idiag -= 36; 576 i2 -= 6; 577 } 578 break; 579 case 7: 580 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 581 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 582 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 583 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 584 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 585 i2 -= 7; 586 idiag -= 49; 587 for (i=m-2; i>=0; i--) { 588 v = aa + 49*(diag[i]+1); 589 vi = aj + diag[i] + 1; 590 nz = ai[i+1] - diag[i] - 1; 591 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 592 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 593 while (nz--) { 594 idx = 7*(*vi++); 595 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 596 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 597 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 598 v += 49; 599 } 600 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 601 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 602 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 603 idiag -= 49; 604 i2 -= 7; 605 } 606 break; 607 default: 608 PetscCall(PetscArraycpy(w,xb+i2,bs)); 609 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 610 i2 -= bs; 611 idiag -= bs2; 612 for (i=m-2; i>=0; i--) { 613 v = aa + bs2*(diag[i]+1); 614 vi = aj + diag[i] + 1; 615 nz = ai[i+1] - diag[i] - 1; 616 617 PetscCall(PetscArraycpy(w,xb+i2,bs)); 618 /* copy all rows of x that are needed into contiguous space */ 619 workt = work; 620 for (j=0; j<nz; j++) { 621 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 622 workt += bs; 623 } 624 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 625 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 626 627 idiag -= bs2; 628 i2 -= bs; 629 } 630 break; 631 } 632 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 633 } 634 its--; 635 } 636 while (its--) { 637 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 638 idiag = a->idiag; 639 i2 = 0; 640 switch (bs) { 641 case 1: 642 for (i=0; i<m; i++) { 643 v = aa + ai[i]; 644 vi = aj + ai[i]; 645 nz = ai[i+1] - ai[i]; 646 s[0] = b[i2]; 647 for (j=0; j<nz; j++) { 648 xw[0] = x[vi[j]]; 649 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 650 } 651 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 652 x[i2] += xw[0]; 653 idiag += 1; 654 i2 += 1; 655 } 656 break; 657 case 2: 658 for (i=0; i<m; i++) { 659 v = aa + 4*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; 663 for (j=0; j<nz; j++) { 664 idx = 2*vi[j]; 665 it = 4*j; 666 xw[0] = x[idx]; xw[1] = x[1+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 668 } 669 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 670 x[i2] += xw[0]; x[i2+1] += xw[1]; 671 idiag += 4; 672 i2 += 2; 673 } 674 break; 675 case 3: 676 for (i=0; i<m; i++) { 677 v = aa + 9*ai[i]; 678 vi = aj + ai[i]; 679 nz = ai[i+1] - ai[i]; 680 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 681 while (nz--) { 682 idx = 3*(*vi++); 683 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 684 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 685 v += 9; 686 } 687 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 688 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 689 idiag += 9; 690 i2 += 3; 691 } 692 break; 693 case 4: 694 for (i=0; i<m; i++) { 695 v = aa + 16*ai[i]; 696 vi = aj + ai[i]; 697 nz = ai[i+1] - ai[i]; 698 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 699 while (nz--) { 700 idx = 4*(*vi++); 701 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 702 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 703 v += 16; 704 } 705 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 706 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 707 idiag += 16; 708 i2 += 4; 709 } 710 break; 711 case 5: 712 for (i=0; i<m; i++) { 713 v = aa + 25*ai[i]; 714 vi = aj + ai[i]; 715 nz = ai[i+1] - ai[i]; 716 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 717 while (nz--) { 718 idx = 5*(*vi++); 719 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 720 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 721 v += 25; 722 } 723 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 724 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 725 idiag += 25; 726 i2 += 5; 727 } 728 break; 729 case 6: 730 for (i=0; i<m; i++) { 731 v = aa + 36*ai[i]; 732 vi = aj + ai[i]; 733 nz = ai[i+1] - ai[i]; 734 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 735 while (nz--) { 736 idx = 6*(*vi++); 737 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 738 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 739 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 740 v += 36; 741 } 742 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 743 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 744 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 745 idiag += 36; 746 i2 += 6; 747 } 748 break; 749 case 7: 750 for (i=0; i<m; i++) { 751 v = aa + 49*ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 755 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 756 while (nz--) { 757 idx = 7*(*vi++); 758 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 759 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 761 v += 49; 762 } 763 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 765 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 766 idiag += 49; 767 i2 += 7; 768 } 769 break; 770 default: 771 for (i=0; i<m; i++) { 772 v = aa + bs2*ai[i]; 773 vi = aj + ai[i]; 774 nz = ai[i+1] - ai[i]; 775 776 PetscCall(PetscArraycpy(w,b+i2,bs)); 777 /* copy all rows of x that are needed into contiguous space */ 778 workt = work; 779 for (j=0; j<nz; j++) { 780 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 781 workt += bs; 782 } 783 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 784 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 785 786 idiag += bs2; 787 i2 += bs; 788 } 789 break; 790 } 791 PetscCall(PetscLogFlops(2.0*bs2*a->nz)); 792 } 793 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 794 idiag = a->idiag+bs2*(a->mbs-1); 795 i2 = bs * (m-1); 796 switch (bs) { 797 case 1: 798 for (i=m-1; i>=0; i--) { 799 v = aa + ai[i]; 800 vi = aj + ai[i]; 801 nz = ai[i+1] - ai[i]; 802 s[0] = b[i2]; 803 for (j=0; j<nz; j++) { 804 xw[0] = x[vi[j]]; 805 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 806 } 807 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 808 x[i2] += xw[0]; 809 idiag -= 1; 810 i2 -= 1; 811 } 812 break; 813 case 2: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 4*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; 819 for (j=0; j<nz; j++) { 820 idx = 2*vi[j]; 821 it = 4*j; 822 xw[0] = x[idx]; xw[1] = x[1+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 824 } 825 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 826 x[i2] += xw[0]; x[i2+1] += xw[1]; 827 idiag -= 4; 828 i2 -= 2; 829 } 830 break; 831 case 3: 832 for (i=m-1; i>=0; i--) { 833 v = aa + 9*ai[i]; 834 vi = aj + ai[i]; 835 nz = ai[i+1] - ai[i]; 836 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 837 while (nz--) { 838 idx = 3*(*vi++); 839 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 840 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 841 v += 9; 842 } 843 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 844 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 845 idiag -= 9; 846 i2 -= 3; 847 } 848 break; 849 case 4: 850 for (i=m-1; i>=0; i--) { 851 v = aa + 16*ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i+1] - ai[i]; 854 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 855 while (nz--) { 856 idx = 4*(*vi++); 857 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 858 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 859 v += 16; 860 } 861 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 862 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 863 idiag -= 16; 864 i2 -= 4; 865 } 866 break; 867 case 5: 868 for (i=m-1; i>=0; i--) { 869 v = aa + 25*ai[i]; 870 vi = aj + ai[i]; 871 nz = ai[i+1] - ai[i]; 872 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 873 while (nz--) { 874 idx = 5*(*vi++); 875 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 876 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 877 v += 25; 878 } 879 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 880 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 881 idiag -= 25; 882 i2 -= 5; 883 } 884 break; 885 case 6: 886 for (i=m-1; i>=0; i--) { 887 v = aa + 36*ai[i]; 888 vi = aj + ai[i]; 889 nz = ai[i+1] - ai[i]; 890 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 891 while (nz--) { 892 idx = 6*(*vi++); 893 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 894 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 895 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 896 v += 36; 897 } 898 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 899 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 900 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 901 idiag -= 36; 902 i2 -= 6; 903 } 904 break; 905 case 7: 906 for (i=m-1; i>=0; i--) { 907 v = aa + 49*ai[i]; 908 vi = aj + ai[i]; 909 nz = ai[i+1] - ai[i]; 910 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 911 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 912 while (nz--) { 913 idx = 7*(*vi++); 914 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 915 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 916 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 917 v += 49; 918 } 919 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 920 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 921 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 break; 926 default: 927 for (i=m-1; i>=0; i--) { 928 v = aa + bs2*ai[i]; 929 vi = aj + ai[i]; 930 nz = ai[i+1] - ai[i]; 931 932 PetscCall(PetscArraycpy(w,b+i2,bs)); 933 /* copy all rows of x that are needed into contiguous space */ 934 workt = work; 935 for (j=0; j<nz; j++) { 936 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 937 workt += bs; 938 } 939 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 940 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 941 942 idiag -= bs2; 943 i2 -= bs; 944 } 945 break; 946 } 947 PetscCall(PetscLogFlops(2.0*bs2*(a->nz))); 948 } 949 } 950 PetscCall(VecRestoreArray(xx,&x)); 951 PetscCall(VecRestoreArrayRead(bb,&b)); 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 Special version for direct calls from Fortran (Used in PETSc-fun3d) 957 */ 958 #if defined(PETSC_HAVE_FORTRAN_CAPS) 959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 961 #define matsetvaluesblocked4_ matsetvaluesblocked4 962 #endif 963 964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 965 { 966 Mat A = *AA; 967 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 968 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 969 PetscInt *ai =a->i,*ailen=a->ilen; 970 PetscInt *aj =a->j,stepval,lastcol = -1; 971 const PetscScalar *value = v; 972 MatScalar *ap,*aa = a->a,*bap; 973 974 PetscFunctionBegin; 975 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 976 stepval = (n-1)*4; 977 for (k=0; k<m; k++) { /* loop over added rows */ 978 row = im[k]; 979 rp = aj + ai[row]; 980 ap = aa + 16*ai[row]; 981 nrow = ailen[row]; 982 low = 0; 983 high = nrow; 984 for (l=0; l<n; l++) { /* loop over added columns */ 985 col = in[l]; 986 if (col <= lastcol) low = 0; 987 else high = nrow; 988 lastcol = col; 989 value = v + k*(stepval+4 + l)*4; 990 while (high-low > 7) { 991 t = (low+high)/2; 992 if (rp[t] > col) high = t; 993 else low = t; 994 } 995 for (i=low; i<high; i++) { 996 if (rp[i] > col) break; 997 if (rp[i] == col) { 998 bap = ap + 16*i; 999 for (ii=0; ii<4; ii++,value+=stepval) { 1000 for (jj=ii; jj<16; jj+=4) { 1001 bap[jj] += *value++; 1002 } 1003 } 1004 goto noinsert2; 1005 } 1006 } 1007 N = nrow++ - 1; 1008 high++; /* added new column index thus must search to one higher than before */ 1009 /* shift up all the later entries in this row */ 1010 for (ii=N; ii>=i; ii--) { 1011 rp[ii+1] = rp[ii]; 1012 PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16)); 1013 } 1014 if (N >= i) { 1015 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1016 } 1017 rp[i] = col; 1018 bap = ap + 16*i; 1019 for (ii=0; ii<4; ii++,value+=stepval) { 1020 for (jj=ii; jj<16; jj+=4) { 1021 bap[jj] = *value++; 1022 } 1023 } 1024 noinsert2:; 1025 low = i; 1026 } 1027 ailen[row] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1033 #define matsetvalues4_ MATSETVALUES4 1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1035 #define matsetvalues4_ matsetvalues4 1036 #endif 1037 1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1039 { 1040 Mat A = *AA; 1041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1042 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1043 PetscInt *ai=a->i,*ailen=a->ilen; 1044 PetscInt *aj=a->j,brow,bcol; 1045 PetscInt ridx,cidx,lastcol = -1; 1046 MatScalar *ap,value,*aa=a->a,*bap; 1047 1048 PetscFunctionBegin; 1049 for (k=0; k<m; k++) { /* loop over added rows */ 1050 row = im[k]; brow = row/4; 1051 rp = aj + ai[brow]; 1052 ap = aa + 16*ai[brow]; 1053 nrow = ailen[brow]; 1054 low = 0; 1055 high = nrow; 1056 for (l=0; l<n; l++) { /* loop over added columns */ 1057 col = in[l]; bcol = col/4; 1058 ridx = row % 4; cidx = col % 4; 1059 value = v[l + k*n]; 1060 if (col <= lastcol) low = 0; 1061 else high = nrow; 1062 lastcol = col; 1063 while (high-low > 7) { 1064 t = (low+high)/2; 1065 if (rp[t] > bcol) high = t; 1066 else low = t; 1067 } 1068 for (i=low; i<high; i++) { 1069 if (rp[i] > bcol) break; 1070 if (rp[i] == bcol) { 1071 bap = ap + 16*i + 4*cidx + ridx; 1072 *bap += value; 1073 goto noinsert1; 1074 } 1075 } 1076 N = nrow++ - 1; 1077 high++; /* added new column thus must search to one higher than before */ 1078 /* shift up all the later entries in this row */ 1079 PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1080 PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1))); 1081 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1082 rp[i] = bcol; 1083 ap[16*i + 4*cidx + ridx] = value; 1084 noinsert1:; 1085 low = i; 1086 } 1087 ailen[brow] = nrow; 1088 } 1089 PetscFunctionReturnVoid(); 1090 } 1091 1092 /* 1093 Checks for missing diagonals 1094 */ 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscInt *diag,*ii = a->i,i; 1099 1100 PetscFunctionBegin; 1101 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1102 *missing = PETSC_FALSE; 1103 if (A->rmap->n > 0 && !ii) { 1104 *missing = PETSC_TRUE; 1105 if (d) *d = 0; 1106 PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n")); 1107 } else { 1108 PetscInt n; 1109 n = PetscMin(a->mbs, a->nbs); 1110 diag = a->diag; 1111 for (i=0; i<n; i++) { 1112 if (diag[i] >= ii[i+1]) { 1113 *missing = PETSC_TRUE; 1114 if (d) *d = i; 1115 PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i)); 1116 break; 1117 } 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1124 { 1125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1126 PetscInt i,j,m = a->mbs; 1127 1128 PetscFunctionBegin; 1129 if (!a->diag) { 1130 PetscCall(PetscMalloc1(m,&a->diag)); 1131 PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt))); 1132 a->free_diag = PETSC_TRUE; 1133 } 1134 for (i=0; i<m; i++) { 1135 a->diag[i] = a->i[i+1]; 1136 for (j=a->i[i]; j<a->i[i+1]; j++) { 1137 if (a->j[j] == i) { 1138 a->diag[i] = j; 1139 break; 1140 } 1141 } 1142 } 1143 PetscFunctionReturn(0); 1144 } 1145 1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1147 { 1148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1149 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1150 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1151 1152 PetscFunctionBegin; 1153 *nn = n; 1154 if (!ia) PetscFunctionReturn(0); 1155 if (symmetric) { 1156 PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja)); 1157 nz = tia[n]; 1158 } else { 1159 tia = a->i; tja = a->j; 1160 } 1161 1162 if (!blockcompressed && bs > 1) { 1163 (*nn) *= bs; 1164 /* malloc & create the natural set of indices */ 1165 PetscCall(PetscMalloc1((n+1)*bs,ia)); 1166 if (n) { 1167 (*ia)[0] = oshift; 1168 for (j=1; j<bs; j++) { 1169 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1170 } 1171 } 1172 1173 for (i=1; i<n; i++) { 1174 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1175 for (j=1; j<bs; j++) { 1176 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1177 } 1178 } 1179 if (n) { 1180 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1181 } 1182 1183 if (inja) { 1184 PetscCall(PetscMalloc1(nz*bs*bs,ja)); 1185 cnt = 0; 1186 for (i=0; i<n; i++) { 1187 for (j=0; j<bs; j++) { 1188 for (k=tia[i]; k<tia[i+1]; k++) { 1189 for (l=0; l<bs; l++) { 1190 (*ja)[cnt++] = bs*tja[k] + l; 1191 } 1192 } 1193 } 1194 } 1195 } 1196 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 PetscCall(PetscFree(tia)); 1199 PetscCall(PetscFree(tja)); 1200 } 1201 } else if (oshift == 1) { 1202 if (symmetric) { 1203 nz = tia[A->rmap->n/bs]; 1204 /* add 1 to i and j indices */ 1205 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1206 *ia = tia; 1207 if (ja) { 1208 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1209 *ja = tja; 1210 } 1211 } else { 1212 nz = a->i[A->rmap->n/bs]; 1213 /* malloc space and add 1 to i and j indices */ 1214 PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia)); 1215 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1216 if (ja) { 1217 PetscCall(PetscMalloc1(nz,ja)); 1218 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1219 } 1220 } 1221 } else { 1222 *ia = tia; 1223 if (ja) *ja = tja; 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1229 { 1230 PetscFunctionBegin; 1231 if (!ia) PetscFunctionReturn(0); 1232 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1233 PetscCall(PetscFree(*ia)); 1234 if (ja) PetscCall(PetscFree(*ja)); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1240 { 1241 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1242 1243 PetscFunctionBegin; 1244 #if defined(PETSC_USE_LOG) 1245 PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz); 1246 #endif 1247 PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i)); 1248 PetscCall(ISDestroy(&a->row)); 1249 PetscCall(ISDestroy(&a->col)); 1250 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1251 PetscCall(PetscFree(a->idiag)); 1252 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen)); 1253 PetscCall(PetscFree(a->solve_work)); 1254 PetscCall(PetscFree(a->mult_work)); 1255 PetscCall(PetscFree(a->sor_workt)); 1256 PetscCall(PetscFree(a->sor_work)); 1257 PetscCall(ISDestroy(&a->icol)); 1258 PetscCall(PetscFree(a->saved_values)); 1259 PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex)); 1260 1261 PetscCall(MatDestroy(&a->sbaijMat)); 1262 PetscCall(MatDestroy(&a->parent)); 1263 PetscCall(PetscFree(A->data)); 1264 1265 PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 1266 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL)); 1267 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL)); 1268 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL)); 1269 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL)); 1270 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL)); 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL)); 1277 #if defined(PETSC_HAVE_HYPRE) 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL)); 1279 #endif 1280 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL)); 1281 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1286 { 1287 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1288 1289 PetscFunctionBegin; 1290 switch (op) { 1291 case MAT_ROW_ORIENTED: 1292 a->roworiented = flg; 1293 break; 1294 case MAT_KEEP_NONZERO_PATTERN: 1295 a->keepnonzeropattern = flg; 1296 break; 1297 case MAT_NEW_NONZERO_LOCATIONS: 1298 a->nonew = (flg ? 0 : 1); 1299 break; 1300 case MAT_NEW_NONZERO_LOCATION_ERR: 1301 a->nonew = (flg ? -1 : 0); 1302 break; 1303 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1304 a->nonew = (flg ? -2 : 0); 1305 break; 1306 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1307 a->nounused = (flg ? -1 : 0); 1308 break; 1309 case MAT_FORCE_DIAGONAL_ENTRIES: 1310 case MAT_IGNORE_OFF_PROC_ENTRIES: 1311 case MAT_USE_HASH_TABLE: 1312 case MAT_SORTED_FULL: 1313 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1314 break; 1315 case MAT_SPD: 1316 case MAT_SYMMETRIC: 1317 case MAT_STRUCTURALLY_SYMMETRIC: 1318 case MAT_HERMITIAN: 1319 case MAT_SYMMETRY_ETERNAL: 1320 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1321 case MAT_SUBMAT_SINGLEIS: 1322 case MAT_STRUCTURE_ONLY: 1323 case MAT_SPD_ETERNAL: 1324 /* if the diagonal matrix is square it inherits some of the properties above */ 1325 break; 1326 default: 1327 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1333 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1334 { 1335 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1336 MatScalar *aa_i; 1337 PetscScalar *v_i; 1338 1339 PetscFunctionBegin; 1340 bs = A->rmap->bs; 1341 bs2 = bs*bs; 1342 PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row); 1343 1344 bn = row/bs; /* Block number */ 1345 bp = row % bs; /* Block Position */ 1346 M = ai[bn+1] - ai[bn]; 1347 *nz = bs*M; 1348 1349 if (v) { 1350 *v = NULL; 1351 if (*nz) { 1352 PetscCall(PetscMalloc1(*nz,v)); 1353 for (i=0; i<M; i++) { /* for each block in the block row */ 1354 v_i = *v + i*bs; 1355 aa_i = aa + bs2*(ai[bn] + i); 1356 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1357 } 1358 } 1359 } 1360 1361 if (idx) { 1362 *idx = NULL; 1363 if (*nz) { 1364 PetscCall(PetscMalloc1(*nz,idx)); 1365 for (i=0; i<M; i++) { /* for each block in the block row */ 1366 idx_i = *idx + i*bs; 1367 itmp = bs*aj[ai[bn] + i]; 1368 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1369 } 1370 } 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1376 { 1377 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1378 1379 PetscFunctionBegin; 1380 PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1385 { 1386 PetscFunctionBegin; 1387 if (nz) *nz = 0; 1388 if (idx) PetscCall(PetscFree(*idx)); 1389 if (v) PetscCall(PetscFree(*v)); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1394 { 1395 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1396 Mat C; 1397 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1398 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1399 MatScalar *ata,*aa=a->a; 1400 1401 PetscFunctionBegin; 1402 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B)); 1403 PetscCall(PetscCalloc1(1+nbs,&atfill)); 1404 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1405 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1406 1407 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C)); 1408 PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N)); 1409 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 1410 PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill)); 1411 1412 at = (Mat_SeqBAIJ*)C->data; 1413 ati = at->i; 1414 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1415 } else { 1416 C = *B; 1417 at = (Mat_SeqBAIJ*)C->data; 1418 ati = at->i; 1419 } 1420 1421 atj = at->j; 1422 ata = at->a; 1423 1424 /* Copy ati into atfill so we have locations of the next free space in atj */ 1425 PetscCall(PetscArraycpy(atfill,ati,nbs)); 1426 1427 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1428 for (i=0; i<mbs; i++) { 1429 anzj = ai[i+1] - ai[i]; 1430 for (j=0; j<anzj; j++) { 1431 atj[atfill[*aj]] = i; 1432 for (kr=0; kr<bs; kr++) { 1433 for (k=0; k<bs; k++) { 1434 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1435 } 1436 } 1437 atfill[*aj++] += 1; 1438 } 1439 } 1440 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1441 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1442 1443 /* Clean up temporary space and complete requests. */ 1444 PetscCall(PetscFree(atfill)); 1445 1446 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1447 PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1448 *B = C; 1449 } else { 1450 PetscCall(MatHeaderMerge(A,&C)); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1456 { 1457 Mat Btrans; 1458 1459 PetscFunctionBegin; 1460 *f = PETSC_FALSE; 1461 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&Btrans)); 1462 PetscCall(MatEqual_SeqBAIJ(B,Btrans,f)); 1463 PetscCall(MatDestroy(&Btrans)); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1468 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1469 { 1470 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1471 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1472 PetscInt *rowlens,*colidxs; 1473 PetscScalar *matvals; 1474 1475 PetscFunctionBegin; 1476 PetscCall(PetscViewerSetUp(viewer)); 1477 1478 M = mat->rmap->N; 1479 N = mat->cmap->N; 1480 m = mat->rmap->n; 1481 bs = mat->rmap->bs; 1482 nz = bs*bs*A->nz; 1483 1484 /* write matrix header */ 1485 header[0] = MAT_FILE_CLASSID; 1486 header[1] = M; header[2] = N; header[3] = nz; 1487 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1488 1489 /* store row lengths */ 1490 PetscCall(PetscMalloc1(m,&rowlens)); 1491 for (cnt=0, i=0; i<A->mbs; i++) 1492 for (j=0; j<bs; j++) 1493 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1494 PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT)); 1495 PetscCall(PetscFree(rowlens)); 1496 1497 /* store column indices */ 1498 PetscCall(PetscMalloc1(nz,&colidxs)); 1499 for (cnt=0, i=0; i<A->mbs; i++) 1500 for (k=0; k<bs; k++) 1501 for (j=A->i[i]; j<A->i[i+1]; j++) 1502 for (l=0; l<bs; l++) 1503 colidxs[cnt++] = bs*A->j[j] + l; 1504 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1505 PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT)); 1506 PetscCall(PetscFree(colidxs)); 1507 1508 /* store nonzero values */ 1509 PetscCall(PetscMalloc1(nz,&matvals)); 1510 for (cnt=0, i=0; i<A->mbs; i++) 1511 for (k=0; k<bs; k++) 1512 for (j=A->i[i]; j<A->i[i+1]; j++) 1513 for (l=0; l<bs; l++) 1514 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1515 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1516 PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR)); 1517 PetscCall(PetscFree(matvals)); 1518 1519 /* write block size option to the viewer's .info file */ 1520 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1521 PetscFunctionReturn(0); 1522 } 1523 1524 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1525 { 1526 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1527 PetscInt i,bs = A->rmap->bs,k; 1528 1529 PetscFunctionBegin; 1530 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1531 for (i=0; i<a->mbs; i++) { 1532 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1)); 1533 for (k=a->i[i]; k<a->i[i+1]; k++) { 1534 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1)); 1535 } 1536 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1537 } 1538 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1543 { 1544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1545 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1546 PetscViewerFormat format; 1547 1548 PetscFunctionBegin; 1549 if (A->structure_only) { 1550 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer)); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 PetscCall(PetscViewerGetFormat(viewer,&format)); 1555 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1556 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1557 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1558 const char *matname; 1559 Mat aij; 1560 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij)); 1561 PetscCall(PetscObjectGetName((PetscObject)A,&matname)); 1562 PetscCall(PetscObjectSetName((PetscObject)aij,matname)); 1563 PetscCall(MatView(aij,viewer)); 1564 PetscCall(MatDestroy(&aij)); 1565 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1566 PetscFunctionReturn(0); 1567 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1568 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1569 for (i=0; i<a->mbs; i++) { 1570 for (j=0; j<bs; j++) { 1571 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1572 for (k=a->i[i]; k<a->i[i+1]; k++) { 1573 for (l=0; l<bs; l++) { 1574 #if defined(PETSC_USE_COMPLEX) 1575 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1576 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l, 1577 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1578 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1579 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l, 1580 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1581 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1582 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1583 } 1584 #else 1585 if (a->a[bs2*k + l*bs + j] != 0.0) { 1586 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1587 } 1588 #endif 1589 } 1590 } 1591 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1592 } 1593 } 1594 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1595 } else { 1596 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1597 for (i=0; i<a->mbs; i++) { 1598 for (j=0; j<bs; j++) { 1599 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1600 for (k=a->i[i]; k<a->i[i+1]; k++) { 1601 for (l=0; l<bs; l++) { 1602 #if defined(PETSC_USE_COMPLEX) 1603 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1604 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l, 1605 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1606 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1607 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l, 1608 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1609 } else { 1610 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1611 } 1612 #else 1613 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1614 #endif 1615 } 1616 } 1617 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1618 } 1619 } 1620 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1621 } 1622 PetscCall(PetscViewerFlush(viewer)); 1623 PetscFunctionReturn(0); 1624 } 1625 1626 #include <petscdraw.h> 1627 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1628 { 1629 Mat A = (Mat) Aa; 1630 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1631 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1632 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1633 MatScalar *aa; 1634 PetscViewer viewer; 1635 PetscViewerFormat format; 1636 1637 PetscFunctionBegin; 1638 PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer)); 1639 PetscCall(PetscViewerGetFormat(viewer,&format)); 1640 PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr)); 1641 1642 /* loop over matrix elements drawing boxes */ 1643 1644 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1645 PetscDrawCollectiveBegin(draw); 1646 /* Blue for negative, Cyan for zero and Red for positive */ 1647 color = PETSC_DRAW_BLUE; 1648 for (i=0,row=0; i<mbs; i++,row+=bs) { 1649 for (j=a->i[i]; j<a->i[i+1]; j++) { 1650 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1651 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1652 aa = a->a + j*bs2; 1653 for (k=0; k<bs; k++) { 1654 for (l=0; l<bs; l++) { 1655 if (PetscRealPart(*aa++) >= 0.) continue; 1656 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1657 } 1658 } 1659 } 1660 } 1661 color = PETSC_DRAW_CYAN; 1662 for (i=0,row=0; i<mbs; i++,row+=bs) { 1663 for (j=a->i[i]; j<a->i[i+1]; j++) { 1664 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1665 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1666 aa = a->a + j*bs2; 1667 for (k=0; k<bs; k++) { 1668 for (l=0; l<bs; l++) { 1669 if (PetscRealPart(*aa++) != 0.) continue; 1670 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1671 } 1672 } 1673 } 1674 } 1675 color = PETSC_DRAW_RED; 1676 for (i=0,row=0; i<mbs; i++,row+=bs) { 1677 for (j=a->i[i]; j<a->i[i+1]; j++) { 1678 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1679 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1680 aa = a->a + j*bs2; 1681 for (k=0; k<bs; k++) { 1682 for (l=0; l<bs; l++) { 1683 if (PetscRealPart(*aa++) <= 0.) continue; 1684 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1685 } 1686 } 1687 } 1688 } 1689 PetscDrawCollectiveEnd(draw); 1690 } else { 1691 /* use contour shading to indicate magnitude of values */ 1692 /* first determine max of all nonzero values */ 1693 PetscReal minv = 0.0, maxv = 0.0; 1694 PetscDraw popup; 1695 1696 for (i=0; i<a->nz*a->bs2; i++) { 1697 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1698 } 1699 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1700 PetscCall(PetscDrawGetPopup(draw,&popup)); 1701 PetscCall(PetscDrawScalePopup(popup,0.0,maxv)); 1702 1703 PetscDrawCollectiveBegin(draw); 1704 for (i=0,row=0; i<mbs; i++,row+=bs) { 1705 for (j=a->i[i]; j<a->i[i+1]; j++) { 1706 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1707 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1708 aa = a->a + j*bs2; 1709 for (k=0; k<bs; k++) { 1710 for (l=0; l<bs; l++) { 1711 MatScalar v = *aa++; 1712 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1713 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1714 } 1715 } 1716 } 1717 } 1718 PetscDrawCollectiveEnd(draw); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1724 { 1725 PetscReal xl,yl,xr,yr,w,h; 1726 PetscDraw draw; 1727 PetscBool isnull; 1728 1729 PetscFunctionBegin; 1730 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1731 PetscCall(PetscDrawIsNull(draw,&isnull)); 1732 if (isnull) PetscFunctionReturn(0); 1733 1734 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1735 xr += w; yr += h; xl = -w; yl = -h; 1736 PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr)); 1737 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer)); 1738 PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A)); 1739 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL)); 1740 PetscCall(PetscDrawSave(draw)); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1745 { 1746 PetscBool iascii,isbinary,isdraw; 1747 1748 PetscFunctionBegin; 1749 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1750 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1751 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1752 if (iascii) { 1753 PetscCall(MatView_SeqBAIJ_ASCII(A,viewer)); 1754 } else if (isbinary) { 1755 PetscCall(MatView_SeqBAIJ_Binary(A,viewer)); 1756 } else if (isdraw) { 1757 PetscCall(MatView_SeqBAIJ_Draw(A,viewer)); 1758 } else { 1759 Mat B; 1760 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1761 PetscCall(MatView(B,viewer)); 1762 PetscCall(MatDestroy(&B)); 1763 } 1764 PetscFunctionReturn(0); 1765 } 1766 1767 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1768 { 1769 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1770 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1771 PetscInt *ai = a->i,*ailen = a->ilen; 1772 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1773 MatScalar *ap,*aa = a->a; 1774 1775 PetscFunctionBegin; 1776 for (k=0; k<m; k++) { /* loop over rows */ 1777 row = im[k]; brow = row/bs; 1778 if (row < 0) {v += n; continue;} /* negative row */ 1779 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row); 1780 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1781 ap = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1782 nrow = ailen[brow]; 1783 for (l=0; l<n; l++) { /* loop over columns */ 1784 if (in[l] < 0) {v++; continue;} /* negative column */ 1785 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]); 1786 col = in[l]; 1787 bcol = col/bs; 1788 cidx = col%bs; 1789 ridx = row%bs; 1790 high = nrow; 1791 low = 0; /* assume unsorted */ 1792 while (high-low > 5) { 1793 t = (low+high)/2; 1794 if (rp[t] > bcol) high = t; 1795 else low = t; 1796 } 1797 for (i=low; i<high; i++) { 1798 if (rp[i] > bcol) break; 1799 if (rp[i] == bcol) { 1800 *v++ = ap[bs2*i+bs*cidx+ridx]; 1801 goto finished; 1802 } 1803 } 1804 *v++ = 0.0; 1805 finished:; 1806 } 1807 } 1808 PetscFunctionReturn(0); 1809 } 1810 1811 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1812 { 1813 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1814 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1815 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1816 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1817 PetscBool roworiented=a->roworiented; 1818 const PetscScalar *value = v; 1819 MatScalar *ap=NULL,*aa = a->a,*bap; 1820 1821 PetscFunctionBegin; 1822 if (roworiented) { 1823 stepval = (n-1)*bs; 1824 } else { 1825 stepval = (m-1)*bs; 1826 } 1827 for (k=0; k<m; k++) { /* loop over added rows */ 1828 row = im[k]; 1829 if (row < 0) continue; 1830 PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1); 1831 rp = aj + ai[row]; 1832 if (!A->structure_only) ap = aa + bs2*ai[row]; 1833 rmax = imax[row]; 1834 nrow = ailen[row]; 1835 low = 0; 1836 high = nrow; 1837 for (l=0; l<n; l++) { /* loop over added columns */ 1838 if (in[l] < 0) continue; 1839 PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1); 1840 col = in[l]; 1841 if (!A->structure_only) { 1842 if (roworiented) { 1843 value = v + (k*(stepval+bs) + l)*bs; 1844 } else { 1845 value = v + (l*(stepval+bs) + k)*bs; 1846 } 1847 } 1848 if (col <= lastcol) low = 0; 1849 else high = nrow; 1850 lastcol = col; 1851 while (high-low > 7) { 1852 t = (low+high)/2; 1853 if (rp[t] > col) high = t; 1854 else low = t; 1855 } 1856 for (i=low; i<high; i++) { 1857 if (rp[i] > col) break; 1858 if (rp[i] == col) { 1859 if (A->structure_only) goto noinsert2; 1860 bap = ap + bs2*i; 1861 if (roworiented) { 1862 if (is == ADD_VALUES) { 1863 for (ii=0; ii<bs; ii++,value+=stepval) { 1864 for (jj=ii; jj<bs2; jj+=bs) { 1865 bap[jj] += *value++; 1866 } 1867 } 1868 } else { 1869 for (ii=0; ii<bs; ii++,value+=stepval) { 1870 for (jj=ii; jj<bs2; jj+=bs) { 1871 bap[jj] = *value++; 1872 } 1873 } 1874 } 1875 } else { 1876 if (is == ADD_VALUES) { 1877 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1878 for (jj=0; jj<bs; jj++) { 1879 bap[jj] += value[jj]; 1880 } 1881 bap += bs; 1882 } 1883 } else { 1884 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1885 for (jj=0; jj<bs; jj++) { 1886 bap[jj] = value[jj]; 1887 } 1888 bap += bs; 1889 } 1890 } 1891 } 1892 goto noinsert2; 1893 } 1894 } 1895 if (nonew == 1) goto noinsert2; 1896 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 1897 if (A->structure_only) { 1898 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1899 } else { 1900 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1901 } 1902 N = nrow++ - 1; high++; 1903 /* shift up all the later entries in this row */ 1904 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1905 rp[i] = col; 1906 if (!A->structure_only) { 1907 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 1908 bap = ap + bs2*i; 1909 if (roworiented) { 1910 for (ii=0; ii<bs; ii++,value+=stepval) { 1911 for (jj=ii; jj<bs2; jj+=bs) { 1912 bap[jj] = *value++; 1913 } 1914 } 1915 } else { 1916 for (ii=0; ii<bs; ii++,value+=stepval) { 1917 for (jj=0; jj<bs; jj++) { 1918 *bap++ = *value++; 1919 } 1920 } 1921 } 1922 } 1923 noinsert2:; 1924 low = i; 1925 } 1926 ailen[row] = nrow; 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1932 { 1933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1934 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1935 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1936 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1937 MatScalar *aa = a->a,*ap; 1938 PetscReal ratio=0.6; 1939 1940 PetscFunctionBegin; 1941 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1942 1943 if (m) rmax = ailen[0]; 1944 for (i=1; i<mbs; i++) { 1945 /* move each row back by the amount of empty slots (fshift) before it*/ 1946 fshift += imax[i-1] - ailen[i-1]; 1947 rmax = PetscMax(rmax,ailen[i]); 1948 if (fshift) { 1949 ip = aj + ai[i]; 1950 ap = aa + bs2*ai[i]; 1951 N = ailen[i]; 1952 PetscCall(PetscArraymove(ip-fshift,ip,N)); 1953 if (!A->structure_only) { 1954 PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N)); 1955 } 1956 } 1957 ai[i] = ai[i-1] + ailen[i-1]; 1958 } 1959 if (mbs) { 1960 fshift += imax[mbs-1] - ailen[mbs-1]; 1961 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1962 } 1963 1964 /* reset ilen and imax for each row */ 1965 a->nonzerorowcnt = 0; 1966 if (A->structure_only) { 1967 PetscCall(PetscFree2(a->imax,a->ilen)); 1968 } else { /* !A->structure_only */ 1969 for (i=0; i<mbs; i++) { 1970 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1971 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1972 } 1973 } 1974 a->nz = ai[mbs]; 1975 1976 /* diagonals may have moved, so kill the diagonal pointers */ 1977 a->idiagvalid = PETSC_FALSE; 1978 if (fshift && a->diag) { 1979 PetscCall(PetscFree(a->diag)); 1980 PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt))); 1981 a->diag = NULL; 1982 } 1983 if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1984 PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2)); 1985 PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs)); 1986 PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax)); 1987 1988 A->info.mallocs += a->reallocs; 1989 a->reallocs = 0; 1990 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1991 a->rmax = rmax; 1992 1993 if (!A->structure_only) { 1994 PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio)); 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 /* 2000 This function returns an array of flags which indicate the locations of contiguous 2001 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2002 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2003 Assume: sizes should be long enough to hold all the values. 2004 */ 2005 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2006 { 2007 PetscInt i,j,k,row; 2008 PetscBool flg; 2009 2010 PetscFunctionBegin; 2011 for (i=0,j=0; i<n; j++) { 2012 row = idx[i]; 2013 if (row%bs!=0) { /* Not the beginning of a block */ 2014 sizes[j] = 1; 2015 i++; 2016 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2017 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2018 i++; 2019 } else { /* Beginning of the block, so check if the complete block exists */ 2020 flg = PETSC_TRUE; 2021 for (k=1; k<bs; k++) { 2022 if (row+k != idx[i+k]) { /* break in the block */ 2023 flg = PETSC_FALSE; 2024 break; 2025 } 2026 } 2027 if (flg) { /* No break in the bs */ 2028 sizes[j] = bs; 2029 i += bs; 2030 } else { 2031 sizes[j] = 1; 2032 i++; 2033 } 2034 } 2035 } 2036 *bs_max = j; 2037 PetscFunctionReturn(0); 2038 } 2039 2040 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2041 { 2042 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2043 PetscInt i,j,k,count,*rows; 2044 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2045 PetscScalar zero = 0.0; 2046 MatScalar *aa; 2047 const PetscScalar *xx; 2048 PetscScalar *bb; 2049 2050 PetscFunctionBegin; 2051 /* fix right hand side if needed */ 2052 if (x && b) { 2053 PetscCall(VecGetArrayRead(x,&xx)); 2054 PetscCall(VecGetArray(b,&bb)); 2055 for (i=0; i<is_n; i++) { 2056 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2057 } 2058 PetscCall(VecRestoreArrayRead(x,&xx)); 2059 PetscCall(VecRestoreArray(b,&bb)); 2060 } 2061 2062 /* Make a copy of the IS and sort it */ 2063 /* allocate memory for rows,sizes */ 2064 PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes)); 2065 2066 /* copy IS values to rows, and sort them */ 2067 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2068 PetscCall(PetscSortInt(is_n,rows)); 2069 2070 if (baij->keepnonzeropattern) { 2071 for (i=0; i<is_n; i++) sizes[i] = 1; 2072 bs_max = is_n; 2073 } else { 2074 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max)); 2075 A->nonzerostate++; 2076 } 2077 2078 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2079 row = rows[j]; 2080 PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row); 2081 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2082 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2083 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2084 if (diag != (PetscScalar)0.0) { 2085 if (baij->ilen[row/bs] > 0) { 2086 baij->ilen[row/bs] = 1; 2087 baij->j[baij->i[row/bs]] = row/bs; 2088 2089 PetscCall(PetscArrayzero(aa,count*bs)); 2090 } 2091 /* Now insert all the diagonal values for this bs */ 2092 for (k=0; k<bs; k++) { 2093 PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES)); 2094 } 2095 } else { /* (diag == 0.0) */ 2096 baij->ilen[row/bs] = 0; 2097 } /* end (diag == 0.0) */ 2098 } else { /* (sizes[i] != bs) */ 2099 PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2100 for (k=0; k<count; k++) { 2101 aa[0] = zero; 2102 aa += bs; 2103 } 2104 if (diag != (PetscScalar)0.0) { 2105 PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES)); 2106 } 2107 } 2108 } 2109 2110 PetscCall(PetscFree2(rows,sizes)); 2111 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2116 { 2117 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2118 PetscInt i,j,k,count; 2119 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2120 PetscScalar zero = 0.0; 2121 MatScalar *aa; 2122 const PetscScalar *xx; 2123 PetscScalar *bb; 2124 PetscBool *zeroed,vecs = PETSC_FALSE; 2125 2126 PetscFunctionBegin; 2127 /* fix right hand side if needed */ 2128 if (x && b) { 2129 PetscCall(VecGetArrayRead(x,&xx)); 2130 PetscCall(VecGetArray(b,&bb)); 2131 vecs = PETSC_TRUE; 2132 } 2133 2134 /* zero the columns */ 2135 PetscCall(PetscCalloc1(A->rmap->n,&zeroed)); 2136 for (i=0; i<is_n; i++) { 2137 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]); 2138 zeroed[is_idx[i]] = PETSC_TRUE; 2139 } 2140 for (i=0; i<A->rmap->N; i++) { 2141 if (!zeroed[i]) { 2142 row = i/bs; 2143 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2144 for (k=0; k<bs; k++) { 2145 col = bs*baij->j[j] + k; 2146 if (zeroed[col]) { 2147 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2148 if (vecs) bb[i] -= aa[0]*xx[col]; 2149 aa[0] = 0.0; 2150 } 2151 } 2152 } 2153 } else if (vecs) bb[i] = diag*xx[i]; 2154 } 2155 PetscCall(PetscFree(zeroed)); 2156 if (vecs) { 2157 PetscCall(VecRestoreArrayRead(x,&xx)); 2158 PetscCall(VecRestoreArray(b,&bb)); 2159 } 2160 2161 /* zero the rows */ 2162 for (i=0; i<is_n; i++) { 2163 row = is_idx[i]; 2164 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2165 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2166 for (k=0; k<count; k++) { 2167 aa[0] = zero; 2168 aa += bs; 2169 } 2170 if (diag != (PetscScalar)0.0) { 2171 PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES)); 2172 } 2173 } 2174 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2179 { 2180 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2181 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2182 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2183 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2184 PetscInt ridx,cidx,bs2=a->bs2; 2185 PetscBool roworiented=a->roworiented; 2186 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2187 2188 PetscFunctionBegin; 2189 for (k=0; k<m; k++) { /* loop over added rows */ 2190 row = im[k]; 2191 brow = row/bs; 2192 if (row < 0) continue; 2193 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1); 2194 rp = aj + ai[brow]; 2195 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2196 rmax = imax[brow]; 2197 nrow = ailen[brow]; 2198 low = 0; 2199 high = nrow; 2200 for (l=0; l<n; l++) { /* loop over added columns */ 2201 if (in[l] < 0) continue; 2202 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1); 2203 col = in[l]; bcol = col/bs; 2204 ridx = row % bs; cidx = col % bs; 2205 if (!A->structure_only) { 2206 if (roworiented) { 2207 value = v[l + k*n]; 2208 } else { 2209 value = v[k + l*m]; 2210 } 2211 } 2212 if (col <= lastcol) low = 0; else high = nrow; 2213 lastcol = col; 2214 while (high-low > 7) { 2215 t = (low+high)/2; 2216 if (rp[t] > bcol) high = t; 2217 else low = t; 2218 } 2219 for (i=low; i<high; i++) { 2220 if (rp[i] > bcol) break; 2221 if (rp[i] == bcol) { 2222 bap = ap + bs2*i + bs*cidx + ridx; 2223 if (!A->structure_only) { 2224 if (is == ADD_VALUES) *bap += value; 2225 else *bap = value; 2226 } 2227 goto noinsert1; 2228 } 2229 } 2230 if (nonew == 1) goto noinsert1; 2231 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2232 if (A->structure_only) { 2233 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2234 } else { 2235 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2236 } 2237 N = nrow++ - 1; high++; 2238 /* shift up all the later entries in this row */ 2239 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 2240 rp[i] = bcol; 2241 if (!A->structure_only) { 2242 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 2243 PetscCall(PetscArrayzero(ap+bs2*i,bs2)); 2244 ap[bs2*i + bs*cidx + ridx] = value; 2245 } 2246 a->nz++; 2247 A->nonzerostate++; 2248 noinsert1:; 2249 low = i; 2250 } 2251 ailen[brow] = nrow; 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2257 { 2258 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2259 Mat outA; 2260 PetscBool row_identity,col_identity; 2261 2262 PetscFunctionBegin; 2263 PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2264 PetscCall(ISIdentity(row,&row_identity)); 2265 PetscCall(ISIdentity(col,&col_identity)); 2266 PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2267 2268 outA = inA; 2269 inA->factortype = MAT_FACTOR_LU; 2270 PetscCall(PetscFree(inA->solvertype)); 2271 PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype)); 2272 2273 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2274 2275 PetscCall(PetscObjectReference((PetscObject)row)); 2276 PetscCall(ISDestroy(&a->row)); 2277 a->row = row; 2278 PetscCall(PetscObjectReference((PetscObject)col)); 2279 PetscCall(ISDestroy(&a->col)); 2280 a->col = col; 2281 2282 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2283 PetscCall(ISDestroy(&a->icol)); 2284 PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol)); 2285 PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol)); 2286 2287 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity))); 2288 if (!a->solve_work) { 2289 PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work)); 2290 PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar))); 2291 } 2292 PetscCall(MatLUFactorNumeric(outA,inA,info)); 2293 PetscFunctionReturn(0); 2294 } 2295 2296 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2297 { 2298 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2299 PetscInt i,nz,mbs; 2300 2301 PetscFunctionBegin; 2302 nz = baij->maxnz; 2303 mbs = baij->mbs; 2304 for (i=0; i<nz; i++) { 2305 baij->j[i] = indices[i]; 2306 } 2307 baij->nz = nz; 2308 for (i=0; i<mbs; i++) { 2309 baij->ilen[i] = baij->imax[i]; 2310 } 2311 PetscFunctionReturn(0); 2312 } 2313 2314 /*@ 2315 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2316 in the matrix. 2317 2318 Input Parameters: 2319 + mat - the SeqBAIJ matrix 2320 - indices - the column indices 2321 2322 Level: advanced 2323 2324 Notes: 2325 This can be called if you have precomputed the nonzero structure of the 2326 matrix and want to provide it to the matrix object to improve the performance 2327 of the MatSetValues() operation. 2328 2329 You MUST have set the correct numbers of nonzeros per row in the call to 2330 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2331 2332 MUST be called before any calls to MatSetValues(); 2333 2334 @*/ 2335 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2336 { 2337 PetscFunctionBegin; 2338 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2339 PetscValidIntPointer(indices,2); 2340 PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices)); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2345 { 2346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2347 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2348 PetscReal atmp; 2349 PetscScalar *x,zero = 0.0; 2350 MatScalar *aa; 2351 PetscInt ncols,brow,krow,kcol; 2352 2353 PetscFunctionBegin; 2354 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2355 PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2356 bs = A->rmap->bs; 2357 aa = a->a; 2358 ai = a->i; 2359 aj = a->j; 2360 mbs = a->mbs; 2361 2362 PetscCall(VecSet(v,zero)); 2363 PetscCall(VecGetArray(v,&x)); 2364 PetscCall(VecGetLocalSize(v,&n)); 2365 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2366 for (i=0; i<mbs; i++) { 2367 ncols = ai[1] - ai[0]; ai++; 2368 brow = bs*i; 2369 for (j=0; j<ncols; j++) { 2370 for (kcol=0; kcol<bs; kcol++) { 2371 for (krow=0; krow<bs; krow++) { 2372 atmp = PetscAbsScalar(*aa);aa++; 2373 row = brow + krow; /* row index */ 2374 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2375 } 2376 } 2377 aj++; 2378 } 2379 } 2380 PetscCall(VecRestoreArray(v,&x)); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2385 { 2386 PetscFunctionBegin; 2387 /* If the two matrices have the same copy implementation, use fast copy. */ 2388 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2390 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2391 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2392 2393 PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]); 2394 PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs); 2395 PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs])); 2396 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2397 } else { 2398 PetscCall(MatCopy_Basic(A,B,str)); 2399 } 2400 PetscFunctionReturn(0); 2401 } 2402 2403 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2404 { 2405 PetscFunctionBegin; 2406 PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL)); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2411 { 2412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2413 2414 PetscFunctionBegin; 2415 *array = a->a; 2416 PetscFunctionReturn(0); 2417 } 2418 2419 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2420 { 2421 PetscFunctionBegin; 2422 *array = NULL; 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2427 { 2428 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2429 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2430 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2431 2432 PetscFunctionBegin; 2433 /* Set the number of nonzeros in the new matrix */ 2434 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz)); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2439 { 2440 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2441 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2442 PetscBLASInt one=1; 2443 2444 PetscFunctionBegin; 2445 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2446 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2447 if (e) { 2448 PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e)); 2449 if (e) { 2450 PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e)); 2451 if (e) str = SAME_NONZERO_PATTERN; 2452 } 2453 } 2454 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN"); 2455 } 2456 if (str == SAME_NONZERO_PATTERN) { 2457 PetscScalar alpha = a; 2458 PetscBLASInt bnz; 2459 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 2460 PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2461 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2462 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2463 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2464 } else { 2465 Mat B; 2466 PetscInt *nnz; 2467 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2468 PetscCall(PetscMalloc1(Y->rmap->N,&nnz)); 2469 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2470 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2471 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 2472 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 2473 PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name)); 2474 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz)); 2475 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 2476 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2477 PetscCall(MatHeaderMerge(Y,&B)); 2478 PetscCall(PetscFree(nnz)); 2479 } 2480 PetscFunctionReturn(0); 2481 } 2482 2483 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2484 { 2485 #if defined(PETSC_USE_COMPLEX) 2486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2487 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2488 MatScalar *aa = a->a; 2489 2490 PetscFunctionBegin; 2491 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2492 #else 2493 PetscFunctionBegin; 2494 #endif 2495 PetscFunctionReturn(0); 2496 } 2497 2498 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2499 { 2500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2501 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2502 MatScalar *aa = a->a; 2503 2504 PetscFunctionBegin; 2505 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2510 { 2511 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2512 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2513 MatScalar *aa = a->a; 2514 2515 PetscFunctionBegin; 2516 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 /* 2521 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2522 */ 2523 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2524 { 2525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2526 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2527 PetscInt nz = a->i[m],row,*jj,mr,col; 2528 2529 PetscFunctionBegin; 2530 *nn = n; 2531 if (!ia) PetscFunctionReturn(0); 2532 PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2533 PetscCall(PetscCalloc1(n,&collengths)); 2534 PetscCall(PetscMalloc1(n+1,&cia)); 2535 PetscCall(PetscMalloc1(nz,&cja)); 2536 jj = a->j; 2537 for (i=0; i<nz; i++) { 2538 collengths[jj[i]]++; 2539 } 2540 cia[0] = oshift; 2541 for (i=0; i<n; i++) { 2542 cia[i+1] = cia[i] + collengths[i]; 2543 } 2544 PetscCall(PetscArrayzero(collengths,n)); 2545 jj = a->j; 2546 for (row=0; row<m; row++) { 2547 mr = a->i[row+1] - a->i[row]; 2548 for (i=0; i<mr; i++) { 2549 col = *jj++; 2550 2551 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2552 } 2553 } 2554 PetscCall(PetscFree(collengths)); 2555 *ia = cia; *ja = cja; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2560 { 2561 PetscFunctionBegin; 2562 if (!ia) PetscFunctionReturn(0); 2563 PetscCall(PetscFree(*ia)); 2564 PetscCall(PetscFree(*ja)); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 /* 2569 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2570 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2571 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2572 */ 2573 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2574 { 2575 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2576 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2577 PetscInt nz = a->i[m],row,*jj,mr,col; 2578 PetscInt *cspidx; 2579 2580 PetscFunctionBegin; 2581 *nn = n; 2582 if (!ia) PetscFunctionReturn(0); 2583 2584 PetscCall(PetscCalloc1(n,&collengths)); 2585 PetscCall(PetscMalloc1(n+1,&cia)); 2586 PetscCall(PetscMalloc1(nz,&cja)); 2587 PetscCall(PetscMalloc1(nz,&cspidx)); 2588 jj = a->j; 2589 for (i=0; i<nz; i++) { 2590 collengths[jj[i]]++; 2591 } 2592 cia[0] = oshift; 2593 for (i=0; i<n; i++) { 2594 cia[i+1] = cia[i] + collengths[i]; 2595 } 2596 PetscCall(PetscArrayzero(collengths,n)); 2597 jj = a->j; 2598 for (row=0; row<m; row++) { 2599 mr = a->i[row+1] - a->i[row]; 2600 for (i=0; i<mr; i++) { 2601 col = *jj++; 2602 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2603 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2604 } 2605 } 2606 PetscCall(PetscFree(collengths)); 2607 *ia = cia; 2608 *ja = cja; 2609 *spidx = cspidx; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2614 { 2615 PetscFunctionBegin; 2616 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done)); 2617 PetscCall(PetscFree(*spidx)); 2618 PetscFunctionReturn(0); 2619 } 2620 2621 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2622 { 2623 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2624 2625 PetscFunctionBegin; 2626 if (!Y->preallocated || !aij->nz) { 2627 PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL)); 2628 } 2629 PetscCall(MatShift_Basic(Y,a)); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 /* -------------------------------------------------------------------*/ 2634 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2635 MatGetRow_SeqBAIJ, 2636 MatRestoreRow_SeqBAIJ, 2637 MatMult_SeqBAIJ_N, 2638 /* 4*/ MatMultAdd_SeqBAIJ_N, 2639 MatMultTranspose_SeqBAIJ, 2640 MatMultTransposeAdd_SeqBAIJ, 2641 NULL, 2642 NULL, 2643 NULL, 2644 /* 10*/ NULL, 2645 MatLUFactor_SeqBAIJ, 2646 NULL, 2647 NULL, 2648 MatTranspose_SeqBAIJ, 2649 /* 15*/ MatGetInfo_SeqBAIJ, 2650 MatEqual_SeqBAIJ, 2651 MatGetDiagonal_SeqBAIJ, 2652 MatDiagonalScale_SeqBAIJ, 2653 MatNorm_SeqBAIJ, 2654 /* 20*/ NULL, 2655 MatAssemblyEnd_SeqBAIJ, 2656 MatSetOption_SeqBAIJ, 2657 MatZeroEntries_SeqBAIJ, 2658 /* 24*/ MatZeroRows_SeqBAIJ, 2659 NULL, 2660 NULL, 2661 NULL, 2662 NULL, 2663 /* 29*/ MatSetUp_SeqBAIJ, 2664 NULL, 2665 NULL, 2666 NULL, 2667 NULL, 2668 /* 34*/ MatDuplicate_SeqBAIJ, 2669 NULL, 2670 NULL, 2671 MatILUFactor_SeqBAIJ, 2672 NULL, 2673 /* 39*/ MatAXPY_SeqBAIJ, 2674 MatCreateSubMatrices_SeqBAIJ, 2675 MatIncreaseOverlap_SeqBAIJ, 2676 MatGetValues_SeqBAIJ, 2677 MatCopy_SeqBAIJ, 2678 /* 44*/ NULL, 2679 MatScale_SeqBAIJ, 2680 MatShift_SeqBAIJ, 2681 NULL, 2682 MatZeroRowsColumns_SeqBAIJ, 2683 /* 49*/ NULL, 2684 MatGetRowIJ_SeqBAIJ, 2685 MatRestoreRowIJ_SeqBAIJ, 2686 MatGetColumnIJ_SeqBAIJ, 2687 MatRestoreColumnIJ_SeqBAIJ, 2688 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2689 NULL, 2690 NULL, 2691 NULL, 2692 MatSetValuesBlocked_SeqBAIJ, 2693 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2694 MatDestroy_SeqBAIJ, 2695 MatView_SeqBAIJ, 2696 NULL, 2697 NULL, 2698 /* 64*/ NULL, 2699 NULL, 2700 NULL, 2701 NULL, 2702 NULL, 2703 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2704 NULL, 2705 MatConvert_Basic, 2706 NULL, 2707 NULL, 2708 /* 74*/ NULL, 2709 MatFDColoringApply_BAIJ, 2710 NULL, 2711 NULL, 2712 NULL, 2713 /* 79*/ NULL, 2714 NULL, 2715 NULL, 2716 NULL, 2717 MatLoad_SeqBAIJ, 2718 /* 84*/ NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 NULL, 2723 /* 89*/ NULL, 2724 NULL, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /* 94*/ NULL, 2729 NULL, 2730 NULL, 2731 NULL, 2732 NULL, 2733 /* 99*/ NULL, 2734 NULL, 2735 NULL, 2736 MatConjugate_SeqBAIJ, 2737 NULL, 2738 /*104*/ NULL, 2739 MatRealPart_SeqBAIJ, 2740 MatImaginaryPart_SeqBAIJ, 2741 NULL, 2742 NULL, 2743 /*109*/ NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 MatMissingDiagonal_SeqBAIJ, 2748 /*114*/ NULL, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*119*/ NULL, 2754 NULL, 2755 MatMultHermitianTranspose_SeqBAIJ, 2756 MatMultHermitianTransposeAdd_SeqBAIJ, 2757 NULL, 2758 /*124*/ NULL, 2759 MatGetColumnReductions_SeqBAIJ, 2760 MatInvertBlockDiagonal_SeqBAIJ, 2761 NULL, 2762 NULL, 2763 /*129*/ NULL, 2764 NULL, 2765 NULL, 2766 NULL, 2767 NULL, 2768 /*134*/ NULL, 2769 NULL, 2770 NULL, 2771 NULL, 2772 NULL, 2773 /*139*/ MatSetBlockSizes_Default, 2774 NULL, 2775 NULL, 2776 MatFDColoringSetUp_SeqXAIJ, 2777 NULL, 2778 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2779 MatDestroySubMatrices_SeqBAIJ, 2780 NULL, 2781 NULL, 2782 NULL, 2783 NULL, 2784 /*150*/ NULL, 2785 }; 2786 2787 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2788 { 2789 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2790 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2791 2792 PetscFunctionBegin; 2793 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2794 2795 /* allocate space for values if not already there */ 2796 if (!aij->saved_values) { 2797 PetscCall(PetscMalloc1(nz+1,&aij->saved_values)); 2798 PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar))); 2799 } 2800 2801 /* copy values over */ 2802 PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz)); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2807 { 2808 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2809 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2810 2811 PetscFunctionBegin; 2812 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2813 PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2814 2815 /* copy values over */ 2816 PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz)); 2817 PetscFunctionReturn(0); 2818 } 2819 2820 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2821 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2822 2823 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2824 { 2825 Mat_SeqBAIJ *b; 2826 PetscInt i,mbs,nbs,bs2; 2827 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2828 2829 PetscFunctionBegin; 2830 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2831 if (nz == MAT_SKIP_ALLOCATION) { 2832 skipallocation = PETSC_TRUE; 2833 nz = 0; 2834 } 2835 2836 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2837 PetscCall(PetscLayoutSetUp(B->rmap)); 2838 PetscCall(PetscLayoutSetUp(B->cmap)); 2839 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2840 2841 B->preallocated = PETSC_TRUE; 2842 2843 mbs = B->rmap->n/bs; 2844 nbs = B->cmap->n/bs; 2845 bs2 = bs*bs; 2846 2847 PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs); 2848 2849 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2850 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz); 2851 if (nnz) { 2852 for (i=0; i<mbs; i++) { 2853 PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]); 2854 PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs); 2855 } 2856 } 2857 2858 b = (Mat_SeqBAIJ*)B->data; 2859 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat"); 2860 PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL)); 2861 PetscOptionsEnd(); 2862 2863 if (!flg) { 2864 switch (bs) { 2865 case 1: 2866 B->ops->mult = MatMult_SeqBAIJ_1; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2868 break; 2869 case 2: 2870 B->ops->mult = MatMult_SeqBAIJ_2; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2872 break; 2873 case 3: 2874 B->ops->mult = MatMult_SeqBAIJ_3; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2876 break; 2877 case 4: 2878 B->ops->mult = MatMult_SeqBAIJ_4; 2879 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2880 break; 2881 case 5: 2882 B->ops->mult = MatMult_SeqBAIJ_5; 2883 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2884 break; 2885 case 6: 2886 B->ops->mult = MatMult_SeqBAIJ_6; 2887 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2888 break; 2889 case 7: 2890 B->ops->mult = MatMult_SeqBAIJ_7; 2891 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2892 break; 2893 case 9: 2894 { 2895 PetscInt version = 1; 2896 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2897 switch (version) { 2898 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2899 case 1: 2900 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2901 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2902 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2903 break; 2904 #endif 2905 default: 2906 B->ops->mult = MatMult_SeqBAIJ_N; 2907 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2908 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2909 break; 2910 } 2911 break; 2912 } 2913 case 11: 2914 B->ops->mult = MatMult_SeqBAIJ_11; 2915 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2916 break; 2917 case 12: 2918 { 2919 PetscInt version = 1; 2920 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2921 switch (version) { 2922 case 1: 2923 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2924 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2925 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2926 break; 2927 case 2: 2928 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2929 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2930 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2931 break; 2932 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2933 case 3: 2934 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2935 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2936 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2937 break; 2938 #endif 2939 default: 2940 B->ops->mult = MatMult_SeqBAIJ_N; 2941 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2942 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2943 break; 2944 } 2945 break; 2946 } 2947 case 15: 2948 { 2949 PetscInt version = 1; 2950 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2951 switch (version) { 2952 case 1: 2953 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2954 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2955 break; 2956 case 2: 2957 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2958 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2959 break; 2960 case 3: 2961 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2962 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2963 break; 2964 case 4: 2965 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2966 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2967 break; 2968 default: 2969 B->ops->mult = MatMult_SeqBAIJ_N; 2970 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2971 break; 2972 } 2973 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2974 break; 2975 } 2976 default: 2977 B->ops->mult = MatMult_SeqBAIJ_N; 2978 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2979 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2980 break; 2981 } 2982 } 2983 B->ops->sor = MatSOR_SeqBAIJ; 2984 b->mbs = mbs; 2985 b->nbs = nbs; 2986 if (!skipallocation) { 2987 if (!b->imax) { 2988 PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen)); 2989 PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt))); 2990 2991 b->free_imax_ilen = PETSC_TRUE; 2992 } 2993 /* b->ilen will count nonzeros in each block row so far. */ 2994 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2995 if (!nnz) { 2996 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2997 else if (nz < 0) nz = 1; 2998 nz = PetscMin(nz,nbs); 2999 for (i=0; i<mbs; i++) b->imax[i] = nz; 3000 PetscCall(PetscIntMultError(nz,mbs,&nz)); 3001 } else { 3002 PetscInt64 nz64 = 0; 3003 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3004 PetscCall(PetscIntCast(nz64,&nz)); 3005 } 3006 3007 /* allocate the matrix space */ 3008 PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i)); 3009 if (B->structure_only) { 3010 PetscCall(PetscMalloc1(nz,&b->j)); 3011 PetscCall(PetscMalloc1(B->rmap->N+1,&b->i)); 3012 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt))); 3013 } else { 3014 PetscInt nzbs2 = 0; 3015 PetscCall(PetscIntMultError(nz,bs2,&nzbs2)); 3016 PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i)); 3017 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)))); 3018 PetscCall(PetscArrayzero(b->a,nz*bs2)); 3019 } 3020 PetscCall(PetscArrayzero(b->j,nz)); 3021 3022 if (B->structure_only) { 3023 b->singlemalloc = PETSC_FALSE; 3024 b->free_a = PETSC_FALSE; 3025 } else { 3026 b->singlemalloc = PETSC_TRUE; 3027 b->free_a = PETSC_TRUE; 3028 } 3029 b->free_ij = PETSC_TRUE; 3030 3031 b->i[0] = 0; 3032 for (i=1; i<mbs+1; i++) { 3033 b->i[i] = b->i[i-1] + b->imax[i-1]; 3034 } 3035 3036 } else { 3037 b->free_a = PETSC_FALSE; 3038 b->free_ij = PETSC_FALSE; 3039 } 3040 3041 b->bs2 = bs2; 3042 b->mbs = mbs; 3043 b->nz = 0; 3044 b->maxnz = nz; 3045 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3046 B->was_assembled = PETSC_FALSE; 3047 B->assembled = PETSC_FALSE; 3048 if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3049 PetscFunctionReturn(0); 3050 } 3051 3052 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3053 { 3054 PetscInt i,m,nz,nz_max=0,*nnz; 3055 PetscScalar *values=NULL; 3056 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3057 3058 PetscFunctionBegin; 3059 PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 3060 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 3061 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 3062 PetscCall(PetscLayoutSetUp(B->rmap)); 3063 PetscCall(PetscLayoutSetUp(B->cmap)); 3064 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 3065 m = B->rmap->n/bs; 3066 3067 PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 3068 PetscCall(PetscMalloc1(m+1, &nnz)); 3069 for (i=0; i<m; i++) { 3070 nz = ii[i+1]- ii[i]; 3071 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 3072 nz_max = PetscMax(nz_max, nz); 3073 nnz[i] = nz; 3074 } 3075 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 3076 PetscCall(PetscFree(nnz)); 3077 3078 values = (PetscScalar*)V; 3079 if (!values) { 3080 PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values)); 3081 } 3082 for (i=0; i<m; i++) { 3083 PetscInt ncols = ii[i+1] - ii[i]; 3084 const PetscInt *icols = jj + ii[i]; 3085 if (bs == 1 || !roworiented) { 3086 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3087 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES)); 3088 } else { 3089 PetscInt j; 3090 for (j=0; j<ncols; j++) { 3091 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3092 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES)); 3093 } 3094 } 3095 } 3096 if (!V) PetscCall(PetscFree(values)); 3097 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3098 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3099 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3100 PetscFunctionReturn(0); 3101 } 3102 3103 /*@C 3104 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3105 3106 Not Collective 3107 3108 Input Parameter: 3109 . mat - a MATSEQBAIJ matrix 3110 3111 Output Parameter: 3112 . array - pointer to the data 3113 3114 Level: intermediate 3115 3116 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3117 @*/ 3118 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3119 { 3120 PetscFunctionBegin; 3121 PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array)); 3122 PetscFunctionReturn(0); 3123 } 3124 3125 /*@C 3126 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3127 3128 Not Collective 3129 3130 Input Parameters: 3131 + mat - a MATSEQBAIJ matrix 3132 - array - pointer to the data 3133 3134 Level: intermediate 3135 3136 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3137 @*/ 3138 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3139 { 3140 PetscFunctionBegin; 3141 PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array)); 3142 PetscFunctionReturn(0); 3143 } 3144 3145 /*MC 3146 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3147 block sparse compressed row format. 3148 3149 Options Database Keys: 3150 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3151 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3152 3153 Level: beginner 3154 3155 Notes: 3156 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3157 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3158 3159 Run with -info to see what version of the matrix-vector product is being used 3160 3161 .seealso: `MatCreateSeqBAIJ()` 3162 M*/ 3163 3164 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3165 3166 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3167 { 3168 PetscMPIInt size; 3169 Mat_SeqBAIJ *b; 3170 3171 PetscFunctionBegin; 3172 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 3173 PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3174 3175 PetscCall(PetscNewLog(B,&b)); 3176 B->data = (void*)b; 3177 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 3178 3179 b->row = NULL; 3180 b->col = NULL; 3181 b->icol = NULL; 3182 b->reallocs = 0; 3183 b->saved_values = NULL; 3184 3185 b->roworiented = PETSC_TRUE; 3186 b->nonew = 0; 3187 b->diag = NULL; 3188 B->spptr = NULL; 3189 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3190 b->keepnonzeropattern = PETSC_FALSE; 3191 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ)); 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ)); 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ)); 3202 #if defined(PETSC_HAVE_HYPRE) 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE)); 3204 #endif 3205 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS)); 3206 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ)); 3207 PetscFunctionReturn(0); 3208 } 3209 3210 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3211 { 3212 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3213 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3214 3215 PetscFunctionBegin; 3216 PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3217 3218 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3219 c->imax = a->imax; 3220 c->ilen = a->ilen; 3221 c->free_imax_ilen = PETSC_FALSE; 3222 } else { 3223 PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen)); 3224 PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt))); 3225 for (i=0; i<mbs; i++) { 3226 c->imax[i] = a->imax[i]; 3227 c->ilen[i] = a->ilen[i]; 3228 } 3229 c->free_imax_ilen = PETSC_TRUE; 3230 } 3231 3232 /* allocate the matrix space */ 3233 if (mallocmatspace) { 3234 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3235 PetscCall(PetscCalloc1(bs2*nz,&c->a)); 3236 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar))); 3237 3238 c->i = a->i; 3239 c->j = a->j; 3240 c->singlemalloc = PETSC_FALSE; 3241 c->free_a = PETSC_TRUE; 3242 c->free_ij = PETSC_FALSE; 3243 c->parent = A; 3244 C->preallocated = PETSC_TRUE; 3245 C->assembled = PETSC_TRUE; 3246 3247 PetscCall(PetscObjectReference((PetscObject)A)); 3248 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3249 PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3250 } else { 3251 PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i)); 3252 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt))); 3253 3254 c->singlemalloc = PETSC_TRUE; 3255 c->free_a = PETSC_TRUE; 3256 c->free_ij = PETSC_TRUE; 3257 3258 PetscCall(PetscArraycpy(c->i,a->i,mbs+1)); 3259 if (mbs > 0) { 3260 PetscCall(PetscArraycpy(c->j,a->j,nz)); 3261 if (cpvalues == MAT_COPY_VALUES) { 3262 PetscCall(PetscArraycpy(c->a,a->a,bs2*nz)); 3263 } else { 3264 PetscCall(PetscArrayzero(c->a,bs2*nz)); 3265 } 3266 } 3267 C->preallocated = PETSC_TRUE; 3268 C->assembled = PETSC_TRUE; 3269 } 3270 } 3271 3272 c->roworiented = a->roworiented; 3273 c->nonew = a->nonew; 3274 3275 PetscCall(PetscLayoutReference(A->rmap,&C->rmap)); 3276 PetscCall(PetscLayoutReference(A->cmap,&C->cmap)); 3277 3278 c->bs2 = a->bs2; 3279 c->mbs = a->mbs; 3280 c->nbs = a->nbs; 3281 3282 if (a->diag) { 3283 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3284 c->diag = a->diag; 3285 c->free_diag = PETSC_FALSE; 3286 } else { 3287 PetscCall(PetscMalloc1(mbs+1,&c->diag)); 3288 PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt))); 3289 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3290 c->free_diag = PETSC_TRUE; 3291 } 3292 } else c->diag = NULL; 3293 3294 c->nz = a->nz; 3295 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3296 c->solve_work = NULL; 3297 c->mult_work = NULL; 3298 c->sor_workt = NULL; 3299 c->sor_work = NULL; 3300 3301 c->compressedrow.use = a->compressedrow.use; 3302 c->compressedrow.nrows = a->compressedrow.nrows; 3303 if (a->compressedrow.use) { 3304 i = a->compressedrow.nrows; 3305 PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex)); 3306 PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt))); 3307 PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1)); 3308 PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i)); 3309 } else { 3310 c->compressedrow.use = PETSC_FALSE; 3311 c->compressedrow.i = NULL; 3312 c->compressedrow.rindex = NULL; 3313 } 3314 C->nonzerostate = A->nonzerostate; 3315 3316 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist)); 3317 PetscFunctionReturn(0); 3318 } 3319 3320 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3321 { 3322 PetscFunctionBegin; 3323 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B)); 3324 PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n)); 3325 PetscCall(MatSetType(*B,MATSEQBAIJ)); 3326 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE)); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3331 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3332 { 3333 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3334 PetscInt *rowidxs,*colidxs; 3335 PetscScalar *matvals; 3336 3337 PetscFunctionBegin; 3338 PetscCall(PetscViewerSetUp(viewer)); 3339 3340 /* read matrix header */ 3341 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3342 PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3343 M = header[1]; N = header[2]; nz = header[3]; 3344 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3345 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3346 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3347 3348 /* set block sizes from the viewer's .info file */ 3349 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3350 /* set local and global sizes if not set already */ 3351 if (mat->rmap->n < 0) mat->rmap->n = M; 3352 if (mat->cmap->n < 0) mat->cmap->n = N; 3353 if (mat->rmap->N < 0) mat->rmap->N = M; 3354 if (mat->cmap->N < 0) mat->cmap->N = N; 3355 PetscCall(PetscLayoutSetUp(mat->rmap)); 3356 PetscCall(PetscLayoutSetUp(mat->cmap)); 3357 3358 /* check if the matrix sizes are correct */ 3359 PetscCall(MatGetSize(mat,&rows,&cols)); 3360 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3361 PetscCall(MatGetBlockSize(mat,&bs)); 3362 PetscCall(MatGetLocalSize(mat,&m,&n)); 3363 mbs = m/bs; nbs = n/bs; 3364 3365 /* read in row lengths, column indices and nonzero values */ 3366 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3367 PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT)); 3368 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3369 sum = rowidxs[m]; 3370 PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3371 3372 /* read in column indices and nonzero values */ 3373 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals)); 3374 PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT)); 3375 PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR)); 3376 3377 { /* preallocate matrix storage */ 3378 PetscBT bt; /* helper bit set to count nonzeros */ 3379 PetscInt *nnz; 3380 PetscBool sbaij; 3381 3382 PetscCall(PetscBTCreate(nbs,&bt)); 3383 PetscCall(PetscCalloc1(mbs,&nnz)); 3384 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij)); 3385 for (i=0; i<mbs; i++) { 3386 PetscCall(PetscBTMemzero(nbs,bt)); 3387 for (k=0; k<bs; k++) { 3388 PetscInt row = bs*i + k; 3389 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3390 PetscInt col = colidxs[j]; 3391 if (!sbaij || col >= row) 3392 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3393 } 3394 } 3395 } 3396 PetscCall(PetscBTDestroy(&bt)); 3397 PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz)); 3398 PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz)); 3399 PetscCall(PetscFree(nnz)); 3400 } 3401 3402 /* store matrix values */ 3403 for (i=0; i<m; i++) { 3404 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3405 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3406 } 3407 3408 PetscCall(PetscFree(rowidxs)); 3409 PetscCall(PetscFree2(colidxs,matvals)); 3410 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3411 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3412 PetscFunctionReturn(0); 3413 } 3414 3415 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3416 { 3417 PetscBool isbinary; 3418 3419 PetscFunctionBegin; 3420 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3421 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3422 PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer)); 3423 PetscFunctionReturn(0); 3424 } 3425 3426 /*@C 3427 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3428 compressed row) format. For good matrix assembly performance the 3429 user should preallocate the matrix storage by setting the parameter nz 3430 (or the array nnz). By setting these parameters accurately, performance 3431 during matrix assembly can be increased by more than a factor of 50. 3432 3433 Collective 3434 3435 Input Parameters: 3436 + comm - MPI communicator, set to PETSC_COMM_SELF 3437 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3438 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3439 . m - number of rows 3440 . n - number of columns 3441 . nz - number of nonzero blocks per block row (same for all rows) 3442 - nnz - array containing the number of nonzero blocks in the various block rows 3443 (possibly different for each block row) or NULL 3444 3445 Output Parameter: 3446 . A - the matrix 3447 3448 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3449 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3450 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3451 3452 Options Database Keys: 3453 + -mat_no_unroll - uses code that does not unroll the loops in the 3454 block calculations (much slower) 3455 - -mat_block_size - size of the blocks to use 3456 3457 Level: intermediate 3458 3459 Notes: 3460 The number of rows and columns must be divisible by blocksize. 3461 3462 If the nnz parameter is given then the nz parameter is ignored 3463 3464 A nonzero block is any block that as 1 or more nonzeros in it 3465 3466 The block AIJ format is fully compatible with standard Fortran 77 3467 storage. That is, the stored row and column indices can begin at 3468 either one (as in Fortran) or zero. See the users' manual for details. 3469 3470 Specify the preallocated storage with either nz or nnz (not both). 3471 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3472 allocation. See Users-Manual: ch_mat for details. 3473 matrices. 3474 3475 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3476 @*/ 3477 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3478 { 3479 PetscFunctionBegin; 3480 PetscCall(MatCreate(comm,A)); 3481 PetscCall(MatSetSizes(*A,m,n,m,n)); 3482 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3483 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz)); 3484 PetscFunctionReturn(0); 3485 } 3486 3487 /*@C 3488 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3489 per row in the matrix. For good matrix assembly performance the 3490 user should preallocate the matrix storage by setting the parameter nz 3491 (or the array nnz). By setting these parameters accurately, performance 3492 during matrix assembly can be increased by more than a factor of 50. 3493 3494 Collective 3495 3496 Input Parameters: 3497 + B - the matrix 3498 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3499 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3500 . nz - number of block nonzeros per block row (same for all rows) 3501 - nnz - array containing the number of block nonzeros in the various block rows 3502 (possibly different for each block row) or NULL 3503 3504 Options Database Keys: 3505 + -mat_no_unroll - uses code that does not unroll the loops in the 3506 block calculations (much slower) 3507 - -mat_block_size - size of the blocks to use 3508 3509 Level: intermediate 3510 3511 Notes: 3512 If the nnz parameter is given then the nz parameter is ignored 3513 3514 You can call MatGetInfo() to get information on how effective the preallocation was; 3515 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3516 You can also run with the option -info and look for messages with the string 3517 malloc in them to see if additional memory allocation was needed. 3518 3519 The block AIJ format is fully compatible with standard Fortran 77 3520 storage. That is, the stored row and column indices can begin at 3521 either one (as in Fortran) or zero. See the users' manual for details. 3522 3523 Specify the preallocated storage with either nz or nnz (not both). 3524 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3525 allocation. See Users-Manual: ch_mat for details. 3526 3527 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3528 @*/ 3529 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3530 { 3531 PetscFunctionBegin; 3532 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3533 PetscValidType(B,1); 3534 PetscValidLogicalCollectiveInt(B,bs,2); 3535 PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz)); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 /*@C 3540 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3541 3542 Collective 3543 3544 Input Parameters: 3545 + B - the matrix 3546 . i - the indices into j for the start of each local row (starts with zero) 3547 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3548 - v - optional values in the matrix 3549 3550 Level: advanced 3551 3552 Notes: 3553 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3554 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3555 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3556 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3557 block column and the second index is over columns within a block. 3558 3559 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3560 3561 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3562 @*/ 3563 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3564 { 3565 PetscFunctionBegin; 3566 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3567 PetscValidType(B,1); 3568 PetscValidLogicalCollectiveInt(B,bs,2); 3569 PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 3570 PetscFunctionReturn(0); 3571 } 3572 3573 /*@ 3574 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3575 3576 Collective 3577 3578 Input Parameters: 3579 + comm - must be an MPI communicator of size 1 3580 . bs - size of block 3581 . m - number of rows 3582 . n - number of columns 3583 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3584 . j - column indices 3585 - a - matrix values 3586 3587 Output Parameter: 3588 . mat - the matrix 3589 3590 Level: advanced 3591 3592 Notes: 3593 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3594 once the matrix is destroyed 3595 3596 You cannot set new nonzero locations into this matrix, that will generate an error. 3597 3598 The i and j indices are 0 based 3599 3600 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3601 3602 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3603 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3604 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3605 with column-major ordering within blocks. 3606 3607 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3608 3609 @*/ 3610 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3611 { 3612 PetscInt ii; 3613 Mat_SeqBAIJ *baij; 3614 3615 PetscFunctionBegin; 3616 PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs); 3617 if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3618 3619 PetscCall(MatCreate(comm,mat)); 3620 PetscCall(MatSetSizes(*mat,m,n,m,n)); 3621 PetscCall(MatSetType(*mat,MATSEQBAIJ)); 3622 PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL)); 3623 baij = (Mat_SeqBAIJ*)(*mat)->data; 3624 PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen)); 3625 PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt))); 3626 3627 baij->i = i; 3628 baij->j = j; 3629 baij->a = a; 3630 3631 baij->singlemalloc = PETSC_FALSE; 3632 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3633 baij->free_a = PETSC_FALSE; 3634 baij->free_ij = PETSC_FALSE; 3635 3636 for (ii=0; ii<m; ii++) { 3637 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3638 PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]); 3639 } 3640 if (PetscDefined(USE_DEBUG)) { 3641 for (ii=0; ii<baij->i[m]; ii++) { 3642 PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3643 PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3644 } 3645 } 3646 3647 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3648 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3649 PetscFunctionReturn(0); 3650 } 3651 3652 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3653 { 3654 PetscFunctionBegin; 3655 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat)); 3656 PetscFunctionReturn(0); 3657 } 3658