1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions) 21 { 22 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 23 PetscInt m,n,i; 24 PetscInt ib,jb,bs = A->rmap->bs; 25 MatScalar *a_val = a_aij->a; 26 27 PetscFunctionBegin; 28 PetscCall(MatGetSize(A,&m,&n)); 29 for (i=0; i<n; i++) reductions[i] = 0.0; 30 if (type == NORM_2) { 31 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 32 for (jb=0; jb<bs; jb++) { 33 for (ib=0; ib<bs; ib++) { 34 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 35 a_val++; 36 } 37 } 38 } 39 } else if (type == NORM_1) { 40 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 41 for (jb=0; jb<bs; jb++) { 42 for (ib=0; ib<bs; ib++) { 43 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 44 a_val++; 45 } 46 } 47 } 48 } else if (type == NORM_INFINITY) { 49 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 50 for (jb=0; jb<bs; jb++) { 51 for (ib=0; ib<bs; ib++) { 52 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 53 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 54 a_val++; 55 } 56 } 57 } 58 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 59 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 60 for (jb=0; jb<bs; jb++) { 61 for (ib=0; ib<bs; ib++) { 62 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 63 a_val++; 64 } 65 } 66 } 67 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 68 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 69 for (jb=0; jb<bs; jb++) { 70 for (ib=0; ib<bs; ib++) { 71 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 72 a_val++; 73 } 74 } 75 } 76 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 77 if (type == NORM_2) { 78 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 79 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (i=0; i<n; i++) reductions[i] /= m; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 88 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 89 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 90 PetscReal shift = 0.0; 91 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 92 93 PetscFunctionBegin; 94 allowzeropivot = PetscNot(A->erroriffailure); 95 96 if (a->idiagvalid) { 97 if (values) *values = a->idiag; 98 PetscFunctionReturn(0); 99 } 100 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 101 diag_offset = a->diag; 102 if (!a->idiag) { 103 PetscCall(PetscMalloc1(bs2*mbs,&a->idiag)); 104 PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar))); 105 } 106 diag = a->idiag; 107 if (values) *values = a->idiag; 108 /* factor and invert each block */ 109 switch (bs) { 110 case 1: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 1*diag_offset[i]; 113 diag[0] = odiag[0]; 114 115 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 116 if (allowzeropivot) { 117 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 119 A->factorerror_zeropivot_row = i; 120 PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i)); 121 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 122 } 123 124 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 125 diag += 1; 126 } 127 break; 128 case 2: 129 for (i=0; i<mbs; i++) { 130 odiag = v + 4*diag_offset[i]; 131 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 132 PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected)); 133 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 134 diag += 4; 135 } 136 break; 137 case 3: 138 for (i=0; i<mbs; i++) { 139 odiag = v + 9*diag_offset[i]; 140 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 141 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 142 diag[8] = odiag[8]; 143 PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected)); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 9; 146 } 147 break; 148 case 4: 149 for (i=0; i<mbs; i++) { 150 odiag = v + 16*diag_offset[i]; 151 PetscCall(PetscArraycpy(diag,odiag,16)); 152 PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected)); 153 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 154 diag += 16; 155 } 156 break; 157 case 5: 158 for (i=0; i<mbs; i++) { 159 odiag = v + 25*diag_offset[i]; 160 PetscCall(PetscArraycpy(diag,odiag,25)); 161 PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 25; 164 } 165 break; 166 case 6: 167 for (i=0; i<mbs; i++) { 168 odiag = v + 36*diag_offset[i]; 169 PetscCall(PetscArraycpy(diag,odiag,36)); 170 PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 36; 173 } 174 break; 175 case 7: 176 for (i=0; i<mbs; i++) { 177 odiag = v + 49*diag_offset[i]; 178 PetscCall(PetscArraycpy(diag,odiag,49)); 179 PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 49; 182 } 183 break; 184 default: 185 PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots)); 186 for (i=0; i<mbs; i++) { 187 odiag = v + bs2*diag_offset[i]; 188 PetscCall(PetscArraycpy(diag,odiag,bs2)); 189 PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected)); 190 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 191 diag += bs2; 192 } 193 PetscCall(PetscFree2(v_work,v_pivots)); 194 } 195 a->idiagvalid = PETSC_TRUE; 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 202 PetscScalar *x,*work,*w,*workt,*t; 203 const MatScalar *v,*aa = a->a, *idiag; 204 const PetscScalar *b,*xb; 205 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 206 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 207 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 208 209 PetscFunctionBegin; 210 its = its*lits; 211 PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 212 PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 213 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 214 PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 215 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 216 217 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL)); 218 219 if (!m) PetscFunctionReturn(0); 220 diag = a->diag; 221 idiag = a->idiag; 222 k = PetscMax(A->rmap->n,A->cmap->n); 223 if (!a->mult_work) { 224 PetscCall(PetscMalloc1(k+1,&a->mult_work)); 225 } 226 if (!a->sor_workt) { 227 PetscCall(PetscMalloc1(k,&a->sor_workt)); 228 } 229 if (!a->sor_work) { 230 PetscCall(PetscMalloc1(bs,&a->sor_work)); 231 } 232 work = a->mult_work; 233 t = a->sor_workt; 234 w = a->sor_work; 235 236 PetscCall(VecGetArray(xx,&x)); 237 PetscCall(VecGetArrayRead(bb,&b)); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241 switch (bs) { 242 case 1: 243 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 244 t[0] = b[0]; 245 i2 = 1; 246 idiag += 1; 247 for (i=1; i<m; i++) { 248 v = aa + ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; 252 for (j=0; j<nz; j++) { 253 xw[0] = x[vi[j]]; 254 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 255 } 256 t[i2] = s[0]; 257 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 258 x[i2] = xw[0]; 259 idiag += 1; 260 i2 += 1; 261 } 262 break; 263 case 2: 264 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 265 t[0] = b[0]; t[1] = b[1]; 266 i2 = 2; 267 idiag += 4; 268 for (i=1; i<m; i++) { 269 v = aa + 4*ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s[0] = b[i2]; s[1] = b[i2+1]; 273 for (j=0; j<nz; j++) { 274 idx = 2*vi[j]; 275 it = 4*j; 276 xw[0] = x[idx]; xw[1] = x[1+idx]; 277 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 278 } 279 t[i2] = s[0]; t[i2+1] = s[1]; 280 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 281 x[i2] = xw[0]; x[i2+1] = xw[1]; 282 idiag += 4; 283 i2 += 2; 284 } 285 break; 286 case 3: 287 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 288 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 289 i2 = 3; 290 idiag += 9; 291 for (i=1; i<m; i++) { 292 v = aa + 9*ai[i]; 293 vi = aj + ai[i]; 294 nz = diag[i] - ai[i]; 295 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 296 while (nz--) { 297 idx = 3*(*vi++); 298 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 299 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 300 v += 9; 301 } 302 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 303 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 304 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 305 idiag += 9; 306 i2 += 3; 307 } 308 break; 309 case 4: 310 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 311 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 312 i2 = 4; 313 idiag += 16; 314 for (i=1; i<m; i++) { 315 v = aa + 16*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 319 while (nz--) { 320 idx = 4*(*vi++); 321 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 323 v += 16; 324 } 325 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 326 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 328 idiag += 16; 329 i2 += 4; 330 } 331 break; 332 case 5: 333 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 335 i2 = 5; 336 idiag += 25; 337 for (i=1; i<m; i++) { 338 v = aa + 25*ai[i]; 339 vi = aj + ai[i]; 340 nz = diag[i] - ai[i]; 341 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 342 while (nz--) { 343 idx = 5*(*vi++); 344 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 345 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 346 v += 25; 347 } 348 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 349 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 350 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 351 idiag += 25; 352 i2 += 5; 353 } 354 break; 355 case 6: 356 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 357 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 358 i2 = 6; 359 idiag += 36; 360 for (i=1; i<m; i++) { 361 v = aa + 36*ai[i]; 362 vi = aj + ai[i]; 363 nz = diag[i] - ai[i]; 364 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 365 while (nz--) { 366 idx = 6*(*vi++); 367 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 368 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 369 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 370 v += 36; 371 } 372 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 373 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 374 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 375 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 376 idiag += 36; 377 i2 += 6; 378 } 379 break; 380 case 7: 381 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 382 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 383 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 384 i2 = 7; 385 idiag += 49; 386 for (i=1; i<m; i++) { 387 v = aa + 49*ai[i]; 388 vi = aj + ai[i]; 389 nz = diag[i] - ai[i]; 390 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 391 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 392 while (nz--) { 393 idx = 7*(*vi++); 394 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 395 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 396 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 397 v += 49; 398 } 399 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 400 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 401 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 402 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 403 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 404 idiag += 49; 405 i2 += 7; 406 } 407 break; 408 default: 409 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 410 PetscCall(PetscArraycpy(t,b,bs)); 411 i2 = bs; 412 idiag += bs2; 413 for (i=1; i<m; i++) { 414 v = aa + bs2*ai[i]; 415 vi = aj + ai[i]; 416 nz = diag[i] - ai[i]; 417 418 PetscCall(PetscArraycpy(w,b+i2,bs)); 419 /* copy all rows of x that are needed into contiguous space */ 420 workt = work; 421 for (j=0; j<nz; j++) { 422 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 423 workt += bs; 424 } 425 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 426 PetscCall(PetscArraycpy(t+i2,w,bs)); 427 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 428 429 idiag += bs2; 430 i2 += bs; 431 } 432 break; 433 } 434 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 435 PetscCall(PetscLogFlops(1.0*bs2*a->nz)); 436 xb = t; 437 } 438 else xb = b; 439 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 440 idiag = a->idiag+bs2*(a->mbs-1); 441 i2 = bs * (m-1); 442 switch (bs) { 443 case 1: 444 s[0] = xb[i2]; 445 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 446 x[i2] = xw[0]; 447 i2 -= 1; 448 for (i=m-2; i>=0; i--) { 449 v = aa + (diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; 453 for (j=0; j<nz; j++) { 454 xw[0] = x[vi[j]]; 455 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 456 } 457 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 458 x[i2] = xw[0]; 459 idiag -= 1; 460 i2 -= 1; 461 } 462 break; 463 case 2: 464 s[0] = xb[i2]; s[1] = xb[i2+1]; 465 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 466 x[i2] = xw[0]; x[i2+1] = xw[1]; 467 i2 -= 2; 468 idiag -= 4; 469 for (i=m-2; i>=0; i--) { 470 v = aa + 4*(diag[i] + 1); 471 vi = aj + diag[i] + 1; 472 nz = ai[i+1] - diag[i] - 1; 473 s[0] = xb[i2]; s[1] = xb[i2+1]; 474 for (j=0; j<nz; j++) { 475 idx = 2*vi[j]; 476 it = 4*j; 477 xw[0] = x[idx]; xw[1] = x[1+idx]; 478 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 479 } 480 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 481 x[i2] = xw[0]; x[i2+1] = xw[1]; 482 idiag -= 4; 483 i2 -= 2; 484 } 485 break; 486 case 3: 487 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 488 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 489 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 490 i2 -= 3; 491 idiag -= 9; 492 for (i=m-2; i>=0; i--) { 493 v = aa + 9*(diag[i]+1); 494 vi = aj + diag[i] + 1; 495 nz = ai[i+1] - diag[i] - 1; 496 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 497 while (nz--) { 498 idx = 3*(*vi++); 499 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 500 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 501 v += 9; 502 } 503 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 504 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 505 idiag -= 9; 506 i2 -= 3; 507 } 508 break; 509 case 4: 510 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 511 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 513 i2 -= 4; 514 idiag -= 16; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 16*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 520 while (nz--) { 521 idx = 4*(*vi++); 522 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 523 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 524 v += 16; 525 } 526 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 527 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 break; 532 case 5: 533 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 534 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 536 i2 -= 5; 537 idiag -= 25; 538 for (i=m-2; i>=0; i--) { 539 v = aa + 25*(diag[i]+1); 540 vi = aj + diag[i] + 1; 541 nz = ai[i+1] - diag[i] - 1; 542 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 543 while (nz--) { 544 idx = 5*(*vi++); 545 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 546 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 547 v += 25; 548 } 549 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 550 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 551 idiag -= 25; 552 i2 -= 5; 553 } 554 break; 555 case 6: 556 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 557 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 558 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 559 i2 -= 6; 560 idiag -= 36; 561 for (i=m-2; i>=0; i--) { 562 v = aa + 36*(diag[i]+1); 563 vi = aj + diag[i] + 1; 564 nz = ai[i+1] - diag[i] - 1; 565 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 566 while (nz--) { 567 idx = 6*(*vi++); 568 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 569 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 570 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 571 v += 36; 572 } 573 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 574 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 575 idiag -= 36; 576 i2 -= 6; 577 } 578 break; 579 case 7: 580 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 581 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 582 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 583 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 584 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 585 i2 -= 7; 586 idiag -= 49; 587 for (i=m-2; i>=0; i--) { 588 v = aa + 49*(diag[i]+1); 589 vi = aj + diag[i] + 1; 590 nz = ai[i+1] - diag[i] - 1; 591 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 592 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 593 while (nz--) { 594 idx = 7*(*vi++); 595 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 596 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 597 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 598 v += 49; 599 } 600 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 601 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 602 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 603 idiag -= 49; 604 i2 -= 7; 605 } 606 break; 607 default: 608 PetscCall(PetscArraycpy(w,xb+i2,bs)); 609 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 610 i2 -= bs; 611 idiag -= bs2; 612 for (i=m-2; i>=0; i--) { 613 v = aa + bs2*(diag[i]+1); 614 vi = aj + diag[i] + 1; 615 nz = ai[i+1] - diag[i] - 1; 616 617 PetscCall(PetscArraycpy(w,xb+i2,bs)); 618 /* copy all rows of x that are needed into contiguous space */ 619 workt = work; 620 for (j=0; j<nz; j++) { 621 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 622 workt += bs; 623 } 624 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 625 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 626 627 idiag -= bs2; 628 i2 -= bs; 629 } 630 break; 631 } 632 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 633 } 634 its--; 635 } 636 while (its--) { 637 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 638 idiag = a->idiag; 639 i2 = 0; 640 switch (bs) { 641 case 1: 642 for (i=0; i<m; i++) { 643 v = aa + ai[i]; 644 vi = aj + ai[i]; 645 nz = ai[i+1] - ai[i]; 646 s[0] = b[i2]; 647 for (j=0; j<nz; j++) { 648 xw[0] = x[vi[j]]; 649 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 650 } 651 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 652 x[i2] += xw[0]; 653 idiag += 1; 654 i2 += 1; 655 } 656 break; 657 case 2: 658 for (i=0; i<m; i++) { 659 v = aa + 4*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; 663 for (j=0; j<nz; j++) { 664 idx = 2*vi[j]; 665 it = 4*j; 666 xw[0] = x[idx]; xw[1] = x[1+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 668 } 669 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 670 x[i2] += xw[0]; x[i2+1] += xw[1]; 671 idiag += 4; 672 i2 += 2; 673 } 674 break; 675 case 3: 676 for (i=0; i<m; i++) { 677 v = aa + 9*ai[i]; 678 vi = aj + ai[i]; 679 nz = ai[i+1] - ai[i]; 680 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 681 while (nz--) { 682 idx = 3*(*vi++); 683 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 684 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 685 v += 9; 686 } 687 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 688 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 689 idiag += 9; 690 i2 += 3; 691 } 692 break; 693 case 4: 694 for (i=0; i<m; i++) { 695 v = aa + 16*ai[i]; 696 vi = aj + ai[i]; 697 nz = ai[i+1] - ai[i]; 698 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 699 while (nz--) { 700 idx = 4*(*vi++); 701 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 702 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 703 v += 16; 704 } 705 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 706 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 707 idiag += 16; 708 i2 += 4; 709 } 710 break; 711 case 5: 712 for (i=0; i<m; i++) { 713 v = aa + 25*ai[i]; 714 vi = aj + ai[i]; 715 nz = ai[i+1] - ai[i]; 716 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 717 while (nz--) { 718 idx = 5*(*vi++); 719 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 720 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 721 v += 25; 722 } 723 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 724 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 725 idiag += 25; 726 i2 += 5; 727 } 728 break; 729 case 6: 730 for (i=0; i<m; i++) { 731 v = aa + 36*ai[i]; 732 vi = aj + ai[i]; 733 nz = ai[i+1] - ai[i]; 734 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 735 while (nz--) { 736 idx = 6*(*vi++); 737 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 738 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 739 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 740 v += 36; 741 } 742 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 743 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 744 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 745 idiag += 36; 746 i2 += 6; 747 } 748 break; 749 case 7: 750 for (i=0; i<m; i++) { 751 v = aa + 49*ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 755 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 756 while (nz--) { 757 idx = 7*(*vi++); 758 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 759 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 761 v += 49; 762 } 763 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 765 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 766 idiag += 49; 767 i2 += 7; 768 } 769 break; 770 default: 771 for (i=0; i<m; i++) { 772 v = aa + bs2*ai[i]; 773 vi = aj + ai[i]; 774 nz = ai[i+1] - ai[i]; 775 776 PetscCall(PetscArraycpy(w,b+i2,bs)); 777 /* copy all rows of x that are needed into contiguous space */ 778 workt = work; 779 for (j=0; j<nz; j++) { 780 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 781 workt += bs; 782 } 783 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 784 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 785 786 idiag += bs2; 787 i2 += bs; 788 } 789 break; 790 } 791 PetscCall(PetscLogFlops(2.0*bs2*a->nz)); 792 } 793 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 794 idiag = a->idiag+bs2*(a->mbs-1); 795 i2 = bs * (m-1); 796 switch (bs) { 797 case 1: 798 for (i=m-1; i>=0; i--) { 799 v = aa + ai[i]; 800 vi = aj + ai[i]; 801 nz = ai[i+1] - ai[i]; 802 s[0] = b[i2]; 803 for (j=0; j<nz; j++) { 804 xw[0] = x[vi[j]]; 805 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 806 } 807 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 808 x[i2] += xw[0]; 809 idiag -= 1; 810 i2 -= 1; 811 } 812 break; 813 case 2: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 4*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; 819 for (j=0; j<nz; j++) { 820 idx = 2*vi[j]; 821 it = 4*j; 822 xw[0] = x[idx]; xw[1] = x[1+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 824 } 825 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 826 x[i2] += xw[0]; x[i2+1] += xw[1]; 827 idiag -= 4; 828 i2 -= 2; 829 } 830 break; 831 case 3: 832 for (i=m-1; i>=0; i--) { 833 v = aa + 9*ai[i]; 834 vi = aj + ai[i]; 835 nz = ai[i+1] - ai[i]; 836 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 837 while (nz--) { 838 idx = 3*(*vi++); 839 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 840 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 841 v += 9; 842 } 843 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 844 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 845 idiag -= 9; 846 i2 -= 3; 847 } 848 break; 849 case 4: 850 for (i=m-1; i>=0; i--) { 851 v = aa + 16*ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i+1] - ai[i]; 854 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 855 while (nz--) { 856 idx = 4*(*vi++); 857 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 858 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 859 v += 16; 860 } 861 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 862 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 863 idiag -= 16; 864 i2 -= 4; 865 } 866 break; 867 case 5: 868 for (i=m-1; i>=0; i--) { 869 v = aa + 25*ai[i]; 870 vi = aj + ai[i]; 871 nz = ai[i+1] - ai[i]; 872 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 873 while (nz--) { 874 idx = 5*(*vi++); 875 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 876 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 877 v += 25; 878 } 879 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 880 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 881 idiag -= 25; 882 i2 -= 5; 883 } 884 break; 885 case 6: 886 for (i=m-1; i>=0; i--) { 887 v = aa + 36*ai[i]; 888 vi = aj + ai[i]; 889 nz = ai[i+1] - ai[i]; 890 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 891 while (nz--) { 892 idx = 6*(*vi++); 893 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 894 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 895 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 896 v += 36; 897 } 898 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 899 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 900 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 901 idiag -= 36; 902 i2 -= 6; 903 } 904 break; 905 case 7: 906 for (i=m-1; i>=0; i--) { 907 v = aa + 49*ai[i]; 908 vi = aj + ai[i]; 909 nz = ai[i+1] - ai[i]; 910 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 911 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 912 while (nz--) { 913 idx = 7*(*vi++); 914 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 915 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 916 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 917 v += 49; 918 } 919 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 920 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 921 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 break; 926 default: 927 for (i=m-1; i>=0; i--) { 928 v = aa + bs2*ai[i]; 929 vi = aj + ai[i]; 930 nz = ai[i+1] - ai[i]; 931 932 PetscCall(PetscArraycpy(w,b+i2,bs)); 933 /* copy all rows of x that are needed into contiguous space */ 934 workt = work; 935 for (j=0; j<nz; j++) { 936 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 937 workt += bs; 938 } 939 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 940 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 941 942 idiag -= bs2; 943 i2 -= bs; 944 } 945 break; 946 } 947 PetscCall(PetscLogFlops(2.0*bs2*(a->nz))); 948 } 949 } 950 PetscCall(VecRestoreArray(xx,&x)); 951 PetscCall(VecRestoreArrayRead(bb,&b)); 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 Special version for direct calls from Fortran (Used in PETSc-fun3d) 957 */ 958 #if defined(PETSC_HAVE_FORTRAN_CAPS) 959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 961 #define matsetvaluesblocked4_ matsetvaluesblocked4 962 #endif 963 964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 965 { 966 Mat A = *AA; 967 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 968 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 969 PetscInt *ai =a->i,*ailen=a->ilen; 970 PetscInt *aj =a->j,stepval,lastcol = -1; 971 const PetscScalar *value = v; 972 MatScalar *ap,*aa = a->a,*bap; 973 974 PetscFunctionBegin; 975 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 976 stepval = (n-1)*4; 977 for (k=0; k<m; k++) { /* loop over added rows */ 978 row = im[k]; 979 rp = aj + ai[row]; 980 ap = aa + 16*ai[row]; 981 nrow = ailen[row]; 982 low = 0; 983 high = nrow; 984 for (l=0; l<n; l++) { /* loop over added columns */ 985 col = in[l]; 986 if (col <= lastcol) low = 0; 987 else high = nrow; 988 lastcol = col; 989 value = v + k*(stepval+4 + l)*4; 990 while (high-low > 7) { 991 t = (low+high)/2; 992 if (rp[t] > col) high = t; 993 else low = t; 994 } 995 for (i=low; i<high; i++) { 996 if (rp[i] > col) break; 997 if (rp[i] == col) { 998 bap = ap + 16*i; 999 for (ii=0; ii<4; ii++,value+=stepval) { 1000 for (jj=ii; jj<16; jj+=4) { 1001 bap[jj] += *value++; 1002 } 1003 } 1004 goto noinsert2; 1005 } 1006 } 1007 N = nrow++ - 1; 1008 high++; /* added new column index thus must search to one higher than before */ 1009 /* shift up all the later entries in this row */ 1010 for (ii=N; ii>=i; ii--) { 1011 rp[ii+1] = rp[ii]; 1012 PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16)); 1013 } 1014 if (N >= i) { 1015 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1016 } 1017 rp[i] = col; 1018 bap = ap + 16*i; 1019 for (ii=0; ii<4; ii++,value+=stepval) { 1020 for (jj=ii; jj<16; jj+=4) { 1021 bap[jj] = *value++; 1022 } 1023 } 1024 noinsert2:; 1025 low = i; 1026 } 1027 ailen[row] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1033 #define matsetvalues4_ MATSETVALUES4 1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1035 #define matsetvalues4_ matsetvalues4 1036 #endif 1037 1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1039 { 1040 Mat A = *AA; 1041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1042 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1043 PetscInt *ai=a->i,*ailen=a->ilen; 1044 PetscInt *aj=a->j,brow,bcol; 1045 PetscInt ridx,cidx,lastcol = -1; 1046 MatScalar *ap,value,*aa=a->a,*bap; 1047 1048 PetscFunctionBegin; 1049 for (k=0; k<m; k++) { /* loop over added rows */ 1050 row = im[k]; brow = row/4; 1051 rp = aj + ai[brow]; 1052 ap = aa + 16*ai[brow]; 1053 nrow = ailen[brow]; 1054 low = 0; 1055 high = nrow; 1056 for (l=0; l<n; l++) { /* loop over added columns */ 1057 col = in[l]; bcol = col/4; 1058 ridx = row % 4; cidx = col % 4; 1059 value = v[l + k*n]; 1060 if (col <= lastcol) low = 0; 1061 else high = nrow; 1062 lastcol = col; 1063 while (high-low > 7) { 1064 t = (low+high)/2; 1065 if (rp[t] > bcol) high = t; 1066 else low = t; 1067 } 1068 for (i=low; i<high; i++) { 1069 if (rp[i] > bcol) break; 1070 if (rp[i] == bcol) { 1071 bap = ap + 16*i + 4*cidx + ridx; 1072 *bap += value; 1073 goto noinsert1; 1074 } 1075 } 1076 N = nrow++ - 1; 1077 high++; /* added new column thus must search to one higher than before */ 1078 /* shift up all the later entries in this row */ 1079 PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1080 PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1))); 1081 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1082 rp[i] = bcol; 1083 ap[16*i + 4*cidx + ridx] = value; 1084 noinsert1:; 1085 low = i; 1086 } 1087 ailen[brow] = nrow; 1088 } 1089 PetscFunctionReturnVoid(); 1090 } 1091 1092 /* 1093 Checks for missing diagonals 1094 */ 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscInt *diag,*ii = a->i,i; 1099 1100 PetscFunctionBegin; 1101 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1102 *missing = PETSC_FALSE; 1103 if (A->rmap->n > 0 && !ii) { 1104 *missing = PETSC_TRUE; 1105 if (d) *d = 0; 1106 PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n")); 1107 } else { 1108 PetscInt n; 1109 n = PetscMin(a->mbs, a->nbs); 1110 diag = a->diag; 1111 for (i=0; i<n; i++) { 1112 if (diag[i] >= ii[i+1]) { 1113 *missing = PETSC_TRUE; 1114 if (d) *d = i; 1115 PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i)); 1116 break; 1117 } 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1124 { 1125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1126 PetscInt i,j,m = a->mbs; 1127 1128 PetscFunctionBegin; 1129 if (!a->diag) { 1130 PetscCall(PetscMalloc1(m,&a->diag)); 1131 PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt))); 1132 a->free_diag = PETSC_TRUE; 1133 } 1134 for (i=0; i<m; i++) { 1135 a->diag[i] = a->i[i+1]; 1136 for (j=a->i[i]; j<a->i[i+1]; j++) { 1137 if (a->j[j] == i) { 1138 a->diag[i] = j; 1139 break; 1140 } 1141 } 1142 } 1143 PetscFunctionReturn(0); 1144 } 1145 1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1147 { 1148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1149 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1150 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1151 1152 PetscFunctionBegin; 1153 *nn = n; 1154 if (!ia) PetscFunctionReturn(0); 1155 if (symmetric) { 1156 PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja)); 1157 nz = tia[n]; 1158 } else { 1159 tia = a->i; tja = a->j; 1160 } 1161 1162 if (!blockcompressed && bs > 1) { 1163 (*nn) *= bs; 1164 /* malloc & create the natural set of indices */ 1165 PetscCall(PetscMalloc1((n+1)*bs,ia)); 1166 if (n) { 1167 (*ia)[0] = oshift; 1168 for (j=1; j<bs; j++) { 1169 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1170 } 1171 } 1172 1173 for (i=1; i<n; i++) { 1174 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1175 for (j=1; j<bs; j++) { 1176 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1177 } 1178 } 1179 if (n) { 1180 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1181 } 1182 1183 if (inja) { 1184 PetscCall(PetscMalloc1(nz*bs*bs,ja)); 1185 cnt = 0; 1186 for (i=0; i<n; i++) { 1187 for (j=0; j<bs; j++) { 1188 for (k=tia[i]; k<tia[i+1]; k++) { 1189 for (l=0; l<bs; l++) { 1190 (*ja)[cnt++] = bs*tja[k] + l; 1191 } 1192 } 1193 } 1194 } 1195 } 1196 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 PetscCall(PetscFree(tia)); 1199 PetscCall(PetscFree(tja)); 1200 } 1201 } else if (oshift == 1) { 1202 if (symmetric) { 1203 nz = tia[A->rmap->n/bs]; 1204 /* add 1 to i and j indices */ 1205 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1206 *ia = tia; 1207 if (ja) { 1208 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1209 *ja = tja; 1210 } 1211 } else { 1212 nz = a->i[A->rmap->n/bs]; 1213 /* malloc space and add 1 to i and j indices */ 1214 PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia)); 1215 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1216 if (ja) { 1217 PetscCall(PetscMalloc1(nz,ja)); 1218 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1219 } 1220 } 1221 } else { 1222 *ia = tia; 1223 if (ja) *ja = tja; 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1229 { 1230 PetscFunctionBegin; 1231 if (!ia) PetscFunctionReturn(0); 1232 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1233 PetscCall(PetscFree(*ia)); 1234 if (ja) PetscCall(PetscFree(*ja)); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1240 { 1241 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1242 1243 PetscFunctionBegin; 1244 #if defined(PETSC_USE_LOG) 1245 PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz); 1246 #endif 1247 PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i)); 1248 PetscCall(ISDestroy(&a->row)); 1249 PetscCall(ISDestroy(&a->col)); 1250 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1251 PetscCall(PetscFree(a->idiag)); 1252 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen)); 1253 PetscCall(PetscFree(a->solve_work)); 1254 PetscCall(PetscFree(a->mult_work)); 1255 PetscCall(PetscFree(a->sor_workt)); 1256 PetscCall(PetscFree(a->sor_work)); 1257 PetscCall(ISDestroy(&a->icol)); 1258 PetscCall(PetscFree(a->saved_values)); 1259 PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex)); 1260 1261 PetscCall(MatDestroy(&a->sbaijMat)); 1262 PetscCall(MatDestroy(&a->parent)); 1263 PetscCall(PetscFree(A->data)); 1264 1265 PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 1266 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL)); 1267 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL)); 1268 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL)); 1269 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL)); 1270 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL)); 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL)); 1277 #if defined(PETSC_HAVE_HYPRE) 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL)); 1279 #endif 1280 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL)); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1285 { 1286 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1287 1288 PetscFunctionBegin; 1289 switch (op) { 1290 case MAT_ROW_ORIENTED: 1291 a->roworiented = flg; 1292 break; 1293 case MAT_KEEP_NONZERO_PATTERN: 1294 a->keepnonzeropattern = flg; 1295 break; 1296 case MAT_NEW_NONZERO_LOCATIONS: 1297 a->nonew = (flg ? 0 : 1); 1298 break; 1299 case MAT_NEW_NONZERO_LOCATION_ERR: 1300 a->nonew = (flg ? -1 : 0); 1301 break; 1302 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1303 a->nonew = (flg ? -2 : 0); 1304 break; 1305 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1306 a->nounused = (flg ? -1 : 0); 1307 break; 1308 case MAT_FORCE_DIAGONAL_ENTRIES: 1309 case MAT_IGNORE_OFF_PROC_ENTRIES: 1310 case MAT_USE_HASH_TABLE: 1311 case MAT_SORTED_FULL: 1312 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1313 break; 1314 case MAT_SPD: 1315 case MAT_SYMMETRIC: 1316 case MAT_STRUCTURALLY_SYMMETRIC: 1317 case MAT_HERMITIAN: 1318 case MAT_SYMMETRY_ETERNAL: 1319 case MAT_SUBMAT_SINGLEIS: 1320 case MAT_STRUCTURE_ONLY: 1321 /* These options are handled directly by MatSetOption() */ 1322 break; 1323 default: 1324 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1330 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1331 { 1332 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1333 MatScalar *aa_i; 1334 PetscScalar *v_i; 1335 1336 PetscFunctionBegin; 1337 bs = A->rmap->bs; 1338 bs2 = bs*bs; 1339 PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row); 1340 1341 bn = row/bs; /* Block number */ 1342 bp = row % bs; /* Block Position */ 1343 M = ai[bn+1] - ai[bn]; 1344 *nz = bs*M; 1345 1346 if (v) { 1347 *v = NULL; 1348 if (*nz) { 1349 PetscCall(PetscMalloc1(*nz,v)); 1350 for (i=0; i<M; i++) { /* for each block in the block row */ 1351 v_i = *v + i*bs; 1352 aa_i = aa + bs2*(ai[bn] + i); 1353 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1354 } 1355 } 1356 } 1357 1358 if (idx) { 1359 *idx = NULL; 1360 if (*nz) { 1361 PetscCall(PetscMalloc1(*nz,idx)); 1362 for (i=0; i<M; i++) { /* for each block in the block row */ 1363 idx_i = *idx + i*bs; 1364 itmp = bs*aj[ai[bn] + i]; 1365 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1366 } 1367 } 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1373 { 1374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1375 1376 PetscFunctionBegin; 1377 PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a)); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1382 { 1383 PetscFunctionBegin; 1384 if (nz) *nz = 0; 1385 if (idx) PetscCall(PetscFree(*idx)); 1386 if (v) PetscCall(PetscFree(*v)); 1387 PetscFunctionReturn(0); 1388 } 1389 1390 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1391 { 1392 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1393 Mat C; 1394 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1395 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1396 MatScalar *ata,*aa=a->a; 1397 1398 PetscFunctionBegin; 1399 PetscCall(PetscCalloc1(1+nbs,&atfill)); 1400 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1401 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1402 1403 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C)); 1404 PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N)); 1405 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 1406 PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill)); 1407 1408 at = (Mat_SeqBAIJ*)C->data; 1409 ati = at->i; 1410 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1411 } else { 1412 C = *B; 1413 at = (Mat_SeqBAIJ*)C->data; 1414 ati = at->i; 1415 } 1416 1417 atj = at->j; 1418 ata = at->a; 1419 1420 /* Copy ati into atfill so we have locations of the next free space in atj */ 1421 PetscCall(PetscArraycpy(atfill,ati,nbs)); 1422 1423 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1424 for (i=0; i<mbs; i++) { 1425 anzj = ai[i+1] - ai[i]; 1426 for (j=0; j<anzj; j++) { 1427 atj[atfill[*aj]] = i; 1428 for (kr=0; kr<bs; kr++) { 1429 for (k=0; k<bs; k++) { 1430 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1431 } 1432 } 1433 atfill[*aj++] += 1; 1434 } 1435 } 1436 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1437 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1438 1439 /* Clean up temporary space and complete requests. */ 1440 PetscCall(PetscFree(atfill)); 1441 1442 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1443 PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1444 *B = C; 1445 } else { 1446 PetscCall(MatHeaderMerge(A,&C)); 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1452 { 1453 Mat Btrans; 1454 1455 PetscFunctionBegin; 1456 *f = PETSC_FALSE; 1457 PetscCall(MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans)); 1458 PetscCall(MatEqual_SeqBAIJ(B,Btrans,f)); 1459 PetscCall(MatDestroy(&Btrans)); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1464 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1465 { 1466 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1467 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1468 PetscInt *rowlens,*colidxs; 1469 PetscScalar *matvals; 1470 1471 PetscFunctionBegin; 1472 PetscCall(PetscViewerSetUp(viewer)); 1473 1474 M = mat->rmap->N; 1475 N = mat->cmap->N; 1476 m = mat->rmap->n; 1477 bs = mat->rmap->bs; 1478 nz = bs*bs*A->nz; 1479 1480 /* write matrix header */ 1481 header[0] = MAT_FILE_CLASSID; 1482 header[1] = M; header[2] = N; header[3] = nz; 1483 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1484 1485 /* store row lengths */ 1486 PetscCall(PetscMalloc1(m,&rowlens)); 1487 for (cnt=0, i=0; i<A->mbs; i++) 1488 for (j=0; j<bs; j++) 1489 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1490 PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT)); 1491 PetscCall(PetscFree(rowlens)); 1492 1493 /* store column indices */ 1494 PetscCall(PetscMalloc1(nz,&colidxs)); 1495 for (cnt=0, i=0; i<A->mbs; i++) 1496 for (k=0; k<bs; k++) 1497 for (j=A->i[i]; j<A->i[i+1]; j++) 1498 for (l=0; l<bs; l++) 1499 colidxs[cnt++] = bs*A->j[j] + l; 1500 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1501 PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT)); 1502 PetscCall(PetscFree(colidxs)); 1503 1504 /* store nonzero values */ 1505 PetscCall(PetscMalloc1(nz,&matvals)); 1506 for (cnt=0, i=0; i<A->mbs; i++) 1507 for (k=0; k<bs; k++) 1508 for (j=A->i[i]; j<A->i[i+1]; j++) 1509 for (l=0; l<bs; l++) 1510 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1511 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1512 PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR)); 1513 PetscCall(PetscFree(matvals)); 1514 1515 /* write block size option to the viewer's .info file */ 1516 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1521 { 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscInt i,bs = A->rmap->bs,k; 1524 1525 PetscFunctionBegin; 1526 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1527 for (i=0; i<a->mbs; i++) { 1528 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1)); 1529 for (k=a->i[i]; k<a->i[i+1]; k++) { 1530 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1)); 1531 } 1532 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1533 } 1534 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1539 { 1540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1541 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1542 PetscViewerFormat format; 1543 1544 PetscFunctionBegin; 1545 if (A->structure_only) { 1546 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer)); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 PetscCall(PetscViewerGetFormat(viewer,&format)); 1551 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1552 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1553 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1554 const char *matname; 1555 Mat aij; 1556 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij)); 1557 PetscCall(PetscObjectGetName((PetscObject)A,&matname)); 1558 PetscCall(PetscObjectSetName((PetscObject)aij,matname)); 1559 PetscCall(MatView(aij,viewer)); 1560 PetscCall(MatDestroy(&aij)); 1561 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1562 PetscFunctionReturn(0); 1563 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1564 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1565 for (i=0; i<a->mbs; i++) { 1566 for (j=0; j<bs; j++) { 1567 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1568 for (k=a->i[i]; k<a->i[i+1]; k++) { 1569 for (l=0; l<bs; l++) { 1570 #if defined(PETSC_USE_COMPLEX) 1571 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1572 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l, 1573 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1574 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1575 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l, 1576 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1577 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1578 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1579 } 1580 #else 1581 if (a->a[bs2*k + l*bs + j] != 0.0) { 1582 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1583 } 1584 #endif 1585 } 1586 } 1587 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1588 } 1589 } 1590 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1591 } else { 1592 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1593 for (i=0; i<a->mbs; i++) { 1594 for (j=0; j<bs; j++) { 1595 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1596 for (k=a->i[i]; k<a->i[i+1]; k++) { 1597 for (l=0; l<bs; l++) { 1598 #if defined(PETSC_USE_COMPLEX) 1599 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1600 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l, 1601 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1602 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1603 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l, 1604 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1605 } else { 1606 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1607 } 1608 #else 1609 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1610 #endif 1611 } 1612 } 1613 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1614 } 1615 } 1616 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1617 } 1618 PetscCall(PetscViewerFlush(viewer)); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #include <petscdraw.h> 1623 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1624 { 1625 Mat A = (Mat) Aa; 1626 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1627 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1628 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1629 MatScalar *aa; 1630 PetscViewer viewer; 1631 PetscViewerFormat format; 1632 1633 PetscFunctionBegin; 1634 PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer)); 1635 PetscCall(PetscViewerGetFormat(viewer,&format)); 1636 PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr)); 1637 1638 /* loop over matrix elements drawing boxes */ 1639 1640 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1641 PetscDrawCollectiveBegin(draw); 1642 /* Blue for negative, Cyan for zero and Red for positive */ 1643 color = PETSC_DRAW_BLUE; 1644 for (i=0,row=0; i<mbs; i++,row+=bs) { 1645 for (j=a->i[i]; j<a->i[i+1]; j++) { 1646 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1647 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1648 aa = a->a + j*bs2; 1649 for (k=0; k<bs; k++) { 1650 for (l=0; l<bs; l++) { 1651 if (PetscRealPart(*aa++) >= 0.) continue; 1652 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1653 } 1654 } 1655 } 1656 } 1657 color = PETSC_DRAW_CYAN; 1658 for (i=0,row=0; i<mbs; i++,row+=bs) { 1659 for (j=a->i[i]; j<a->i[i+1]; j++) { 1660 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1661 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1662 aa = a->a + j*bs2; 1663 for (k=0; k<bs; k++) { 1664 for (l=0; l<bs; l++) { 1665 if (PetscRealPart(*aa++) != 0.) continue; 1666 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1667 } 1668 } 1669 } 1670 } 1671 color = PETSC_DRAW_RED; 1672 for (i=0,row=0; i<mbs; i++,row+=bs) { 1673 for (j=a->i[i]; j<a->i[i+1]; j++) { 1674 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1675 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1676 aa = a->a + j*bs2; 1677 for (k=0; k<bs; k++) { 1678 for (l=0; l<bs; l++) { 1679 if (PetscRealPart(*aa++) <= 0.) continue; 1680 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1681 } 1682 } 1683 } 1684 } 1685 PetscDrawCollectiveEnd(draw); 1686 } else { 1687 /* use contour shading to indicate magnitude of values */ 1688 /* first determine max of all nonzero values */ 1689 PetscReal minv = 0.0, maxv = 0.0; 1690 PetscDraw popup; 1691 1692 for (i=0; i<a->nz*a->bs2; i++) { 1693 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1694 } 1695 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1696 PetscCall(PetscDrawGetPopup(draw,&popup)); 1697 PetscCall(PetscDrawScalePopup(popup,0.0,maxv)); 1698 1699 PetscDrawCollectiveBegin(draw); 1700 for (i=0,row=0; i<mbs; i++,row+=bs) { 1701 for (j=a->i[i]; j<a->i[i+1]; j++) { 1702 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1703 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1704 aa = a->a + j*bs2; 1705 for (k=0; k<bs; k++) { 1706 for (l=0; l<bs; l++) { 1707 MatScalar v = *aa++; 1708 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1709 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1710 } 1711 } 1712 } 1713 } 1714 PetscDrawCollectiveEnd(draw); 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 1719 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1720 { 1721 PetscReal xl,yl,xr,yr,w,h; 1722 PetscDraw draw; 1723 PetscBool isnull; 1724 1725 PetscFunctionBegin; 1726 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1727 PetscCall(PetscDrawIsNull(draw,&isnull)); 1728 if (isnull) PetscFunctionReturn(0); 1729 1730 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1731 xr += w; yr += h; xl = -w; yl = -h; 1732 PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr)); 1733 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer)); 1734 PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A)); 1735 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL)); 1736 PetscCall(PetscDrawSave(draw)); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1741 { 1742 PetscBool iascii,isbinary,isdraw; 1743 1744 PetscFunctionBegin; 1745 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1746 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1747 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1748 if (iascii) { 1749 PetscCall(MatView_SeqBAIJ_ASCII(A,viewer)); 1750 } else if (isbinary) { 1751 PetscCall(MatView_SeqBAIJ_Binary(A,viewer)); 1752 } else if (isdraw) { 1753 PetscCall(MatView_SeqBAIJ_Draw(A,viewer)); 1754 } else { 1755 Mat B; 1756 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1757 PetscCall(MatView(B,viewer)); 1758 PetscCall(MatDestroy(&B)); 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1764 { 1765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1766 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1767 PetscInt *ai = a->i,*ailen = a->ilen; 1768 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1769 MatScalar *ap,*aa = a->a; 1770 1771 PetscFunctionBegin; 1772 for (k=0; k<m; k++) { /* loop over rows */ 1773 row = im[k]; brow = row/bs; 1774 if (row < 0) {v += n; continue;} /* negative row */ 1775 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row); 1776 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1777 ap = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1778 nrow = ailen[brow]; 1779 for (l=0; l<n; l++) { /* loop over columns */ 1780 if (in[l] < 0) {v++; continue;} /* negative column */ 1781 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]); 1782 col = in[l]; 1783 bcol = col/bs; 1784 cidx = col%bs; 1785 ridx = row%bs; 1786 high = nrow; 1787 low = 0; /* assume unsorted */ 1788 while (high-low > 5) { 1789 t = (low+high)/2; 1790 if (rp[t] > bcol) high = t; 1791 else low = t; 1792 } 1793 for (i=low; i<high; i++) { 1794 if (rp[i] > bcol) break; 1795 if (rp[i] == bcol) { 1796 *v++ = ap[bs2*i+bs*cidx+ridx]; 1797 goto finished; 1798 } 1799 } 1800 *v++ = 0.0; 1801 finished:; 1802 } 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1808 { 1809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1810 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1811 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1812 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1813 PetscBool roworiented=a->roworiented; 1814 const PetscScalar *value = v; 1815 MatScalar *ap=NULL,*aa = a->a,*bap; 1816 1817 PetscFunctionBegin; 1818 if (roworiented) { 1819 stepval = (n-1)*bs; 1820 } else { 1821 stepval = (m-1)*bs; 1822 } 1823 for (k=0; k<m; k++) { /* loop over added rows */ 1824 row = im[k]; 1825 if (row < 0) continue; 1826 PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1); 1827 rp = aj + ai[row]; 1828 if (!A->structure_only) ap = aa + bs2*ai[row]; 1829 rmax = imax[row]; 1830 nrow = ailen[row]; 1831 low = 0; 1832 high = nrow; 1833 for (l=0; l<n; l++) { /* loop over added columns */ 1834 if (in[l] < 0) continue; 1835 PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1); 1836 col = in[l]; 1837 if (!A->structure_only) { 1838 if (roworiented) { 1839 value = v + (k*(stepval+bs) + l)*bs; 1840 } else { 1841 value = v + (l*(stepval+bs) + k)*bs; 1842 } 1843 } 1844 if (col <= lastcol) low = 0; 1845 else high = nrow; 1846 lastcol = col; 1847 while (high-low > 7) { 1848 t = (low+high)/2; 1849 if (rp[t] > col) high = t; 1850 else low = t; 1851 } 1852 for (i=low; i<high; i++) { 1853 if (rp[i] > col) break; 1854 if (rp[i] == col) { 1855 if (A->structure_only) goto noinsert2; 1856 bap = ap + bs2*i; 1857 if (roworiented) { 1858 if (is == ADD_VALUES) { 1859 for (ii=0; ii<bs; ii++,value+=stepval) { 1860 for (jj=ii; jj<bs2; jj+=bs) { 1861 bap[jj] += *value++; 1862 } 1863 } 1864 } else { 1865 for (ii=0; ii<bs; ii++,value+=stepval) { 1866 for (jj=ii; jj<bs2; jj+=bs) { 1867 bap[jj] = *value++; 1868 } 1869 } 1870 } 1871 } else { 1872 if (is == ADD_VALUES) { 1873 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1874 for (jj=0; jj<bs; jj++) { 1875 bap[jj] += value[jj]; 1876 } 1877 bap += bs; 1878 } 1879 } else { 1880 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1881 for (jj=0; jj<bs; jj++) { 1882 bap[jj] = value[jj]; 1883 } 1884 bap += bs; 1885 } 1886 } 1887 } 1888 goto noinsert2; 1889 } 1890 } 1891 if (nonew == 1) goto noinsert2; 1892 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 1893 if (A->structure_only) { 1894 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1895 } else { 1896 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1897 } 1898 N = nrow++ - 1; high++; 1899 /* shift up all the later entries in this row */ 1900 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1901 rp[i] = col; 1902 if (!A->structure_only) { 1903 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 1904 bap = ap + bs2*i; 1905 if (roworiented) { 1906 for (ii=0; ii<bs; ii++,value+=stepval) { 1907 for (jj=ii; jj<bs2; jj+=bs) { 1908 bap[jj] = *value++; 1909 } 1910 } 1911 } else { 1912 for (ii=0; ii<bs; ii++,value+=stepval) { 1913 for (jj=0; jj<bs; jj++) { 1914 *bap++ = *value++; 1915 } 1916 } 1917 } 1918 } 1919 noinsert2:; 1920 low = i; 1921 } 1922 ailen[row] = nrow; 1923 } 1924 PetscFunctionReturn(0); 1925 } 1926 1927 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1928 { 1929 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1930 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1931 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1932 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1933 MatScalar *aa = a->a,*ap; 1934 PetscReal ratio=0.6; 1935 1936 PetscFunctionBegin; 1937 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1938 1939 if (m) rmax = ailen[0]; 1940 for (i=1; i<mbs; i++) { 1941 /* move each row back by the amount of empty slots (fshift) before it*/ 1942 fshift += imax[i-1] - ailen[i-1]; 1943 rmax = PetscMax(rmax,ailen[i]); 1944 if (fshift) { 1945 ip = aj + ai[i]; 1946 ap = aa + bs2*ai[i]; 1947 N = ailen[i]; 1948 PetscCall(PetscArraymove(ip-fshift,ip,N)); 1949 if (!A->structure_only) { 1950 PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N)); 1951 } 1952 } 1953 ai[i] = ai[i-1] + ailen[i-1]; 1954 } 1955 if (mbs) { 1956 fshift += imax[mbs-1] - ailen[mbs-1]; 1957 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1958 } 1959 1960 /* reset ilen and imax for each row */ 1961 a->nonzerorowcnt = 0; 1962 if (A->structure_only) { 1963 PetscCall(PetscFree2(a->imax,a->ilen)); 1964 } else { /* !A->structure_only */ 1965 for (i=0; i<mbs; i++) { 1966 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1967 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1968 } 1969 } 1970 a->nz = ai[mbs]; 1971 1972 /* diagonals may have moved, so kill the diagonal pointers */ 1973 a->idiagvalid = PETSC_FALSE; 1974 if (fshift && a->diag) { 1975 PetscCall(PetscFree(a->diag)); 1976 PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt))); 1977 a->diag = NULL; 1978 } 1979 if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1980 PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2)); 1981 PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs)); 1982 PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax)); 1983 1984 A->info.mallocs += a->reallocs; 1985 a->reallocs = 0; 1986 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1987 a->rmax = rmax; 1988 1989 if (!A->structure_only) { 1990 PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio)); 1991 } 1992 PetscFunctionReturn(0); 1993 } 1994 1995 /* 1996 This function returns an array of flags which indicate the locations of contiguous 1997 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1998 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1999 Assume: sizes should be long enough to hold all the values. 2000 */ 2001 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2002 { 2003 PetscInt i,j,k,row; 2004 PetscBool flg; 2005 2006 PetscFunctionBegin; 2007 for (i=0,j=0; i<n; j++) { 2008 row = idx[i]; 2009 if (row%bs!=0) { /* Not the beginning of a block */ 2010 sizes[j] = 1; 2011 i++; 2012 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2013 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2014 i++; 2015 } else { /* Beginning of the block, so check if the complete block exists */ 2016 flg = PETSC_TRUE; 2017 for (k=1; k<bs; k++) { 2018 if (row+k != idx[i+k]) { /* break in the block */ 2019 flg = PETSC_FALSE; 2020 break; 2021 } 2022 } 2023 if (flg) { /* No break in the bs */ 2024 sizes[j] = bs; 2025 i += bs; 2026 } else { 2027 sizes[j] = 1; 2028 i++; 2029 } 2030 } 2031 } 2032 *bs_max = j; 2033 PetscFunctionReturn(0); 2034 } 2035 2036 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2037 { 2038 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2039 PetscInt i,j,k,count,*rows; 2040 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2041 PetscScalar zero = 0.0; 2042 MatScalar *aa; 2043 const PetscScalar *xx; 2044 PetscScalar *bb; 2045 2046 PetscFunctionBegin; 2047 /* fix right hand side if needed */ 2048 if (x && b) { 2049 PetscCall(VecGetArrayRead(x,&xx)); 2050 PetscCall(VecGetArray(b,&bb)); 2051 for (i=0; i<is_n; i++) { 2052 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2053 } 2054 PetscCall(VecRestoreArrayRead(x,&xx)); 2055 PetscCall(VecRestoreArray(b,&bb)); 2056 } 2057 2058 /* Make a copy of the IS and sort it */ 2059 /* allocate memory for rows,sizes */ 2060 PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes)); 2061 2062 /* copy IS values to rows, and sort them */ 2063 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2064 PetscCall(PetscSortInt(is_n,rows)); 2065 2066 if (baij->keepnonzeropattern) { 2067 for (i=0; i<is_n; i++) sizes[i] = 1; 2068 bs_max = is_n; 2069 } else { 2070 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max)); 2071 A->nonzerostate++; 2072 } 2073 2074 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2075 row = rows[j]; 2076 PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row); 2077 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2078 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2079 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2080 if (diag != (PetscScalar)0.0) { 2081 if (baij->ilen[row/bs] > 0) { 2082 baij->ilen[row/bs] = 1; 2083 baij->j[baij->i[row/bs]] = row/bs; 2084 2085 PetscCall(PetscArrayzero(aa,count*bs)); 2086 } 2087 /* Now insert all the diagonal values for this bs */ 2088 for (k=0; k<bs; k++) { 2089 PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES)); 2090 } 2091 } else { /* (diag == 0.0) */ 2092 baij->ilen[row/bs] = 0; 2093 } /* end (diag == 0.0) */ 2094 } else { /* (sizes[i] != bs) */ 2095 PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2096 for (k=0; k<count; k++) { 2097 aa[0] = zero; 2098 aa += bs; 2099 } 2100 if (diag != (PetscScalar)0.0) { 2101 PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES)); 2102 } 2103 } 2104 } 2105 2106 PetscCall(PetscFree2(rows,sizes)); 2107 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2108 PetscFunctionReturn(0); 2109 } 2110 2111 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2112 { 2113 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2114 PetscInt i,j,k,count; 2115 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2116 PetscScalar zero = 0.0; 2117 MatScalar *aa; 2118 const PetscScalar *xx; 2119 PetscScalar *bb; 2120 PetscBool *zeroed,vecs = PETSC_FALSE; 2121 2122 PetscFunctionBegin; 2123 /* fix right hand side if needed */ 2124 if (x && b) { 2125 PetscCall(VecGetArrayRead(x,&xx)); 2126 PetscCall(VecGetArray(b,&bb)); 2127 vecs = PETSC_TRUE; 2128 } 2129 2130 /* zero the columns */ 2131 PetscCall(PetscCalloc1(A->rmap->n,&zeroed)); 2132 for (i=0; i<is_n; i++) { 2133 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]); 2134 zeroed[is_idx[i]] = PETSC_TRUE; 2135 } 2136 for (i=0; i<A->rmap->N; i++) { 2137 if (!zeroed[i]) { 2138 row = i/bs; 2139 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2140 for (k=0; k<bs; k++) { 2141 col = bs*baij->j[j] + k; 2142 if (zeroed[col]) { 2143 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2144 if (vecs) bb[i] -= aa[0]*xx[col]; 2145 aa[0] = 0.0; 2146 } 2147 } 2148 } 2149 } else if (vecs) bb[i] = diag*xx[i]; 2150 } 2151 PetscCall(PetscFree(zeroed)); 2152 if (vecs) { 2153 PetscCall(VecRestoreArrayRead(x,&xx)); 2154 PetscCall(VecRestoreArray(b,&bb)); 2155 } 2156 2157 /* zero the rows */ 2158 for (i=0; i<is_n; i++) { 2159 row = is_idx[i]; 2160 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2161 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2162 for (k=0; k<count; k++) { 2163 aa[0] = zero; 2164 aa += bs; 2165 } 2166 if (diag != (PetscScalar)0.0) { 2167 PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES)); 2168 } 2169 } 2170 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2175 { 2176 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2177 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2178 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2179 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2180 PetscInt ridx,cidx,bs2=a->bs2; 2181 PetscBool roworiented=a->roworiented; 2182 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2183 2184 PetscFunctionBegin; 2185 for (k=0; k<m; k++) { /* loop over added rows */ 2186 row = im[k]; 2187 brow = row/bs; 2188 if (row < 0) continue; 2189 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1); 2190 rp = aj + ai[brow]; 2191 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2192 rmax = imax[brow]; 2193 nrow = ailen[brow]; 2194 low = 0; 2195 high = nrow; 2196 for (l=0; l<n; l++) { /* loop over added columns */ 2197 if (in[l] < 0) continue; 2198 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1); 2199 col = in[l]; bcol = col/bs; 2200 ridx = row % bs; cidx = col % bs; 2201 if (!A->structure_only) { 2202 if (roworiented) { 2203 value = v[l + k*n]; 2204 } else { 2205 value = v[k + l*m]; 2206 } 2207 } 2208 if (col <= lastcol) low = 0; else high = nrow; 2209 lastcol = col; 2210 while (high-low > 7) { 2211 t = (low+high)/2; 2212 if (rp[t] > bcol) high = t; 2213 else low = t; 2214 } 2215 for (i=low; i<high; i++) { 2216 if (rp[i] > bcol) break; 2217 if (rp[i] == bcol) { 2218 bap = ap + bs2*i + bs*cidx + ridx; 2219 if (!A->structure_only) { 2220 if (is == ADD_VALUES) *bap += value; 2221 else *bap = value; 2222 } 2223 goto noinsert1; 2224 } 2225 } 2226 if (nonew == 1) goto noinsert1; 2227 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2228 if (A->structure_only) { 2229 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2230 } else { 2231 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2232 } 2233 N = nrow++ - 1; high++; 2234 /* shift up all the later entries in this row */ 2235 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 2236 rp[i] = bcol; 2237 if (!A->structure_only) { 2238 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 2239 PetscCall(PetscArrayzero(ap+bs2*i,bs2)); 2240 ap[bs2*i + bs*cidx + ridx] = value; 2241 } 2242 a->nz++; 2243 A->nonzerostate++; 2244 noinsert1:; 2245 low = i; 2246 } 2247 ailen[brow] = nrow; 2248 } 2249 PetscFunctionReturn(0); 2250 } 2251 2252 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2253 { 2254 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2255 Mat outA; 2256 PetscBool row_identity,col_identity; 2257 2258 PetscFunctionBegin; 2259 PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2260 PetscCall(ISIdentity(row,&row_identity)); 2261 PetscCall(ISIdentity(col,&col_identity)); 2262 PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2263 2264 outA = inA; 2265 inA->factortype = MAT_FACTOR_LU; 2266 PetscCall(PetscFree(inA->solvertype)); 2267 PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype)); 2268 2269 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2270 2271 PetscCall(PetscObjectReference((PetscObject)row)); 2272 PetscCall(ISDestroy(&a->row)); 2273 a->row = row; 2274 PetscCall(PetscObjectReference((PetscObject)col)); 2275 PetscCall(ISDestroy(&a->col)); 2276 a->col = col; 2277 2278 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2279 PetscCall(ISDestroy(&a->icol)); 2280 PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol)); 2281 PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol)); 2282 2283 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity))); 2284 if (!a->solve_work) { 2285 PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work)); 2286 PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar))); 2287 } 2288 PetscCall(MatLUFactorNumeric(outA,inA,info)); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2293 { 2294 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2295 PetscInt i,nz,mbs; 2296 2297 PetscFunctionBegin; 2298 nz = baij->maxnz; 2299 mbs = baij->mbs; 2300 for (i=0; i<nz; i++) { 2301 baij->j[i] = indices[i]; 2302 } 2303 baij->nz = nz; 2304 for (i=0; i<mbs; i++) { 2305 baij->ilen[i] = baij->imax[i]; 2306 } 2307 PetscFunctionReturn(0); 2308 } 2309 2310 /*@ 2311 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2312 in the matrix. 2313 2314 Input Parameters: 2315 + mat - the SeqBAIJ matrix 2316 - indices - the column indices 2317 2318 Level: advanced 2319 2320 Notes: 2321 This can be called if you have precomputed the nonzero structure of the 2322 matrix and want to provide it to the matrix object to improve the performance 2323 of the MatSetValues() operation. 2324 2325 You MUST have set the correct numbers of nonzeros per row in the call to 2326 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2327 2328 MUST be called before any calls to MatSetValues(); 2329 2330 @*/ 2331 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2332 { 2333 PetscFunctionBegin; 2334 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2335 PetscValidIntPointer(indices,2); 2336 PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices)); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2341 { 2342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2343 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2344 PetscReal atmp; 2345 PetscScalar *x,zero = 0.0; 2346 MatScalar *aa; 2347 PetscInt ncols,brow,krow,kcol; 2348 2349 PetscFunctionBegin; 2350 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2351 PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2352 bs = A->rmap->bs; 2353 aa = a->a; 2354 ai = a->i; 2355 aj = a->j; 2356 mbs = a->mbs; 2357 2358 PetscCall(VecSet(v,zero)); 2359 PetscCall(VecGetArray(v,&x)); 2360 PetscCall(VecGetLocalSize(v,&n)); 2361 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2362 for (i=0; i<mbs; i++) { 2363 ncols = ai[1] - ai[0]; ai++; 2364 brow = bs*i; 2365 for (j=0; j<ncols; j++) { 2366 for (kcol=0; kcol<bs; kcol++) { 2367 for (krow=0; krow<bs; krow++) { 2368 atmp = PetscAbsScalar(*aa);aa++; 2369 row = brow + krow; /* row index */ 2370 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2371 } 2372 } 2373 aj++; 2374 } 2375 } 2376 PetscCall(VecRestoreArray(v,&x)); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2381 { 2382 PetscFunctionBegin; 2383 /* If the two matrices have the same copy implementation, use fast copy. */ 2384 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2386 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2387 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2388 2389 PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]); 2390 PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs); 2391 PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs])); 2392 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2393 } else { 2394 PetscCall(MatCopy_Basic(A,B,str)); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2400 { 2401 PetscFunctionBegin; 2402 PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL)); 2403 PetscFunctionReturn(0); 2404 } 2405 2406 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2407 { 2408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2409 2410 PetscFunctionBegin; 2411 *array = a->a; 2412 PetscFunctionReturn(0); 2413 } 2414 2415 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2416 { 2417 PetscFunctionBegin; 2418 *array = NULL; 2419 PetscFunctionReturn(0); 2420 } 2421 2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2423 { 2424 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2425 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2426 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2427 2428 PetscFunctionBegin; 2429 /* Set the number of nonzeros in the new matrix */ 2430 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz)); 2431 PetscFunctionReturn(0); 2432 } 2433 2434 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2435 { 2436 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2437 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2438 PetscBLASInt one=1; 2439 2440 PetscFunctionBegin; 2441 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2442 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2443 if (e) { 2444 PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e)); 2445 if (e) { 2446 PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e)); 2447 if (e) str = SAME_NONZERO_PATTERN; 2448 } 2449 } 2450 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN"); 2451 } 2452 if (str == SAME_NONZERO_PATTERN) { 2453 PetscScalar alpha = a; 2454 PetscBLASInt bnz; 2455 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 2456 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2457 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2458 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2459 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2460 } else { 2461 Mat B; 2462 PetscInt *nnz; 2463 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2464 PetscCall(PetscMalloc1(Y->rmap->N,&nnz)); 2465 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2466 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2467 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 2468 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 2469 PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name)); 2470 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz)); 2471 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 2472 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2473 PetscCall(MatHeaderMerge(Y,&B)); 2474 PetscCall(PetscFree(nnz)); 2475 } 2476 PetscFunctionReturn(0); 2477 } 2478 2479 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2480 { 2481 #if defined(PETSC_USE_COMPLEX) 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2484 MatScalar *aa = a->a; 2485 2486 PetscFunctionBegin; 2487 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2488 #else 2489 PetscFunctionBegin; 2490 #endif 2491 PetscFunctionReturn(0); 2492 } 2493 2494 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2495 { 2496 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2497 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2498 MatScalar *aa = a->a; 2499 2500 PetscFunctionBegin; 2501 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2506 { 2507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2508 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2509 MatScalar *aa = a->a; 2510 2511 PetscFunctionBegin; 2512 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 /* 2517 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2518 */ 2519 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2520 { 2521 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2522 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2523 PetscInt nz = a->i[m],row,*jj,mr,col; 2524 2525 PetscFunctionBegin; 2526 *nn = n; 2527 if (!ia) PetscFunctionReturn(0); 2528 PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2529 else { 2530 PetscCall(PetscCalloc1(n,&collengths)); 2531 PetscCall(PetscMalloc1(n+1,&cia)); 2532 PetscCall(PetscMalloc1(nz,&cja)); 2533 jj = a->j; 2534 for (i=0; i<nz; i++) { 2535 collengths[jj[i]]++; 2536 } 2537 cia[0] = oshift; 2538 for (i=0; i<n; i++) { 2539 cia[i+1] = cia[i] + collengths[i]; 2540 } 2541 PetscCall(PetscArrayzero(collengths,n)); 2542 jj = a->j; 2543 for (row=0; row<m; row++) { 2544 mr = a->i[row+1] - a->i[row]; 2545 for (i=0; i<mr; i++) { 2546 col = *jj++; 2547 2548 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2549 } 2550 } 2551 PetscCall(PetscFree(collengths)); 2552 *ia = cia; *ja = cja; 2553 } 2554 PetscFunctionReturn(0); 2555 } 2556 2557 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2558 { 2559 PetscFunctionBegin; 2560 if (!ia) PetscFunctionReturn(0); 2561 PetscCall(PetscFree(*ia)); 2562 PetscCall(PetscFree(*ja)); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 /* 2567 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2568 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2569 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2570 */ 2571 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2572 { 2573 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2574 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2575 PetscInt nz = a->i[m],row,*jj,mr,col; 2576 PetscInt *cspidx; 2577 2578 PetscFunctionBegin; 2579 *nn = n; 2580 if (!ia) PetscFunctionReturn(0); 2581 2582 PetscCall(PetscCalloc1(n,&collengths)); 2583 PetscCall(PetscMalloc1(n+1,&cia)); 2584 PetscCall(PetscMalloc1(nz,&cja)); 2585 PetscCall(PetscMalloc1(nz,&cspidx)); 2586 jj = a->j; 2587 for (i=0; i<nz; i++) { 2588 collengths[jj[i]]++; 2589 } 2590 cia[0] = oshift; 2591 for (i=0; i<n; i++) { 2592 cia[i+1] = cia[i] + collengths[i]; 2593 } 2594 PetscCall(PetscArrayzero(collengths,n)); 2595 jj = a->j; 2596 for (row=0; row<m; row++) { 2597 mr = a->i[row+1] - a->i[row]; 2598 for (i=0; i<mr; i++) { 2599 col = *jj++; 2600 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2601 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2602 } 2603 } 2604 PetscCall(PetscFree(collengths)); 2605 *ia = cia; 2606 *ja = cja; 2607 *spidx = cspidx; 2608 PetscFunctionReturn(0); 2609 } 2610 2611 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2612 { 2613 PetscFunctionBegin; 2614 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done)); 2615 PetscCall(PetscFree(*spidx)); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2620 { 2621 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2622 2623 PetscFunctionBegin; 2624 if (!Y->preallocated || !aij->nz) { 2625 PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL)); 2626 } 2627 PetscCall(MatShift_Basic(Y,a)); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 /* -------------------------------------------------------------------*/ 2632 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2633 MatGetRow_SeqBAIJ, 2634 MatRestoreRow_SeqBAIJ, 2635 MatMult_SeqBAIJ_N, 2636 /* 4*/ MatMultAdd_SeqBAIJ_N, 2637 MatMultTranspose_SeqBAIJ, 2638 MatMultTransposeAdd_SeqBAIJ, 2639 NULL, 2640 NULL, 2641 NULL, 2642 /* 10*/ NULL, 2643 MatLUFactor_SeqBAIJ, 2644 NULL, 2645 NULL, 2646 MatTranspose_SeqBAIJ, 2647 /* 15*/ MatGetInfo_SeqBAIJ, 2648 MatEqual_SeqBAIJ, 2649 MatGetDiagonal_SeqBAIJ, 2650 MatDiagonalScale_SeqBAIJ, 2651 MatNorm_SeqBAIJ, 2652 /* 20*/ NULL, 2653 MatAssemblyEnd_SeqBAIJ, 2654 MatSetOption_SeqBAIJ, 2655 MatZeroEntries_SeqBAIJ, 2656 /* 24*/ MatZeroRows_SeqBAIJ, 2657 NULL, 2658 NULL, 2659 NULL, 2660 NULL, 2661 /* 29*/ MatSetUp_SeqBAIJ, 2662 NULL, 2663 NULL, 2664 NULL, 2665 NULL, 2666 /* 34*/ MatDuplicate_SeqBAIJ, 2667 NULL, 2668 NULL, 2669 MatILUFactor_SeqBAIJ, 2670 NULL, 2671 /* 39*/ MatAXPY_SeqBAIJ, 2672 MatCreateSubMatrices_SeqBAIJ, 2673 MatIncreaseOverlap_SeqBAIJ, 2674 MatGetValues_SeqBAIJ, 2675 MatCopy_SeqBAIJ, 2676 /* 44*/ NULL, 2677 MatScale_SeqBAIJ, 2678 MatShift_SeqBAIJ, 2679 NULL, 2680 MatZeroRowsColumns_SeqBAIJ, 2681 /* 49*/ NULL, 2682 MatGetRowIJ_SeqBAIJ, 2683 MatRestoreRowIJ_SeqBAIJ, 2684 MatGetColumnIJ_SeqBAIJ, 2685 MatRestoreColumnIJ_SeqBAIJ, 2686 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2687 NULL, 2688 NULL, 2689 NULL, 2690 MatSetValuesBlocked_SeqBAIJ, 2691 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2692 MatDestroy_SeqBAIJ, 2693 MatView_SeqBAIJ, 2694 NULL, 2695 NULL, 2696 /* 64*/ NULL, 2697 NULL, 2698 NULL, 2699 NULL, 2700 NULL, 2701 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2702 NULL, 2703 MatConvert_Basic, 2704 NULL, 2705 NULL, 2706 /* 74*/ NULL, 2707 MatFDColoringApply_BAIJ, 2708 NULL, 2709 NULL, 2710 NULL, 2711 /* 79*/ NULL, 2712 NULL, 2713 NULL, 2714 NULL, 2715 MatLoad_SeqBAIJ, 2716 /* 84*/ NULL, 2717 NULL, 2718 NULL, 2719 NULL, 2720 NULL, 2721 /* 89*/ NULL, 2722 NULL, 2723 NULL, 2724 NULL, 2725 NULL, 2726 /* 94*/ NULL, 2727 NULL, 2728 NULL, 2729 NULL, 2730 NULL, 2731 /* 99*/ NULL, 2732 NULL, 2733 NULL, 2734 MatConjugate_SeqBAIJ, 2735 NULL, 2736 /*104*/ NULL, 2737 MatRealPart_SeqBAIJ, 2738 MatImaginaryPart_SeqBAIJ, 2739 NULL, 2740 NULL, 2741 /*109*/ NULL, 2742 NULL, 2743 NULL, 2744 NULL, 2745 MatMissingDiagonal_SeqBAIJ, 2746 /*114*/ NULL, 2747 NULL, 2748 NULL, 2749 NULL, 2750 NULL, 2751 /*119*/ NULL, 2752 NULL, 2753 MatMultHermitianTranspose_SeqBAIJ, 2754 MatMultHermitianTransposeAdd_SeqBAIJ, 2755 NULL, 2756 /*124*/ NULL, 2757 MatGetColumnReductions_SeqBAIJ, 2758 MatInvertBlockDiagonal_SeqBAIJ, 2759 NULL, 2760 NULL, 2761 /*129*/ NULL, 2762 NULL, 2763 NULL, 2764 NULL, 2765 NULL, 2766 /*134*/ NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 NULL, 2771 /*139*/ MatSetBlockSizes_Default, 2772 NULL, 2773 NULL, 2774 MatFDColoringSetUp_SeqXAIJ, 2775 NULL, 2776 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2777 MatDestroySubMatrices_SeqBAIJ, 2778 NULL, 2779 NULL 2780 }; 2781 2782 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2783 { 2784 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2785 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2786 2787 PetscFunctionBegin; 2788 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2789 2790 /* allocate space for values if not already there */ 2791 if (!aij->saved_values) { 2792 PetscCall(PetscMalloc1(nz+1,&aij->saved_values)); 2793 PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar))); 2794 } 2795 2796 /* copy values over */ 2797 PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz)); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2802 { 2803 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2804 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2805 2806 PetscFunctionBegin; 2807 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2808 PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2809 2810 /* copy values over */ 2811 PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz)); 2812 PetscFunctionReturn(0); 2813 } 2814 2815 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2816 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2817 2818 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2819 { 2820 Mat_SeqBAIJ *b; 2821 PetscInt i,mbs,nbs,bs2; 2822 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2823 2824 PetscFunctionBegin; 2825 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2826 if (nz == MAT_SKIP_ALLOCATION) { 2827 skipallocation = PETSC_TRUE; 2828 nz = 0; 2829 } 2830 2831 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2832 PetscCall(PetscLayoutSetUp(B->rmap)); 2833 PetscCall(PetscLayoutSetUp(B->cmap)); 2834 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2835 2836 B->preallocated = PETSC_TRUE; 2837 2838 mbs = B->rmap->n/bs; 2839 nbs = B->cmap->n/bs; 2840 bs2 = bs*bs; 2841 2842 PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs); 2843 2844 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2845 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz); 2846 if (nnz) { 2847 for (i=0; i<mbs; i++) { 2848 PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]); 2849 PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs); 2850 } 2851 } 2852 2853 b = (Mat_SeqBAIJ*)B->data; 2854 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat"); 2855 PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL)); 2856 PetscOptionsEnd(); 2857 2858 if (!flg) { 2859 switch (bs) { 2860 case 1: 2861 B->ops->mult = MatMult_SeqBAIJ_1; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2863 break; 2864 case 2: 2865 B->ops->mult = MatMult_SeqBAIJ_2; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2867 break; 2868 case 3: 2869 B->ops->mult = MatMult_SeqBAIJ_3; 2870 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2871 break; 2872 case 4: 2873 B->ops->mult = MatMult_SeqBAIJ_4; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2875 break; 2876 case 5: 2877 B->ops->mult = MatMult_SeqBAIJ_5; 2878 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2879 break; 2880 case 6: 2881 B->ops->mult = MatMult_SeqBAIJ_6; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2883 break; 2884 case 7: 2885 B->ops->mult = MatMult_SeqBAIJ_7; 2886 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2887 break; 2888 case 9: 2889 { 2890 PetscInt version = 1; 2891 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2892 switch (version) { 2893 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2894 case 1: 2895 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2896 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2897 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2898 break; 2899 #endif 2900 default: 2901 B->ops->mult = MatMult_SeqBAIJ_N; 2902 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2903 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2904 break; 2905 } 2906 break; 2907 } 2908 case 11: 2909 B->ops->mult = MatMult_SeqBAIJ_11; 2910 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2911 break; 2912 case 12: 2913 { 2914 PetscInt version = 1; 2915 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2916 switch (version) { 2917 case 1: 2918 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2919 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2920 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2921 break; 2922 case 2: 2923 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2924 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2925 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2926 break; 2927 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2928 case 3: 2929 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2930 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2931 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2932 break; 2933 #endif 2934 default: 2935 B->ops->mult = MatMult_SeqBAIJ_N; 2936 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2937 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2938 break; 2939 } 2940 break; 2941 } 2942 case 15: 2943 { 2944 PetscInt version = 1; 2945 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2946 switch (version) { 2947 case 1: 2948 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2949 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2950 break; 2951 case 2: 2952 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2953 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2954 break; 2955 case 3: 2956 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2957 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2958 break; 2959 case 4: 2960 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2961 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2962 break; 2963 default: 2964 B->ops->mult = MatMult_SeqBAIJ_N; 2965 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2966 break; 2967 } 2968 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2969 break; 2970 } 2971 default: 2972 B->ops->mult = MatMult_SeqBAIJ_N; 2973 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2974 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2975 break; 2976 } 2977 } 2978 B->ops->sor = MatSOR_SeqBAIJ; 2979 b->mbs = mbs; 2980 b->nbs = nbs; 2981 if (!skipallocation) { 2982 if (!b->imax) { 2983 PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen)); 2984 PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt))); 2985 2986 b->free_imax_ilen = PETSC_TRUE; 2987 } 2988 /* b->ilen will count nonzeros in each block row so far. */ 2989 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2990 if (!nnz) { 2991 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2992 else if (nz < 0) nz = 1; 2993 nz = PetscMin(nz,nbs); 2994 for (i=0; i<mbs; i++) b->imax[i] = nz; 2995 PetscCall(PetscIntMultError(nz,mbs,&nz)); 2996 } else { 2997 PetscInt64 nz64 = 0; 2998 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2999 PetscCall(PetscIntCast(nz64,&nz)); 3000 } 3001 3002 /* allocate the matrix space */ 3003 PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i)); 3004 if (B->structure_only) { 3005 PetscCall(PetscMalloc1(nz,&b->j)); 3006 PetscCall(PetscMalloc1(B->rmap->N+1,&b->i)); 3007 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt))); 3008 } else { 3009 PetscInt nzbs2 = 0; 3010 PetscCall(PetscIntMultError(nz,bs2,&nzbs2)); 3011 PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i)); 3012 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)))); 3013 PetscCall(PetscArrayzero(b->a,nz*bs2)); 3014 } 3015 PetscCall(PetscArrayzero(b->j,nz)); 3016 3017 if (B->structure_only) { 3018 b->singlemalloc = PETSC_FALSE; 3019 b->free_a = PETSC_FALSE; 3020 } else { 3021 b->singlemalloc = PETSC_TRUE; 3022 b->free_a = PETSC_TRUE; 3023 } 3024 b->free_ij = PETSC_TRUE; 3025 3026 b->i[0] = 0; 3027 for (i=1; i<mbs+1; i++) { 3028 b->i[i] = b->i[i-1] + b->imax[i-1]; 3029 } 3030 3031 } else { 3032 b->free_a = PETSC_FALSE; 3033 b->free_ij = PETSC_FALSE; 3034 } 3035 3036 b->bs2 = bs2; 3037 b->mbs = mbs; 3038 b->nz = 0; 3039 b->maxnz = nz; 3040 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3041 B->was_assembled = PETSC_FALSE; 3042 B->assembled = PETSC_FALSE; 3043 if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3044 PetscFunctionReturn(0); 3045 } 3046 3047 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3048 { 3049 PetscInt i,m,nz,nz_max=0,*nnz; 3050 PetscScalar *values=NULL; 3051 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3052 3053 PetscFunctionBegin; 3054 PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 3055 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 3056 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 3057 PetscCall(PetscLayoutSetUp(B->rmap)); 3058 PetscCall(PetscLayoutSetUp(B->cmap)); 3059 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 3060 m = B->rmap->n/bs; 3061 3062 PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 3063 PetscCall(PetscMalloc1(m+1, &nnz)); 3064 for (i=0; i<m; i++) { 3065 nz = ii[i+1]- ii[i]; 3066 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 3067 nz_max = PetscMax(nz_max, nz); 3068 nnz[i] = nz; 3069 } 3070 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 3071 PetscCall(PetscFree(nnz)); 3072 3073 values = (PetscScalar*)V; 3074 if (!values) { 3075 PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values)); 3076 } 3077 for (i=0; i<m; i++) { 3078 PetscInt ncols = ii[i+1] - ii[i]; 3079 const PetscInt *icols = jj + ii[i]; 3080 if (bs == 1 || !roworiented) { 3081 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3082 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES)); 3083 } else { 3084 PetscInt j; 3085 for (j=0; j<ncols; j++) { 3086 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3087 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES)); 3088 } 3089 } 3090 } 3091 if (!V) PetscCall(PetscFree(values)); 3092 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3093 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3094 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /*@C 3099 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3100 3101 Not Collective 3102 3103 Input Parameter: 3104 . mat - a MATSEQBAIJ matrix 3105 3106 Output Parameter: 3107 . array - pointer to the data 3108 3109 Level: intermediate 3110 3111 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3112 @*/ 3113 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3114 { 3115 PetscFunctionBegin; 3116 PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array)); 3117 PetscFunctionReturn(0); 3118 } 3119 3120 /*@C 3121 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3122 3123 Not Collective 3124 3125 Input Parameters: 3126 + mat - a MATSEQBAIJ matrix 3127 - array - pointer to the data 3128 3129 Level: intermediate 3130 3131 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3132 @*/ 3133 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3134 { 3135 PetscFunctionBegin; 3136 PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array)); 3137 PetscFunctionReturn(0); 3138 } 3139 3140 /*MC 3141 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3142 block sparse compressed row format. 3143 3144 Options Database Keys: 3145 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3146 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3147 3148 Level: beginner 3149 3150 Notes: 3151 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3152 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3153 3154 Run with -info to see what version of the matrix-vector product is being used 3155 3156 .seealso: `MatCreateSeqBAIJ()` 3157 M*/ 3158 3159 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3160 3161 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3162 { 3163 PetscMPIInt size; 3164 Mat_SeqBAIJ *b; 3165 3166 PetscFunctionBegin; 3167 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 3168 PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3169 3170 PetscCall(PetscNewLog(B,&b)); 3171 B->data = (void*)b; 3172 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 3173 3174 b->row = NULL; 3175 b->col = NULL; 3176 b->icol = NULL; 3177 b->reallocs = 0; 3178 b->saved_values = NULL; 3179 3180 b->roworiented = PETSC_TRUE; 3181 b->nonew = 0; 3182 b->diag = NULL; 3183 B->spptr = NULL; 3184 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3185 b->keepnonzeropattern = PETSC_FALSE; 3186 3187 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ)); 3188 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ)); 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ)); 3197 #if defined(PETSC_HAVE_HYPRE) 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE)); 3199 #endif 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS)); 3201 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ)); 3202 PetscFunctionReturn(0); 3203 } 3204 3205 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3206 { 3207 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3208 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3209 3210 PetscFunctionBegin; 3211 PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3212 3213 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3214 c->imax = a->imax; 3215 c->ilen = a->ilen; 3216 c->free_imax_ilen = PETSC_FALSE; 3217 } else { 3218 PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen)); 3219 PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt))); 3220 for (i=0; i<mbs; i++) { 3221 c->imax[i] = a->imax[i]; 3222 c->ilen[i] = a->ilen[i]; 3223 } 3224 c->free_imax_ilen = PETSC_TRUE; 3225 } 3226 3227 /* allocate the matrix space */ 3228 if (mallocmatspace) { 3229 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3230 PetscCall(PetscCalloc1(bs2*nz,&c->a)); 3231 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar))); 3232 3233 c->i = a->i; 3234 c->j = a->j; 3235 c->singlemalloc = PETSC_FALSE; 3236 c->free_a = PETSC_TRUE; 3237 c->free_ij = PETSC_FALSE; 3238 c->parent = A; 3239 C->preallocated = PETSC_TRUE; 3240 C->assembled = PETSC_TRUE; 3241 3242 PetscCall(PetscObjectReference((PetscObject)A)); 3243 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3244 PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3245 } else { 3246 PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i)); 3247 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt))); 3248 3249 c->singlemalloc = PETSC_TRUE; 3250 c->free_a = PETSC_TRUE; 3251 c->free_ij = PETSC_TRUE; 3252 3253 PetscCall(PetscArraycpy(c->i,a->i,mbs+1)); 3254 if (mbs > 0) { 3255 PetscCall(PetscArraycpy(c->j,a->j,nz)); 3256 if (cpvalues == MAT_COPY_VALUES) { 3257 PetscCall(PetscArraycpy(c->a,a->a,bs2*nz)); 3258 } else { 3259 PetscCall(PetscArrayzero(c->a,bs2*nz)); 3260 } 3261 } 3262 C->preallocated = PETSC_TRUE; 3263 C->assembled = PETSC_TRUE; 3264 } 3265 } 3266 3267 c->roworiented = a->roworiented; 3268 c->nonew = a->nonew; 3269 3270 PetscCall(PetscLayoutReference(A->rmap,&C->rmap)); 3271 PetscCall(PetscLayoutReference(A->cmap,&C->cmap)); 3272 3273 c->bs2 = a->bs2; 3274 c->mbs = a->mbs; 3275 c->nbs = a->nbs; 3276 3277 if (a->diag) { 3278 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3279 c->diag = a->diag; 3280 c->free_diag = PETSC_FALSE; 3281 } else { 3282 PetscCall(PetscMalloc1(mbs+1,&c->diag)); 3283 PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt))); 3284 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3285 c->free_diag = PETSC_TRUE; 3286 } 3287 } else c->diag = NULL; 3288 3289 c->nz = a->nz; 3290 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3291 c->solve_work = NULL; 3292 c->mult_work = NULL; 3293 c->sor_workt = NULL; 3294 c->sor_work = NULL; 3295 3296 c->compressedrow.use = a->compressedrow.use; 3297 c->compressedrow.nrows = a->compressedrow.nrows; 3298 if (a->compressedrow.use) { 3299 i = a->compressedrow.nrows; 3300 PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex)); 3301 PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt))); 3302 PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1)); 3303 PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i)); 3304 } else { 3305 c->compressedrow.use = PETSC_FALSE; 3306 c->compressedrow.i = NULL; 3307 c->compressedrow.rindex = NULL; 3308 } 3309 C->nonzerostate = A->nonzerostate; 3310 3311 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist)); 3312 PetscFunctionReturn(0); 3313 } 3314 3315 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3316 { 3317 PetscFunctionBegin; 3318 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B)); 3319 PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n)); 3320 PetscCall(MatSetType(*B,MATSEQBAIJ)); 3321 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE)); 3322 PetscFunctionReturn(0); 3323 } 3324 3325 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3326 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3327 { 3328 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3329 PetscInt *rowidxs,*colidxs; 3330 PetscScalar *matvals; 3331 3332 PetscFunctionBegin; 3333 PetscCall(PetscViewerSetUp(viewer)); 3334 3335 /* read matrix header */ 3336 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3337 PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3338 M = header[1]; N = header[2]; nz = header[3]; 3339 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3340 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3341 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3342 3343 /* set block sizes from the viewer's .info file */ 3344 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3345 /* set local and global sizes if not set already */ 3346 if (mat->rmap->n < 0) mat->rmap->n = M; 3347 if (mat->cmap->n < 0) mat->cmap->n = N; 3348 if (mat->rmap->N < 0) mat->rmap->N = M; 3349 if (mat->cmap->N < 0) mat->cmap->N = N; 3350 PetscCall(PetscLayoutSetUp(mat->rmap)); 3351 PetscCall(PetscLayoutSetUp(mat->cmap)); 3352 3353 /* check if the matrix sizes are correct */ 3354 PetscCall(MatGetSize(mat,&rows,&cols)); 3355 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3356 PetscCall(MatGetBlockSize(mat,&bs)); 3357 PetscCall(MatGetLocalSize(mat,&m,&n)); 3358 mbs = m/bs; nbs = n/bs; 3359 3360 /* read in row lengths, column indices and nonzero values */ 3361 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3362 PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT)); 3363 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3364 sum = rowidxs[m]; 3365 PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3366 3367 /* read in column indices and nonzero values */ 3368 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals)); 3369 PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT)); 3370 PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR)); 3371 3372 { /* preallocate matrix storage */ 3373 PetscBT bt; /* helper bit set to count nonzeros */ 3374 PetscInt *nnz; 3375 PetscBool sbaij; 3376 3377 PetscCall(PetscBTCreate(nbs,&bt)); 3378 PetscCall(PetscCalloc1(mbs,&nnz)); 3379 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij)); 3380 for (i=0; i<mbs; i++) { 3381 PetscCall(PetscBTMemzero(nbs,bt)); 3382 for (k=0; k<bs; k++) { 3383 PetscInt row = bs*i + k; 3384 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3385 PetscInt col = colidxs[j]; 3386 if (!sbaij || col >= row) 3387 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3388 } 3389 } 3390 } 3391 PetscCall(PetscBTDestroy(&bt)); 3392 PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz)); 3393 PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz)); 3394 PetscCall(PetscFree(nnz)); 3395 } 3396 3397 /* store matrix values */ 3398 for (i=0; i<m; i++) { 3399 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3400 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3401 } 3402 3403 PetscCall(PetscFree(rowidxs)); 3404 PetscCall(PetscFree2(colidxs,matvals)); 3405 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3406 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3407 PetscFunctionReturn(0); 3408 } 3409 3410 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3411 { 3412 PetscBool isbinary; 3413 3414 PetscFunctionBegin; 3415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3416 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3417 PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer)); 3418 PetscFunctionReturn(0); 3419 } 3420 3421 /*@C 3422 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3423 compressed row) format. For good matrix assembly performance the 3424 user should preallocate the matrix storage by setting the parameter nz 3425 (or the array nnz). By setting these parameters accurately, performance 3426 during matrix assembly can be increased by more than a factor of 50. 3427 3428 Collective 3429 3430 Input Parameters: 3431 + comm - MPI communicator, set to PETSC_COMM_SELF 3432 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3433 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3434 . m - number of rows 3435 . n - number of columns 3436 . nz - number of nonzero blocks per block row (same for all rows) 3437 - nnz - array containing the number of nonzero blocks in the various block rows 3438 (possibly different for each block row) or NULL 3439 3440 Output Parameter: 3441 . A - the matrix 3442 3443 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3444 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3445 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3446 3447 Options Database Keys: 3448 + -mat_no_unroll - uses code that does not unroll the loops in the 3449 block calculations (much slower) 3450 - -mat_block_size - size of the blocks to use 3451 3452 Level: intermediate 3453 3454 Notes: 3455 The number of rows and columns must be divisible by blocksize. 3456 3457 If the nnz parameter is given then the nz parameter is ignored 3458 3459 A nonzero block is any block that as 1 or more nonzeros in it 3460 3461 The block AIJ format is fully compatible with standard Fortran 77 3462 storage. That is, the stored row and column indices can begin at 3463 either one (as in Fortran) or zero. See the users' manual for details. 3464 3465 Specify the preallocated storage with either nz or nnz (not both). 3466 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3467 allocation. See Users-Manual: ch_mat for details. 3468 matrices. 3469 3470 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3471 @*/ 3472 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3473 { 3474 PetscFunctionBegin; 3475 PetscCall(MatCreate(comm,A)); 3476 PetscCall(MatSetSizes(*A,m,n,m,n)); 3477 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3478 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz)); 3479 PetscFunctionReturn(0); 3480 } 3481 3482 /*@C 3483 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3484 per row in the matrix. For good matrix assembly performance the 3485 user should preallocate the matrix storage by setting the parameter nz 3486 (or the array nnz). By setting these parameters accurately, performance 3487 during matrix assembly can be increased by more than a factor of 50. 3488 3489 Collective 3490 3491 Input Parameters: 3492 + B - the matrix 3493 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3494 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3495 . nz - number of block nonzeros per block row (same for all rows) 3496 - nnz - array containing the number of block nonzeros in the various block rows 3497 (possibly different for each block row) or NULL 3498 3499 Options Database Keys: 3500 + -mat_no_unroll - uses code that does not unroll the loops in the 3501 block calculations (much slower) 3502 - -mat_block_size - size of the blocks to use 3503 3504 Level: intermediate 3505 3506 Notes: 3507 If the nnz parameter is given then the nz parameter is ignored 3508 3509 You can call MatGetInfo() to get information on how effective the preallocation was; 3510 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3511 You can also run with the option -info and look for messages with the string 3512 malloc in them to see if additional memory allocation was needed. 3513 3514 The block AIJ format is fully compatible with standard Fortran 77 3515 storage. That is, the stored row and column indices can begin at 3516 either one (as in Fortran) or zero. See the users' manual for details. 3517 3518 Specify the preallocated storage with either nz or nnz (not both). 3519 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3520 allocation. See Users-Manual: ch_mat for details. 3521 3522 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3523 @*/ 3524 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3525 { 3526 PetscFunctionBegin; 3527 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3528 PetscValidType(B,1); 3529 PetscValidLogicalCollectiveInt(B,bs,2); 3530 PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz)); 3531 PetscFunctionReturn(0); 3532 } 3533 3534 /*@C 3535 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3536 3537 Collective 3538 3539 Input Parameters: 3540 + B - the matrix 3541 . i - the indices into j for the start of each local row (starts with zero) 3542 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3543 - v - optional values in the matrix 3544 3545 Level: advanced 3546 3547 Notes: 3548 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3549 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3550 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3551 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3552 block column and the second index is over columns within a block. 3553 3554 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3555 3556 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3557 @*/ 3558 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3559 { 3560 PetscFunctionBegin; 3561 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3562 PetscValidType(B,1); 3563 PetscValidLogicalCollectiveInt(B,bs,2); 3564 PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 3565 PetscFunctionReturn(0); 3566 } 3567 3568 /*@ 3569 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3570 3571 Collective 3572 3573 Input Parameters: 3574 + comm - must be an MPI communicator of size 1 3575 . bs - size of block 3576 . m - number of rows 3577 . n - number of columns 3578 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3579 . j - column indices 3580 - a - matrix values 3581 3582 Output Parameter: 3583 . mat - the matrix 3584 3585 Level: advanced 3586 3587 Notes: 3588 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3589 once the matrix is destroyed 3590 3591 You cannot set new nonzero locations into this matrix, that will generate an error. 3592 3593 The i and j indices are 0 based 3594 3595 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3596 3597 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3598 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3599 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3600 with column-major ordering within blocks. 3601 3602 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3603 3604 @*/ 3605 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3606 { 3607 PetscInt ii; 3608 Mat_SeqBAIJ *baij; 3609 3610 PetscFunctionBegin; 3611 PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs); 3612 if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3613 3614 PetscCall(MatCreate(comm,mat)); 3615 PetscCall(MatSetSizes(*mat,m,n,m,n)); 3616 PetscCall(MatSetType(*mat,MATSEQBAIJ)); 3617 PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL)); 3618 baij = (Mat_SeqBAIJ*)(*mat)->data; 3619 PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen)); 3620 PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt))); 3621 3622 baij->i = i; 3623 baij->j = j; 3624 baij->a = a; 3625 3626 baij->singlemalloc = PETSC_FALSE; 3627 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3628 baij->free_a = PETSC_FALSE; 3629 baij->free_ij = PETSC_FALSE; 3630 3631 for (ii=0; ii<m; ii++) { 3632 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3633 PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]); 3634 } 3635 if (PetscDefined(USE_DEBUG)) { 3636 for (ii=0; ii<baij->i[m]; ii++) { 3637 PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3638 PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3639 } 3640 } 3641 3642 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3643 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3644 PetscFunctionReturn(0); 3645 } 3646 3647 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3648 { 3649 PetscMPIInt size; 3650 3651 PetscFunctionBegin; 3652 PetscCallMPI(MPI_Comm_size(comm,&size)); 3653 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3654 PetscCall(MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN)); 3655 } else { 3656 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat)); 3657 } 3658 PetscFunctionReturn(0); 3659 } 3660