1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petsc/private/kernels/blockmatmult.h> 9 10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 11 #define TYPE BAIJ 12 #define TYPE_BS 13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 14 #undef TYPE_BS 15 #define TYPE_BS _BS 16 #define TYPE_BS_ON 17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 18 #undef TYPE_BS 19 #include "../src/mat/impls/aij/seq/seqhashmat.h" 20 #undef TYPE 21 #undef TYPE_BS_ON 22 23 #if defined(PETSC_HAVE_HYPRE) 24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 25 #endif 26 27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29 #endif 30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31 32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 33 { 34 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 35 PetscInt m, n, ib, jb, bs = A->rmap->bs; 36 MatScalar *a_val = a_aij->a; 37 38 PetscFunctionBegin; 39 PetscCall(MatGetSize(A, &m, &n)); 40 PetscCall(PetscArrayzero(reductions, n)); 41 if (type == NORM_2) { 42 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 43 for (jb = 0; jb < bs; jb++) { 44 for (ib = 0; ib < bs; ib++) { 45 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 46 a_val++; 47 } 48 } 49 } 50 } else if (type == NORM_1) { 51 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 52 for (jb = 0; jb < bs; jb++) { 53 for (ib = 0; ib < bs; ib++) { 54 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 55 a_val++; 56 } 57 } 58 } 59 } else if (type == NORM_INFINITY) { 60 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 61 for (jb = 0; jb < bs; jb++) { 62 for (ib = 0; ib < bs; ib++) { 63 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 64 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 65 a_val++; 66 } 67 } 68 } 69 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 70 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 71 for (jb = 0; jb < bs; jb++) { 72 for (ib = 0; ib < bs; ib++) { 73 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 74 a_val++; 75 } 76 } 77 } 78 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 80 for (jb = 0; jb < bs; jb++) { 81 for (ib = 0; ib < bs; ib++) { 82 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 83 a_val++; 84 } 85 } 86 } 87 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 88 if (type == NORM_2) { 89 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 90 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 91 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 92 } 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 97 { 98 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 99 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 100 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 101 PetscReal shift = 0.0; 102 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 103 104 PetscFunctionBegin; 105 allowzeropivot = PetscNot(A->erroriffailure); 106 107 if (a->idiagvalid) { 108 if (values) *values = a->idiag; 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 112 diag_offset = a->diag; 113 if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 114 diag = a->idiag; 115 if (values) *values = a->idiag; 116 /* factor and invert each block */ 117 switch (bs) { 118 case 1: 119 for (i = 0; i < mbs; i++) { 120 odiag = v + 1 * diag_offset[i]; 121 diag[0] = odiag[0]; 122 123 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 124 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 125 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 127 A->factorerror_zeropivot_row = i; 128 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 129 } 130 131 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 132 diag += 1; 133 } 134 break; 135 case 2: 136 for (i = 0; i < mbs; i++) { 137 odiag = v + 4 * diag_offset[i]; 138 diag[0] = odiag[0]; 139 diag[1] = odiag[1]; 140 diag[2] = odiag[2]; 141 diag[3] = odiag[3]; 142 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 143 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 144 diag += 4; 145 } 146 break; 147 case 3: 148 for (i = 0; i < mbs; i++) { 149 odiag = v + 9 * diag_offset[i]; 150 diag[0] = odiag[0]; 151 diag[1] = odiag[1]; 152 diag[2] = odiag[2]; 153 diag[3] = odiag[3]; 154 diag[4] = odiag[4]; 155 diag[5] = odiag[5]; 156 diag[6] = odiag[6]; 157 diag[7] = odiag[7]; 158 diag[8] = odiag[8]; 159 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 160 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 161 diag += 9; 162 } 163 break; 164 case 4: 165 for (i = 0; i < mbs; i++) { 166 odiag = v + 16 * diag_offset[i]; 167 PetscCall(PetscArraycpy(diag, odiag, 16)); 168 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 169 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 170 diag += 16; 171 } 172 break; 173 case 5: 174 for (i = 0; i < mbs; i++) { 175 odiag = v + 25 * diag_offset[i]; 176 PetscCall(PetscArraycpy(diag, odiag, 25)); 177 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 178 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 179 diag += 25; 180 } 181 break; 182 case 6: 183 for (i = 0; i < mbs; i++) { 184 odiag = v + 36 * diag_offset[i]; 185 PetscCall(PetscArraycpy(diag, odiag, 36)); 186 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 187 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 188 diag += 36; 189 } 190 break; 191 case 7: 192 for (i = 0; i < mbs; i++) { 193 odiag = v + 49 * diag_offset[i]; 194 PetscCall(PetscArraycpy(diag, odiag, 49)); 195 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 196 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 197 diag += 49; 198 } 199 break; 200 default: 201 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 202 for (i = 0; i < mbs; i++) { 203 odiag = v + bs2 * diag_offset[i]; 204 PetscCall(PetscArraycpy(diag, odiag, bs2)); 205 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 206 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 207 diag += bs2; 208 } 209 PetscCall(PetscFree2(v_work, v_pivots)); 210 } 211 a->idiagvalid = PETSC_TRUE; 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 218 PetscScalar *x, *work, *w, *workt, *t; 219 const MatScalar *v, *aa = a->a, *idiag; 220 const PetscScalar *b, *xb; 221 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 222 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 223 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 224 225 PetscFunctionBegin; 226 its = its * lits; 227 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 228 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 229 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 230 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 231 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 232 233 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 234 235 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 236 diag = a->diag; 237 idiag = a->idiag; 238 k = PetscMax(A->rmap->n, A->cmap->n); 239 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 240 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 241 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 242 work = a->mult_work; 243 t = a->sor_workt; 244 w = a->sor_work; 245 246 PetscCall(VecGetArray(xx, &x)); 247 PetscCall(VecGetArrayRead(bb, &b)); 248 249 if (flag & SOR_ZERO_INITIAL_GUESS) { 250 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 251 switch (bs) { 252 case 1: 253 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 254 t[0] = b[0]; 255 i2 = 1; 256 idiag += 1; 257 for (i = 1; i < m; i++) { 258 v = aa + ai[i]; 259 vi = aj + ai[i]; 260 nz = diag[i] - ai[i]; 261 s[0] = b[i2]; 262 for (j = 0; j < nz; j++) { 263 xw[0] = x[vi[j]]; 264 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 265 } 266 t[i2] = s[0]; 267 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 268 x[i2] = xw[0]; 269 idiag += 1; 270 i2 += 1; 271 } 272 break; 273 case 2: 274 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 275 t[0] = b[0]; 276 t[1] = b[1]; 277 i2 = 2; 278 idiag += 4; 279 for (i = 1; i < m; i++) { 280 v = aa + 4 * ai[i]; 281 vi = aj + ai[i]; 282 nz = diag[i] - ai[i]; 283 s[0] = b[i2]; 284 s[1] = b[i2 + 1]; 285 for (j = 0; j < nz; j++) { 286 idx = 2 * vi[j]; 287 it = 4 * j; 288 xw[0] = x[idx]; 289 xw[1] = x[1 + idx]; 290 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 291 } 292 t[i2] = s[0]; 293 t[i2 + 1] = s[1]; 294 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 295 x[i2] = xw[0]; 296 x[i2 + 1] = xw[1]; 297 idiag += 4; 298 i2 += 2; 299 } 300 break; 301 case 3: 302 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 303 t[0] = b[0]; 304 t[1] = b[1]; 305 t[2] = b[2]; 306 i2 = 3; 307 idiag += 9; 308 for (i = 1; i < m; i++) { 309 v = aa + 9 * ai[i]; 310 vi = aj + ai[i]; 311 nz = diag[i] - ai[i]; 312 s[0] = b[i2]; 313 s[1] = b[i2 + 1]; 314 s[2] = b[i2 + 2]; 315 while (nz--) { 316 idx = 3 * (*vi++); 317 xw[0] = x[idx]; 318 xw[1] = x[1 + idx]; 319 xw[2] = x[2 + idx]; 320 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 321 v += 9; 322 } 323 t[i2] = s[0]; 324 t[i2 + 1] = s[1]; 325 t[i2 + 2] = s[2]; 326 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 327 x[i2] = xw[0]; 328 x[i2 + 1] = xw[1]; 329 x[i2 + 2] = xw[2]; 330 idiag += 9; 331 i2 += 3; 332 } 333 break; 334 case 4: 335 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 336 t[0] = b[0]; 337 t[1] = b[1]; 338 t[2] = b[2]; 339 t[3] = b[3]; 340 i2 = 4; 341 idiag += 16; 342 for (i = 1; i < m; i++) { 343 v = aa + 16 * ai[i]; 344 vi = aj + ai[i]; 345 nz = diag[i] - ai[i]; 346 s[0] = b[i2]; 347 s[1] = b[i2 + 1]; 348 s[2] = b[i2 + 2]; 349 s[3] = b[i2 + 3]; 350 while (nz--) { 351 idx = 4 * (*vi++); 352 xw[0] = x[idx]; 353 xw[1] = x[1 + idx]; 354 xw[2] = x[2 + idx]; 355 xw[3] = x[3 + idx]; 356 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 357 v += 16; 358 } 359 t[i2] = s[0]; 360 t[i2 + 1] = s[1]; 361 t[i2 + 2] = s[2]; 362 t[i2 + 3] = s[3]; 363 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 364 x[i2] = xw[0]; 365 x[i2 + 1] = xw[1]; 366 x[i2 + 2] = xw[2]; 367 x[i2 + 3] = xw[3]; 368 idiag += 16; 369 i2 += 4; 370 } 371 break; 372 case 5: 373 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 374 t[0] = b[0]; 375 t[1] = b[1]; 376 t[2] = b[2]; 377 t[3] = b[3]; 378 t[4] = b[4]; 379 i2 = 5; 380 idiag += 25; 381 for (i = 1; i < m; i++) { 382 v = aa + 25 * ai[i]; 383 vi = aj + ai[i]; 384 nz = diag[i] - ai[i]; 385 s[0] = b[i2]; 386 s[1] = b[i2 + 1]; 387 s[2] = b[i2 + 2]; 388 s[3] = b[i2 + 3]; 389 s[4] = b[i2 + 4]; 390 while (nz--) { 391 idx = 5 * (*vi++); 392 xw[0] = x[idx]; 393 xw[1] = x[1 + idx]; 394 xw[2] = x[2 + idx]; 395 xw[3] = x[3 + idx]; 396 xw[4] = x[4 + idx]; 397 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 398 v += 25; 399 } 400 t[i2] = s[0]; 401 t[i2 + 1] = s[1]; 402 t[i2 + 2] = s[2]; 403 t[i2 + 3] = s[3]; 404 t[i2 + 4] = s[4]; 405 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 406 x[i2] = xw[0]; 407 x[i2 + 1] = xw[1]; 408 x[i2 + 2] = xw[2]; 409 x[i2 + 3] = xw[3]; 410 x[i2 + 4] = xw[4]; 411 idiag += 25; 412 i2 += 5; 413 } 414 break; 415 case 6: 416 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 417 t[0] = b[0]; 418 t[1] = b[1]; 419 t[2] = b[2]; 420 t[3] = b[3]; 421 t[4] = b[4]; 422 t[5] = b[5]; 423 i2 = 6; 424 idiag += 36; 425 for (i = 1; i < m; i++) { 426 v = aa + 36 * ai[i]; 427 vi = aj + ai[i]; 428 nz = diag[i] - ai[i]; 429 s[0] = b[i2]; 430 s[1] = b[i2 + 1]; 431 s[2] = b[i2 + 2]; 432 s[3] = b[i2 + 3]; 433 s[4] = b[i2 + 4]; 434 s[5] = b[i2 + 5]; 435 while (nz--) { 436 idx = 6 * (*vi++); 437 xw[0] = x[idx]; 438 xw[1] = x[1 + idx]; 439 xw[2] = x[2 + idx]; 440 xw[3] = x[3 + idx]; 441 xw[4] = x[4 + idx]; 442 xw[5] = x[5 + idx]; 443 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 444 v += 36; 445 } 446 t[i2] = s[0]; 447 t[i2 + 1] = s[1]; 448 t[i2 + 2] = s[2]; 449 t[i2 + 3] = s[3]; 450 t[i2 + 4] = s[4]; 451 t[i2 + 5] = s[5]; 452 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 453 x[i2] = xw[0]; 454 x[i2 + 1] = xw[1]; 455 x[i2 + 2] = xw[2]; 456 x[i2 + 3] = xw[3]; 457 x[i2 + 4] = xw[4]; 458 x[i2 + 5] = xw[5]; 459 idiag += 36; 460 i2 += 6; 461 } 462 break; 463 case 7: 464 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 465 t[0] = b[0]; 466 t[1] = b[1]; 467 t[2] = b[2]; 468 t[3] = b[3]; 469 t[4] = b[4]; 470 t[5] = b[5]; 471 t[6] = b[6]; 472 i2 = 7; 473 idiag += 49; 474 for (i = 1; i < m; i++) { 475 v = aa + 49 * ai[i]; 476 vi = aj + ai[i]; 477 nz = diag[i] - ai[i]; 478 s[0] = b[i2]; 479 s[1] = b[i2 + 1]; 480 s[2] = b[i2 + 2]; 481 s[3] = b[i2 + 3]; 482 s[4] = b[i2 + 4]; 483 s[5] = b[i2 + 5]; 484 s[6] = b[i2 + 6]; 485 while (nz--) { 486 idx = 7 * (*vi++); 487 xw[0] = x[idx]; 488 xw[1] = x[1 + idx]; 489 xw[2] = x[2 + idx]; 490 xw[3] = x[3 + idx]; 491 xw[4] = x[4 + idx]; 492 xw[5] = x[5 + idx]; 493 xw[6] = x[6 + idx]; 494 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 495 v += 49; 496 } 497 t[i2] = s[0]; 498 t[i2 + 1] = s[1]; 499 t[i2 + 2] = s[2]; 500 t[i2 + 3] = s[3]; 501 t[i2 + 4] = s[4]; 502 t[i2 + 5] = s[5]; 503 t[i2 + 6] = s[6]; 504 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 505 x[i2] = xw[0]; 506 x[i2 + 1] = xw[1]; 507 x[i2 + 2] = xw[2]; 508 x[i2 + 3] = xw[3]; 509 x[i2 + 4] = xw[4]; 510 x[i2 + 5] = xw[5]; 511 x[i2 + 6] = xw[6]; 512 idiag += 49; 513 i2 += 7; 514 } 515 break; 516 default: 517 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 518 PetscCall(PetscArraycpy(t, b, bs)); 519 i2 = bs; 520 idiag += bs2; 521 for (i = 1; i < m; i++) { 522 v = aa + bs2 * ai[i]; 523 vi = aj + ai[i]; 524 nz = diag[i] - ai[i]; 525 526 PetscCall(PetscArraycpy(w, b + i2, bs)); 527 /* copy all rows of x that are needed into contiguous space */ 528 workt = work; 529 for (j = 0; j < nz; j++) { 530 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 531 workt += bs; 532 } 533 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 534 PetscCall(PetscArraycpy(t + i2, w, bs)); 535 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 536 537 idiag += bs2; 538 i2 += bs; 539 } 540 break; 541 } 542 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 543 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 544 xb = t; 545 } else xb = b; 546 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 547 idiag = a->idiag + bs2 * (a->mbs - 1); 548 i2 = bs * (m - 1); 549 switch (bs) { 550 case 1: 551 s[0] = xb[i2]; 552 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 553 x[i2] = xw[0]; 554 i2 -= 1; 555 for (i = m - 2; i >= 0; i--) { 556 v = aa + (diag[i] + 1); 557 vi = aj + diag[i] + 1; 558 nz = ai[i + 1] - diag[i] - 1; 559 s[0] = xb[i2]; 560 for (j = 0; j < nz; j++) { 561 xw[0] = x[vi[j]]; 562 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 563 } 564 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 565 x[i2] = xw[0]; 566 idiag -= 1; 567 i2 -= 1; 568 } 569 break; 570 case 2: 571 s[0] = xb[i2]; 572 s[1] = xb[i2 + 1]; 573 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 574 x[i2] = xw[0]; 575 x[i2 + 1] = xw[1]; 576 i2 -= 2; 577 idiag -= 4; 578 for (i = m - 2; i >= 0; i--) { 579 v = aa + 4 * (diag[i] + 1); 580 vi = aj + diag[i] + 1; 581 nz = ai[i + 1] - diag[i] - 1; 582 s[0] = xb[i2]; 583 s[1] = xb[i2 + 1]; 584 for (j = 0; j < nz; j++) { 585 idx = 2 * vi[j]; 586 it = 4 * j; 587 xw[0] = x[idx]; 588 xw[1] = x[1 + idx]; 589 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 590 } 591 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 592 x[i2] = xw[0]; 593 x[i2 + 1] = xw[1]; 594 idiag -= 4; 595 i2 -= 2; 596 } 597 break; 598 case 3: 599 s[0] = xb[i2]; 600 s[1] = xb[i2 + 1]; 601 s[2] = xb[i2 + 2]; 602 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 603 x[i2] = xw[0]; 604 x[i2 + 1] = xw[1]; 605 x[i2 + 2] = xw[2]; 606 i2 -= 3; 607 idiag -= 9; 608 for (i = m - 2; i >= 0; i--) { 609 v = aa + 9 * (diag[i] + 1); 610 vi = aj + diag[i] + 1; 611 nz = ai[i + 1] - diag[i] - 1; 612 s[0] = xb[i2]; 613 s[1] = xb[i2 + 1]; 614 s[2] = xb[i2 + 2]; 615 while (nz--) { 616 idx = 3 * (*vi++); 617 xw[0] = x[idx]; 618 xw[1] = x[1 + idx]; 619 xw[2] = x[2 + idx]; 620 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 621 v += 9; 622 } 623 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 624 x[i2] = xw[0]; 625 x[i2 + 1] = xw[1]; 626 x[i2 + 2] = xw[2]; 627 idiag -= 9; 628 i2 -= 3; 629 } 630 break; 631 case 4: 632 s[0] = xb[i2]; 633 s[1] = xb[i2 + 1]; 634 s[2] = xb[i2 + 2]; 635 s[3] = xb[i2 + 3]; 636 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 637 x[i2] = xw[0]; 638 x[i2 + 1] = xw[1]; 639 x[i2 + 2] = xw[2]; 640 x[i2 + 3] = xw[3]; 641 i2 -= 4; 642 idiag -= 16; 643 for (i = m - 2; i >= 0; i--) { 644 v = aa + 16 * (diag[i] + 1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i + 1] - diag[i] - 1; 647 s[0] = xb[i2]; 648 s[1] = xb[i2 + 1]; 649 s[2] = xb[i2 + 2]; 650 s[3] = xb[i2 + 3]; 651 while (nz--) { 652 idx = 4 * (*vi++); 653 xw[0] = x[idx]; 654 xw[1] = x[1 + idx]; 655 xw[2] = x[2 + idx]; 656 xw[3] = x[3 + idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 658 v += 16; 659 } 660 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 661 x[i2] = xw[0]; 662 x[i2 + 1] = xw[1]; 663 x[i2 + 2] = xw[2]; 664 x[i2 + 3] = xw[3]; 665 idiag -= 16; 666 i2 -= 4; 667 } 668 break; 669 case 5: 670 s[0] = xb[i2]; 671 s[1] = xb[i2 + 1]; 672 s[2] = xb[i2 + 2]; 673 s[3] = xb[i2 + 3]; 674 s[4] = xb[i2 + 4]; 675 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 676 x[i2] = xw[0]; 677 x[i2 + 1] = xw[1]; 678 x[i2 + 2] = xw[2]; 679 x[i2 + 3] = xw[3]; 680 x[i2 + 4] = xw[4]; 681 i2 -= 5; 682 idiag -= 25; 683 for (i = m - 2; i >= 0; i--) { 684 v = aa + 25 * (diag[i] + 1); 685 vi = aj + diag[i] + 1; 686 nz = ai[i + 1] - diag[i] - 1; 687 s[0] = xb[i2]; 688 s[1] = xb[i2 + 1]; 689 s[2] = xb[i2 + 2]; 690 s[3] = xb[i2 + 3]; 691 s[4] = xb[i2 + 4]; 692 while (nz--) { 693 idx = 5 * (*vi++); 694 xw[0] = x[idx]; 695 xw[1] = x[1 + idx]; 696 xw[2] = x[2 + idx]; 697 xw[3] = x[3 + idx]; 698 xw[4] = x[4 + idx]; 699 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 700 v += 25; 701 } 702 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 703 x[i2] = xw[0]; 704 x[i2 + 1] = xw[1]; 705 x[i2 + 2] = xw[2]; 706 x[i2 + 3] = xw[3]; 707 x[i2 + 4] = xw[4]; 708 idiag -= 25; 709 i2 -= 5; 710 } 711 break; 712 case 6: 713 s[0] = xb[i2]; 714 s[1] = xb[i2 + 1]; 715 s[2] = xb[i2 + 2]; 716 s[3] = xb[i2 + 3]; 717 s[4] = xb[i2 + 4]; 718 s[5] = xb[i2 + 5]; 719 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 720 x[i2] = xw[0]; 721 x[i2 + 1] = xw[1]; 722 x[i2 + 2] = xw[2]; 723 x[i2 + 3] = xw[3]; 724 x[i2 + 4] = xw[4]; 725 x[i2 + 5] = xw[5]; 726 i2 -= 6; 727 idiag -= 36; 728 for (i = m - 2; i >= 0; i--) { 729 v = aa + 36 * (diag[i] + 1); 730 vi = aj + diag[i] + 1; 731 nz = ai[i + 1] - diag[i] - 1; 732 s[0] = xb[i2]; 733 s[1] = xb[i2 + 1]; 734 s[2] = xb[i2 + 2]; 735 s[3] = xb[i2 + 3]; 736 s[4] = xb[i2 + 4]; 737 s[5] = xb[i2 + 5]; 738 while (nz--) { 739 idx = 6 * (*vi++); 740 xw[0] = x[idx]; 741 xw[1] = x[1 + idx]; 742 xw[2] = x[2 + idx]; 743 xw[3] = x[3 + idx]; 744 xw[4] = x[4 + idx]; 745 xw[5] = x[5 + idx]; 746 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 747 v += 36; 748 } 749 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 750 x[i2] = xw[0]; 751 x[i2 + 1] = xw[1]; 752 x[i2 + 2] = xw[2]; 753 x[i2 + 3] = xw[3]; 754 x[i2 + 4] = xw[4]; 755 x[i2 + 5] = xw[5]; 756 idiag -= 36; 757 i2 -= 6; 758 } 759 break; 760 case 7: 761 s[0] = xb[i2]; 762 s[1] = xb[i2 + 1]; 763 s[2] = xb[i2 + 2]; 764 s[3] = xb[i2 + 3]; 765 s[4] = xb[i2 + 4]; 766 s[5] = xb[i2 + 5]; 767 s[6] = xb[i2 + 6]; 768 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 769 x[i2] = xw[0]; 770 x[i2 + 1] = xw[1]; 771 x[i2 + 2] = xw[2]; 772 x[i2 + 3] = xw[3]; 773 x[i2 + 4] = xw[4]; 774 x[i2 + 5] = xw[5]; 775 x[i2 + 6] = xw[6]; 776 i2 -= 7; 777 idiag -= 49; 778 for (i = m - 2; i >= 0; i--) { 779 v = aa + 49 * (diag[i] + 1); 780 vi = aj + diag[i] + 1; 781 nz = ai[i + 1] - diag[i] - 1; 782 s[0] = xb[i2]; 783 s[1] = xb[i2 + 1]; 784 s[2] = xb[i2 + 2]; 785 s[3] = xb[i2 + 3]; 786 s[4] = xb[i2 + 4]; 787 s[5] = xb[i2 + 5]; 788 s[6] = xb[i2 + 6]; 789 while (nz--) { 790 idx = 7 * (*vi++); 791 xw[0] = x[idx]; 792 xw[1] = x[1 + idx]; 793 xw[2] = x[2 + idx]; 794 xw[3] = x[3 + idx]; 795 xw[4] = x[4 + idx]; 796 xw[5] = x[5 + idx]; 797 xw[6] = x[6 + idx]; 798 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 799 v += 49; 800 } 801 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 802 x[i2] = xw[0]; 803 x[i2 + 1] = xw[1]; 804 x[i2 + 2] = xw[2]; 805 x[i2 + 3] = xw[3]; 806 x[i2 + 4] = xw[4]; 807 x[i2 + 5] = xw[5]; 808 x[i2 + 6] = xw[6]; 809 idiag -= 49; 810 i2 -= 7; 811 } 812 break; 813 default: 814 PetscCall(PetscArraycpy(w, xb + i2, bs)); 815 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 816 i2 -= bs; 817 idiag -= bs2; 818 for (i = m - 2; i >= 0; i--) { 819 v = aa + bs2 * (diag[i] + 1); 820 vi = aj + diag[i] + 1; 821 nz = ai[i + 1] - diag[i] - 1; 822 823 PetscCall(PetscArraycpy(w, xb + i2, bs)); 824 /* copy all rows of x that are needed into contiguous space */ 825 workt = work; 826 for (j = 0; j < nz; j++) { 827 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 828 workt += bs; 829 } 830 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 831 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 832 833 idiag -= bs2; 834 i2 -= bs; 835 } 836 break; 837 } 838 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 839 } 840 its--; 841 } 842 while (its--) { 843 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 844 idiag = a->idiag; 845 i2 = 0; 846 switch (bs) { 847 case 1: 848 for (i = 0; i < m; i++) { 849 v = aa + ai[i]; 850 vi = aj + ai[i]; 851 nz = ai[i + 1] - ai[i]; 852 s[0] = b[i2]; 853 for (j = 0; j < nz; j++) { 854 xw[0] = x[vi[j]]; 855 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 856 } 857 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 858 x[i2] += xw[0]; 859 idiag += 1; 860 i2 += 1; 861 } 862 break; 863 case 2: 864 for (i = 0; i < m; i++) { 865 v = aa + 4 * ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i + 1] - ai[i]; 868 s[0] = b[i2]; 869 s[1] = b[i2 + 1]; 870 for (j = 0; j < nz; j++) { 871 idx = 2 * vi[j]; 872 it = 4 * j; 873 xw[0] = x[idx]; 874 xw[1] = x[1 + idx]; 875 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 876 } 877 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 878 x[i2] += xw[0]; 879 x[i2 + 1] += xw[1]; 880 idiag += 4; 881 i2 += 2; 882 } 883 break; 884 case 3: 885 for (i = 0; i < m; i++) { 886 v = aa + 9 * ai[i]; 887 vi = aj + ai[i]; 888 nz = ai[i + 1] - ai[i]; 889 s[0] = b[i2]; 890 s[1] = b[i2 + 1]; 891 s[2] = b[i2 + 2]; 892 while (nz--) { 893 idx = 3 * (*vi++); 894 xw[0] = x[idx]; 895 xw[1] = x[1 + idx]; 896 xw[2] = x[2 + idx]; 897 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 898 v += 9; 899 } 900 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 901 x[i2] += xw[0]; 902 x[i2 + 1] += xw[1]; 903 x[i2 + 2] += xw[2]; 904 idiag += 9; 905 i2 += 3; 906 } 907 break; 908 case 4: 909 for (i = 0; i < m; i++) { 910 v = aa + 16 * ai[i]; 911 vi = aj + ai[i]; 912 nz = ai[i + 1] - ai[i]; 913 s[0] = b[i2]; 914 s[1] = b[i2 + 1]; 915 s[2] = b[i2 + 2]; 916 s[3] = b[i2 + 3]; 917 while (nz--) { 918 idx = 4 * (*vi++); 919 xw[0] = x[idx]; 920 xw[1] = x[1 + idx]; 921 xw[2] = x[2 + idx]; 922 xw[3] = x[3 + idx]; 923 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 924 v += 16; 925 } 926 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 927 x[i2] += xw[0]; 928 x[i2 + 1] += xw[1]; 929 x[i2 + 2] += xw[2]; 930 x[i2 + 3] += xw[3]; 931 idiag += 16; 932 i2 += 4; 933 } 934 break; 935 case 5: 936 for (i = 0; i < m; i++) { 937 v = aa + 25 * ai[i]; 938 vi = aj + ai[i]; 939 nz = ai[i + 1] - ai[i]; 940 s[0] = b[i2]; 941 s[1] = b[i2 + 1]; 942 s[2] = b[i2 + 2]; 943 s[3] = b[i2 + 3]; 944 s[4] = b[i2 + 4]; 945 while (nz--) { 946 idx = 5 * (*vi++); 947 xw[0] = x[idx]; 948 xw[1] = x[1 + idx]; 949 xw[2] = x[2 + idx]; 950 xw[3] = x[3 + idx]; 951 xw[4] = x[4 + idx]; 952 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 953 v += 25; 954 } 955 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 956 x[i2] += xw[0]; 957 x[i2 + 1] += xw[1]; 958 x[i2 + 2] += xw[2]; 959 x[i2 + 3] += xw[3]; 960 x[i2 + 4] += xw[4]; 961 idiag += 25; 962 i2 += 5; 963 } 964 break; 965 case 6: 966 for (i = 0; i < m; i++) { 967 v = aa + 36 * ai[i]; 968 vi = aj + ai[i]; 969 nz = ai[i + 1] - ai[i]; 970 s[0] = b[i2]; 971 s[1] = b[i2 + 1]; 972 s[2] = b[i2 + 2]; 973 s[3] = b[i2 + 3]; 974 s[4] = b[i2 + 4]; 975 s[5] = b[i2 + 5]; 976 while (nz--) { 977 idx = 6 * (*vi++); 978 xw[0] = x[idx]; 979 xw[1] = x[1 + idx]; 980 xw[2] = x[2 + idx]; 981 xw[3] = x[3 + idx]; 982 xw[4] = x[4 + idx]; 983 xw[5] = x[5 + idx]; 984 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 985 v += 36; 986 } 987 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 988 x[i2] += xw[0]; 989 x[i2 + 1] += xw[1]; 990 x[i2 + 2] += xw[2]; 991 x[i2 + 3] += xw[3]; 992 x[i2 + 4] += xw[4]; 993 x[i2 + 5] += xw[5]; 994 idiag += 36; 995 i2 += 6; 996 } 997 break; 998 case 7: 999 for (i = 0; i < m; i++) { 1000 v = aa + 49 * ai[i]; 1001 vi = aj + ai[i]; 1002 nz = ai[i + 1] - ai[i]; 1003 s[0] = b[i2]; 1004 s[1] = b[i2 + 1]; 1005 s[2] = b[i2 + 2]; 1006 s[3] = b[i2 + 3]; 1007 s[4] = b[i2 + 4]; 1008 s[5] = b[i2 + 5]; 1009 s[6] = b[i2 + 6]; 1010 while (nz--) { 1011 idx = 7 * (*vi++); 1012 xw[0] = x[idx]; 1013 xw[1] = x[1 + idx]; 1014 xw[2] = x[2 + idx]; 1015 xw[3] = x[3 + idx]; 1016 xw[4] = x[4 + idx]; 1017 xw[5] = x[5 + idx]; 1018 xw[6] = x[6 + idx]; 1019 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1020 v += 49; 1021 } 1022 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1023 x[i2] += xw[0]; 1024 x[i2 + 1] += xw[1]; 1025 x[i2 + 2] += xw[2]; 1026 x[i2 + 3] += xw[3]; 1027 x[i2 + 4] += xw[4]; 1028 x[i2 + 5] += xw[5]; 1029 x[i2 + 6] += xw[6]; 1030 idiag += 49; 1031 i2 += 7; 1032 } 1033 break; 1034 default: 1035 for (i = 0; i < m; i++) { 1036 v = aa + bs2 * ai[i]; 1037 vi = aj + ai[i]; 1038 nz = ai[i + 1] - ai[i]; 1039 1040 PetscCall(PetscArraycpy(w, b + i2, bs)); 1041 /* copy all rows of x that are needed into contiguous space */ 1042 workt = work; 1043 for (j = 0; j < nz; j++) { 1044 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1045 workt += bs; 1046 } 1047 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1048 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1049 1050 idiag += bs2; 1051 i2 += bs; 1052 } 1053 break; 1054 } 1055 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1056 } 1057 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1058 idiag = a->idiag + bs2 * (a->mbs - 1); 1059 i2 = bs * (m - 1); 1060 switch (bs) { 1061 case 1: 1062 for (i = m - 1; i >= 0; i--) { 1063 v = aa + ai[i]; 1064 vi = aj + ai[i]; 1065 nz = ai[i + 1] - ai[i]; 1066 s[0] = b[i2]; 1067 for (j = 0; j < nz; j++) { 1068 xw[0] = x[vi[j]]; 1069 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1070 } 1071 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1072 x[i2] += xw[0]; 1073 idiag -= 1; 1074 i2 -= 1; 1075 } 1076 break; 1077 case 2: 1078 for (i = m - 1; i >= 0; i--) { 1079 v = aa + 4 * ai[i]; 1080 vi = aj + ai[i]; 1081 nz = ai[i + 1] - ai[i]; 1082 s[0] = b[i2]; 1083 s[1] = b[i2 + 1]; 1084 for (j = 0; j < nz; j++) { 1085 idx = 2 * vi[j]; 1086 it = 4 * j; 1087 xw[0] = x[idx]; 1088 xw[1] = x[1 + idx]; 1089 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1090 } 1091 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1092 x[i2] += xw[0]; 1093 x[i2 + 1] += xw[1]; 1094 idiag -= 4; 1095 i2 -= 2; 1096 } 1097 break; 1098 case 3: 1099 for (i = m - 1; i >= 0; i--) { 1100 v = aa + 9 * ai[i]; 1101 vi = aj + ai[i]; 1102 nz = ai[i + 1] - ai[i]; 1103 s[0] = b[i2]; 1104 s[1] = b[i2 + 1]; 1105 s[2] = b[i2 + 2]; 1106 while (nz--) { 1107 idx = 3 * (*vi++); 1108 xw[0] = x[idx]; 1109 xw[1] = x[1 + idx]; 1110 xw[2] = x[2 + idx]; 1111 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1112 v += 9; 1113 } 1114 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1115 x[i2] += xw[0]; 1116 x[i2 + 1] += xw[1]; 1117 x[i2 + 2] += xw[2]; 1118 idiag -= 9; 1119 i2 -= 3; 1120 } 1121 break; 1122 case 4: 1123 for (i = m - 1; i >= 0; i--) { 1124 v = aa + 16 * ai[i]; 1125 vi = aj + ai[i]; 1126 nz = ai[i + 1] - ai[i]; 1127 s[0] = b[i2]; 1128 s[1] = b[i2 + 1]; 1129 s[2] = b[i2 + 2]; 1130 s[3] = b[i2 + 3]; 1131 while (nz--) { 1132 idx = 4 * (*vi++); 1133 xw[0] = x[idx]; 1134 xw[1] = x[1 + idx]; 1135 xw[2] = x[2 + idx]; 1136 xw[3] = x[3 + idx]; 1137 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1138 v += 16; 1139 } 1140 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1141 x[i2] += xw[0]; 1142 x[i2 + 1] += xw[1]; 1143 x[i2 + 2] += xw[2]; 1144 x[i2 + 3] += xw[3]; 1145 idiag -= 16; 1146 i2 -= 4; 1147 } 1148 break; 1149 case 5: 1150 for (i = m - 1; i >= 0; i--) { 1151 v = aa + 25 * ai[i]; 1152 vi = aj + ai[i]; 1153 nz = ai[i + 1] - ai[i]; 1154 s[0] = b[i2]; 1155 s[1] = b[i2 + 1]; 1156 s[2] = b[i2 + 2]; 1157 s[3] = b[i2 + 3]; 1158 s[4] = b[i2 + 4]; 1159 while (nz--) { 1160 idx = 5 * (*vi++); 1161 xw[0] = x[idx]; 1162 xw[1] = x[1 + idx]; 1163 xw[2] = x[2 + idx]; 1164 xw[3] = x[3 + idx]; 1165 xw[4] = x[4 + idx]; 1166 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1167 v += 25; 1168 } 1169 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1170 x[i2] += xw[0]; 1171 x[i2 + 1] += xw[1]; 1172 x[i2 + 2] += xw[2]; 1173 x[i2 + 3] += xw[3]; 1174 x[i2 + 4] += xw[4]; 1175 idiag -= 25; 1176 i2 -= 5; 1177 } 1178 break; 1179 case 6: 1180 for (i = m - 1; i >= 0; i--) { 1181 v = aa + 36 * ai[i]; 1182 vi = aj + ai[i]; 1183 nz = ai[i + 1] - ai[i]; 1184 s[0] = b[i2]; 1185 s[1] = b[i2 + 1]; 1186 s[2] = b[i2 + 2]; 1187 s[3] = b[i2 + 3]; 1188 s[4] = b[i2 + 4]; 1189 s[5] = b[i2 + 5]; 1190 while (nz--) { 1191 idx = 6 * (*vi++); 1192 xw[0] = x[idx]; 1193 xw[1] = x[1 + idx]; 1194 xw[2] = x[2 + idx]; 1195 xw[3] = x[3 + idx]; 1196 xw[4] = x[4 + idx]; 1197 xw[5] = x[5 + idx]; 1198 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1199 v += 36; 1200 } 1201 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1202 x[i2] += xw[0]; 1203 x[i2 + 1] += xw[1]; 1204 x[i2 + 2] += xw[2]; 1205 x[i2 + 3] += xw[3]; 1206 x[i2 + 4] += xw[4]; 1207 x[i2 + 5] += xw[5]; 1208 idiag -= 36; 1209 i2 -= 6; 1210 } 1211 break; 1212 case 7: 1213 for (i = m - 1; i >= 0; i--) { 1214 v = aa + 49 * ai[i]; 1215 vi = aj + ai[i]; 1216 nz = ai[i + 1] - ai[i]; 1217 s[0] = b[i2]; 1218 s[1] = b[i2 + 1]; 1219 s[2] = b[i2 + 2]; 1220 s[3] = b[i2 + 3]; 1221 s[4] = b[i2 + 4]; 1222 s[5] = b[i2 + 5]; 1223 s[6] = b[i2 + 6]; 1224 while (nz--) { 1225 idx = 7 * (*vi++); 1226 xw[0] = x[idx]; 1227 xw[1] = x[1 + idx]; 1228 xw[2] = x[2 + idx]; 1229 xw[3] = x[3 + idx]; 1230 xw[4] = x[4 + idx]; 1231 xw[5] = x[5 + idx]; 1232 xw[6] = x[6 + idx]; 1233 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1234 v += 49; 1235 } 1236 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1237 x[i2] += xw[0]; 1238 x[i2 + 1] += xw[1]; 1239 x[i2 + 2] += xw[2]; 1240 x[i2 + 3] += xw[3]; 1241 x[i2 + 4] += xw[4]; 1242 x[i2 + 5] += xw[5]; 1243 x[i2 + 6] += xw[6]; 1244 idiag -= 49; 1245 i2 -= 7; 1246 } 1247 break; 1248 default: 1249 for (i = m - 1; i >= 0; i--) { 1250 v = aa + bs2 * ai[i]; 1251 vi = aj + ai[i]; 1252 nz = ai[i + 1] - ai[i]; 1253 1254 PetscCall(PetscArraycpy(w, b + i2, bs)); 1255 /* copy all rows of x that are needed into contiguous space */ 1256 workt = work; 1257 for (j = 0; j < nz; j++) { 1258 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1259 workt += bs; 1260 } 1261 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1262 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1263 1264 idiag -= bs2; 1265 i2 -= bs; 1266 } 1267 break; 1268 } 1269 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1270 } 1271 } 1272 PetscCall(VecRestoreArray(xx, &x)); 1273 PetscCall(VecRestoreArrayRead(bb, &b)); 1274 PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 /* 1278 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1279 */ 1280 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1281 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1282 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1283 #define matsetvaluesblocked4_ matsetvaluesblocked4 1284 #endif 1285 1286 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1287 { 1288 Mat A = *AA; 1289 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1290 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1291 PetscInt *ai = a->i, *ailen = a->ilen; 1292 PetscInt *aj = a->j, stepval, lastcol = -1; 1293 const PetscScalar *value = v; 1294 MatScalar *ap, *aa = a->a, *bap; 1295 1296 PetscFunctionBegin; 1297 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1298 stepval = (n - 1) * 4; 1299 for (k = 0; k < m; k++) { /* loop over added rows */ 1300 row = im[k]; 1301 rp = aj + ai[row]; 1302 ap = aa + 16 * ai[row]; 1303 nrow = ailen[row]; 1304 low = 0; 1305 high = nrow; 1306 for (l = 0; l < n; l++) { /* loop over added columns */ 1307 col = in[l]; 1308 if (col <= lastcol) low = 0; 1309 else high = nrow; 1310 lastcol = col; 1311 value = v + k * (stepval + 4 + l) * 4; 1312 while (high - low > 7) { 1313 t = (low + high) / 2; 1314 if (rp[t] > col) high = t; 1315 else low = t; 1316 } 1317 for (i = low; i < high; i++) { 1318 if (rp[i] > col) break; 1319 if (rp[i] == col) { 1320 bap = ap + 16 * i; 1321 for (ii = 0; ii < 4; ii++, value += stepval) { 1322 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1323 } 1324 goto noinsert2; 1325 } 1326 } 1327 N = nrow++ - 1; 1328 high++; /* added new column index thus must search to one higher than before */ 1329 /* shift up all the later entries in this row */ 1330 for (ii = N; ii >= i; ii--) { 1331 rp[ii + 1] = rp[ii]; 1332 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1333 } 1334 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1335 rp[i] = col; 1336 bap = ap + 16 * i; 1337 for (ii = 0; ii < 4; ii++, value += stepval) { 1338 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1339 } 1340 noinsert2:; 1341 low = i; 1342 } 1343 ailen[row] = nrow; 1344 } 1345 PetscFunctionReturnVoid(); 1346 } 1347 1348 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1349 #define matsetvalues4_ MATSETVALUES4 1350 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1351 #define matsetvalues4_ matsetvalues4 1352 #endif 1353 1354 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1355 { 1356 Mat A = *AA; 1357 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1358 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1359 PetscInt *ai = a->i, *ailen = a->ilen; 1360 PetscInt *aj = a->j, brow, bcol; 1361 PetscInt ridx, cidx, lastcol = -1; 1362 MatScalar *ap, value, *aa = a->a, *bap; 1363 1364 PetscFunctionBegin; 1365 for (k = 0; k < m; k++) { /* loop over added rows */ 1366 row = im[k]; 1367 brow = row / 4; 1368 rp = aj + ai[brow]; 1369 ap = aa + 16 * ai[brow]; 1370 nrow = ailen[brow]; 1371 low = 0; 1372 high = nrow; 1373 for (l = 0; l < n; l++) { /* loop over added columns */ 1374 col = in[l]; 1375 bcol = col / 4; 1376 ridx = row % 4; 1377 cidx = col % 4; 1378 value = v[l + k * n]; 1379 if (col <= lastcol) low = 0; 1380 else high = nrow; 1381 lastcol = col; 1382 while (high - low > 7) { 1383 t = (low + high) / 2; 1384 if (rp[t] > bcol) high = t; 1385 else low = t; 1386 } 1387 for (i = low; i < high; i++) { 1388 if (rp[i] > bcol) break; 1389 if (rp[i] == bcol) { 1390 bap = ap + 16 * i + 4 * cidx + ridx; 1391 *bap += value; 1392 goto noinsert1; 1393 } 1394 } 1395 N = nrow++ - 1; 1396 high++; /* added new column thus must search to one higher than before */ 1397 /* shift up all the later entries in this row */ 1398 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1399 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1400 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1401 rp[i] = bcol; 1402 ap[16 * i + 4 * cidx + ridx] = value; 1403 noinsert1:; 1404 low = i; 1405 } 1406 ailen[brow] = nrow; 1407 } 1408 PetscFunctionReturnVoid(); 1409 } 1410 1411 /* 1412 Checks for missing diagonals 1413 */ 1414 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1415 { 1416 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1417 PetscInt *diag, *ii = a->i, i; 1418 1419 PetscFunctionBegin; 1420 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1421 *missing = PETSC_FALSE; 1422 if (A->rmap->n > 0 && !ii) { 1423 *missing = PETSC_TRUE; 1424 if (d) *d = 0; 1425 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1426 } else { 1427 PetscInt n; 1428 n = PetscMin(a->mbs, a->nbs); 1429 diag = a->diag; 1430 for (i = 0; i < n; i++) { 1431 if (diag[i] >= ii[i + 1]) { 1432 *missing = PETSC_TRUE; 1433 if (d) *d = i; 1434 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1435 break; 1436 } 1437 } 1438 } 1439 PetscFunctionReturn(PETSC_SUCCESS); 1440 } 1441 1442 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1443 { 1444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1445 PetscInt i, j, m = a->mbs; 1446 1447 PetscFunctionBegin; 1448 if (!a->diag) { 1449 PetscCall(PetscMalloc1(m, &a->diag)); 1450 a->free_diag = PETSC_TRUE; 1451 } 1452 for (i = 0; i < m; i++) { 1453 a->diag[i] = a->i[i + 1]; 1454 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1455 if (a->j[j] == i) { 1456 a->diag[i] = j; 1457 break; 1458 } 1459 } 1460 } 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1465 { 1466 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1467 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1468 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1469 1470 PetscFunctionBegin; 1471 *nn = n; 1472 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1473 if (symmetric) { 1474 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1475 nz = tia[n]; 1476 } else { 1477 tia = a->i; 1478 tja = a->j; 1479 } 1480 1481 if (!blockcompressed && bs > 1) { 1482 (*nn) *= bs; 1483 /* malloc & create the natural set of indices */ 1484 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1485 if (n) { 1486 (*ia)[0] = oshift; 1487 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1488 } 1489 1490 for (i = 1; i < n; i++) { 1491 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1492 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1493 } 1494 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1495 1496 if (inja) { 1497 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1498 cnt = 0; 1499 for (i = 0; i < n; i++) { 1500 for (j = 0; j < bs; j++) { 1501 for (k = tia[i]; k < tia[i + 1]; k++) { 1502 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1503 } 1504 } 1505 } 1506 } 1507 1508 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1509 PetscCall(PetscFree(tia)); 1510 PetscCall(PetscFree(tja)); 1511 } 1512 } else if (oshift == 1) { 1513 if (symmetric) { 1514 nz = tia[A->rmap->n / bs]; 1515 /* add 1 to i and j indices */ 1516 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1517 *ia = tia; 1518 if (ja) { 1519 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1520 *ja = tja; 1521 } 1522 } else { 1523 nz = a->i[A->rmap->n / bs]; 1524 /* malloc space and add 1 to i and j indices */ 1525 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1526 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1527 if (ja) { 1528 PetscCall(PetscMalloc1(nz, ja)); 1529 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1530 } 1531 } 1532 } else { 1533 *ia = tia; 1534 if (ja) *ja = tja; 1535 } 1536 PetscFunctionReturn(PETSC_SUCCESS); 1537 } 1538 1539 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1540 { 1541 PetscFunctionBegin; 1542 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1543 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1544 PetscCall(PetscFree(*ia)); 1545 if (ja) PetscCall(PetscFree(*ja)); 1546 } 1547 PetscFunctionReturn(PETSC_SUCCESS); 1548 } 1549 1550 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1551 { 1552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1553 1554 PetscFunctionBegin; 1555 if (A->hash_active) { 1556 PetscInt bs; 1557 A->ops[0] = a->cops; 1558 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1559 PetscCall(MatGetBlockSize(A, &bs)); 1560 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1561 PetscCall(PetscFree(a->dnz)); 1562 PetscCall(PetscFree(a->bdnz)); 1563 A->hash_active = PETSC_FALSE; 1564 } 1565 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1566 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1567 PetscCall(ISDestroy(&a->row)); 1568 PetscCall(ISDestroy(&a->col)); 1569 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1570 PetscCall(PetscFree(a->idiag)); 1571 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1572 PetscCall(PetscFree(a->solve_work)); 1573 PetscCall(PetscFree(a->mult_work)); 1574 PetscCall(PetscFree(a->sor_workt)); 1575 PetscCall(PetscFree(a->sor_work)); 1576 PetscCall(ISDestroy(&a->icol)); 1577 PetscCall(PetscFree(a->saved_values)); 1578 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1579 1580 PetscCall(MatDestroy(&a->sbaijMat)); 1581 PetscCall(MatDestroy(&a->parent)); 1582 PetscCall(PetscFree(A->data)); 1583 1584 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1585 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1586 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1596 #if defined(PETSC_HAVE_HYPRE) 1597 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1598 #endif 1599 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1600 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1601 PetscFunctionReturn(PETSC_SUCCESS); 1602 } 1603 1604 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1605 { 1606 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1607 1608 PetscFunctionBegin; 1609 switch (op) { 1610 case MAT_ROW_ORIENTED: 1611 a->roworiented = flg; 1612 break; 1613 case MAT_KEEP_NONZERO_PATTERN: 1614 a->keepnonzeropattern = flg; 1615 break; 1616 case MAT_NEW_NONZERO_LOCATIONS: 1617 a->nonew = (flg ? 0 : 1); 1618 break; 1619 case MAT_NEW_NONZERO_LOCATION_ERR: 1620 a->nonew = (flg ? -1 : 0); 1621 break; 1622 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1623 a->nonew = (flg ? -2 : 0); 1624 break; 1625 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1626 a->nounused = (flg ? -1 : 0); 1627 break; 1628 default: 1629 break; 1630 } 1631 PetscFunctionReturn(PETSC_SUCCESS); 1632 } 1633 1634 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1635 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1636 { 1637 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1638 MatScalar *aa_i; 1639 PetscScalar *v_i; 1640 1641 PetscFunctionBegin; 1642 bs = A->rmap->bs; 1643 bs2 = bs * bs; 1644 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1645 1646 bn = row / bs; /* Block number */ 1647 bp = row % bs; /* Block Position */ 1648 M = ai[bn + 1] - ai[bn]; 1649 *nz = bs * M; 1650 1651 if (v) { 1652 *v = NULL; 1653 if (*nz) { 1654 PetscCall(PetscMalloc1(*nz, v)); 1655 for (i = 0; i < M; i++) { /* for each block in the block row */ 1656 v_i = *v + i * bs; 1657 aa_i = aa + bs2 * (ai[bn] + i); 1658 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1659 } 1660 } 1661 } 1662 1663 if (idx) { 1664 *idx = NULL; 1665 if (*nz) { 1666 PetscCall(PetscMalloc1(*nz, idx)); 1667 for (i = 0; i < M; i++) { /* for each block in the block row */ 1668 idx_i = *idx + i * bs; 1669 itmp = bs * aj[ai[bn] + i]; 1670 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1671 } 1672 } 1673 } 1674 PetscFunctionReturn(PETSC_SUCCESS); 1675 } 1676 1677 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1680 1681 PetscFunctionBegin; 1682 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1683 PetscFunctionReturn(PETSC_SUCCESS); 1684 } 1685 1686 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1687 { 1688 PetscFunctionBegin; 1689 if (idx) PetscCall(PetscFree(*idx)); 1690 if (v) PetscCall(PetscFree(*v)); 1691 PetscFunctionReturn(PETSC_SUCCESS); 1692 } 1693 1694 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1695 { 1696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1697 Mat C; 1698 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1699 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1700 MatScalar *ata, *aa = a->a; 1701 1702 PetscFunctionBegin; 1703 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1704 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1705 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1706 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1707 1708 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1709 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1710 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1711 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1712 1713 at = (Mat_SeqBAIJ *)C->data; 1714 ati = at->i; 1715 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1716 } else { 1717 C = *B; 1718 at = (Mat_SeqBAIJ *)C->data; 1719 ati = at->i; 1720 } 1721 1722 atj = at->j; 1723 ata = at->a; 1724 1725 /* Copy ati into atfill so we have locations of the next free space in atj */ 1726 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1727 1728 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1729 for (i = 0; i < mbs; i++) { 1730 anzj = ai[i + 1] - ai[i]; 1731 for (j = 0; j < anzj; j++) { 1732 atj[atfill[*aj]] = i; 1733 for (kr = 0; kr < bs; kr++) { 1734 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1735 } 1736 atfill[*aj++] += 1; 1737 } 1738 } 1739 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1740 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1741 1742 /* Clean up temporary space and complete requests. */ 1743 PetscCall(PetscFree(atfill)); 1744 1745 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1746 PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs)); 1747 *B = C; 1748 } else { 1749 PetscCall(MatHeaderMerge(A, &C)); 1750 } 1751 PetscFunctionReturn(PETSC_SUCCESS); 1752 } 1753 1754 static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg) 1755 { 1756 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1757 1758 PetscFunctionBegin; 1759 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1760 if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) { 1761 *flg = PETSC_FALSE; 1762 PetscFunctionReturn(PETSC_SUCCESS); 1763 } 1764 1765 /* if the a->i are the same */ 1766 PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg)); 1767 if (!*flg) PetscFunctionReturn(PETSC_SUCCESS); 1768 1769 /* if a->j are the same */ 1770 PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg)); 1771 if (!*flg) PetscFunctionReturn(PETSC_SUCCESS); 1772 1773 if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */ 1774 else { 1775 *flg = PETSC_TRUE; 1776 for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i) 1777 if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE; 1778 } 1779 PetscFunctionReturn(PETSC_SUCCESS); 1780 } 1781 1782 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1783 { 1784 Mat Btrans; 1785 1786 PetscFunctionBegin; 1787 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1788 PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f)); 1789 PetscCall(MatDestroy(&Btrans)); 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg) 1794 { 1795 PetscFunctionBegin; 1796 PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg)); 1797 PetscFunctionReturn(PETSC_SUCCESS); 1798 } 1799 1800 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1801 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1802 { 1803 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1804 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1805 PetscInt *rowlens, *colidxs; 1806 PetscScalar *matvals; 1807 1808 PetscFunctionBegin; 1809 PetscCall(PetscViewerSetUp(viewer)); 1810 1811 M = mat->rmap->N; 1812 N = mat->cmap->N; 1813 m = mat->rmap->n; 1814 bs = mat->rmap->bs; 1815 nz = bs * bs * A->nz; 1816 1817 /* write matrix header */ 1818 header[0] = MAT_FILE_CLASSID; 1819 header[1] = M; 1820 header[2] = N; 1821 header[3] = nz; 1822 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1823 1824 /* store row lengths */ 1825 PetscCall(PetscMalloc1(m, &rowlens)); 1826 for (cnt = 0, i = 0; i < A->mbs; i++) 1827 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1828 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1829 PetscCall(PetscFree(rowlens)); 1830 1831 /* store column indices */ 1832 PetscCall(PetscMalloc1(nz, &colidxs)); 1833 for (cnt = 0, i = 0; i < A->mbs; i++) 1834 for (k = 0; k < bs; k++) 1835 for (j = A->i[i]; j < A->i[i + 1]; j++) 1836 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1837 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1838 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1839 PetscCall(PetscFree(colidxs)); 1840 1841 /* store nonzero values */ 1842 PetscCall(PetscMalloc1(nz, &matvals)); 1843 for (cnt = 0, i = 0; i < A->mbs; i++) 1844 for (k = 0; k < bs; k++) 1845 for (j = A->i[i]; j < A->i[i + 1]; j++) 1846 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1847 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1848 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1849 PetscCall(PetscFree(matvals)); 1850 1851 /* write block size option to the viewer's .info file */ 1852 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1853 PetscFunctionReturn(PETSC_SUCCESS); 1854 } 1855 1856 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1857 { 1858 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1859 PetscInt i, bs = A->rmap->bs, k; 1860 1861 PetscFunctionBegin; 1862 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1863 for (i = 0; i < a->mbs; i++) { 1864 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1865 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1866 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1867 } 1868 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 1872 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1873 { 1874 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1875 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1876 PetscViewerFormat format; 1877 1878 PetscFunctionBegin; 1879 if (A->structure_only) { 1880 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1881 PetscFunctionReturn(PETSC_SUCCESS); 1882 } 1883 1884 PetscCall(PetscViewerGetFormat(viewer, &format)); 1885 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1886 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1887 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1888 const char *matname; 1889 Mat aij; 1890 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1891 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1892 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1893 PetscCall(MatView(aij, viewer)); 1894 PetscCall(MatDestroy(&aij)); 1895 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1896 PetscFunctionReturn(PETSC_SUCCESS); 1897 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1898 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1899 for (i = 0; i < a->mbs; i++) { 1900 for (j = 0; j < bs; j++) { 1901 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1902 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1903 for (l = 0; l < bs; l++) { 1904 #if defined(PETSC_USE_COMPLEX) 1905 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1906 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1907 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1908 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1909 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1910 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1911 } 1912 #else 1913 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1914 #endif 1915 } 1916 } 1917 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1918 } 1919 } 1920 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1921 } else { 1922 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1923 for (i = 0; i < a->mbs; i++) { 1924 for (j = 0; j < bs; j++) { 1925 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1926 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1927 for (l = 0; l < bs; l++) { 1928 #if defined(PETSC_USE_COMPLEX) 1929 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1930 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1931 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1932 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1933 } else { 1934 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1935 } 1936 #else 1937 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1938 #endif 1939 } 1940 } 1941 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1942 } 1943 } 1944 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1945 } 1946 PetscCall(PetscViewerFlush(viewer)); 1947 PetscFunctionReturn(PETSC_SUCCESS); 1948 } 1949 1950 #include <petscdraw.h> 1951 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1952 { 1953 Mat A = (Mat)Aa; 1954 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1955 PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 1956 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1957 MatScalar *aa; 1958 PetscViewer viewer; 1959 PetscViewerFormat format; 1960 int color; 1961 1962 PetscFunctionBegin; 1963 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1964 PetscCall(PetscViewerGetFormat(viewer, &format)); 1965 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1966 1967 /* loop over matrix elements drawing boxes */ 1968 1969 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1970 PetscDrawCollectiveBegin(draw); 1971 /* Blue for negative, Cyan for zero and Red for positive */ 1972 color = PETSC_DRAW_BLUE; 1973 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1974 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1975 y_l = A->rmap->N - row - 1.0; 1976 y_r = y_l + 1.0; 1977 x_l = a->j[j] * bs; 1978 x_r = x_l + 1.0; 1979 aa = a->a + j * bs2; 1980 for (k = 0; k < bs; k++) { 1981 for (l = 0; l < bs; l++) { 1982 if (PetscRealPart(*aa++) >= 0.) continue; 1983 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1984 } 1985 } 1986 } 1987 } 1988 color = PETSC_DRAW_CYAN; 1989 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1990 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1991 y_l = A->rmap->N - row - 1.0; 1992 y_r = y_l + 1.0; 1993 x_l = a->j[j] * bs; 1994 x_r = x_l + 1.0; 1995 aa = a->a + j * bs2; 1996 for (k = 0; k < bs; k++) { 1997 for (l = 0; l < bs; l++) { 1998 if (PetscRealPart(*aa++) != 0.) continue; 1999 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2000 } 2001 } 2002 } 2003 } 2004 color = PETSC_DRAW_RED; 2005 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2006 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2007 y_l = A->rmap->N - row - 1.0; 2008 y_r = y_l + 1.0; 2009 x_l = a->j[j] * bs; 2010 x_r = x_l + 1.0; 2011 aa = a->a + j * bs2; 2012 for (k = 0; k < bs; k++) { 2013 for (l = 0; l < bs; l++) { 2014 if (PetscRealPart(*aa++) <= 0.) continue; 2015 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2016 } 2017 } 2018 } 2019 } 2020 PetscDrawCollectiveEnd(draw); 2021 } else { 2022 /* use contour shading to indicate magnitude of values */ 2023 /* first determine max of all nonzero values */ 2024 PetscReal minv = 0.0, maxv = 0.0; 2025 PetscDraw popup; 2026 2027 for (i = 0; i < a->nz * a->bs2; i++) { 2028 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2029 } 2030 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2031 PetscCall(PetscDrawGetPopup(draw, &popup)); 2032 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2033 2034 PetscDrawCollectiveBegin(draw); 2035 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2036 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2037 y_l = A->rmap->N - row - 1.0; 2038 y_r = y_l + 1.0; 2039 x_l = a->j[j] * bs; 2040 x_r = x_l + 1.0; 2041 aa = a->a + j * bs2; 2042 for (k = 0; k < bs; k++) { 2043 for (l = 0; l < bs; l++) { 2044 MatScalar v = *aa++; 2045 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2046 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2047 } 2048 } 2049 } 2050 } 2051 PetscDrawCollectiveEnd(draw); 2052 } 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2057 { 2058 PetscReal xl, yl, xr, yr, w, h; 2059 PetscDraw draw; 2060 PetscBool isnull; 2061 2062 PetscFunctionBegin; 2063 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2064 PetscCall(PetscDrawIsNull(draw, &isnull)); 2065 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2066 2067 xr = A->cmap->n; 2068 yr = A->rmap->N; 2069 h = yr / 10.0; 2070 w = xr / 10.0; 2071 xr += w; 2072 yr += h; 2073 xl = -w; 2074 yl = -h; 2075 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2076 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2077 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2078 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2079 PetscCall(PetscDrawSave(draw)); 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2084 { 2085 PetscBool isascii, isbinary, isdraw; 2086 2087 PetscFunctionBegin; 2088 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2089 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2090 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2091 if (isascii) { 2092 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2093 } else if (isbinary) { 2094 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2095 } else if (isdraw) { 2096 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2097 } else { 2098 Mat B; 2099 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2100 PetscCall(MatView(B, viewer)); 2101 PetscCall(MatDestroy(&B)); 2102 } 2103 PetscFunctionReturn(PETSC_SUCCESS); 2104 } 2105 2106 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2107 { 2108 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2109 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2110 PetscInt *ai = a->i, *ailen = a->ilen; 2111 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2112 MatScalar *ap, *aa = a->a; 2113 2114 PetscFunctionBegin; 2115 for (k = 0; k < m; k++) { /* loop over rows */ 2116 row = im[k]; 2117 brow = row / bs; 2118 if (row < 0) { 2119 v += n; 2120 continue; 2121 } /* negative row */ 2122 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2123 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2124 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2125 nrow = ailen[brow]; 2126 for (l = 0; l < n; l++) { /* loop over columns */ 2127 if (in[l] < 0) { 2128 v++; 2129 continue; 2130 } /* negative column */ 2131 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2132 col = in[l]; 2133 bcol = col / bs; 2134 cidx = col % bs; 2135 ridx = row % bs; 2136 high = nrow; 2137 low = 0; /* assume unsorted */ 2138 while (high - low > 5) { 2139 t = (low + high) / 2; 2140 if (rp[t] > bcol) high = t; 2141 else low = t; 2142 } 2143 for (i = low; i < high; i++) { 2144 if (rp[i] > bcol) break; 2145 if (rp[i] == bcol) { 2146 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2147 goto finished; 2148 } 2149 } 2150 *v++ = 0.0; 2151 finished:; 2152 } 2153 } 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2158 { 2159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2160 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2161 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2162 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2163 PetscBool roworiented = a->roworiented; 2164 const PetscScalar *value = v; 2165 MatScalar *ap = NULL, *aa = a->a, *bap; 2166 2167 PetscFunctionBegin; 2168 if (roworiented) { 2169 stepval = (n - 1) * bs; 2170 } else { 2171 stepval = (m - 1) * bs; 2172 } 2173 for (k = 0; k < m; k++) { /* loop over added rows */ 2174 row = im[k]; 2175 if (row < 0) continue; 2176 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2177 rp = aj + ai[row]; 2178 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2179 rmax = imax[row]; 2180 nrow = ailen[row]; 2181 low = 0; 2182 high = nrow; 2183 for (l = 0; l < n; l++) { /* loop over added columns */ 2184 if (in[l] < 0) continue; 2185 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2186 col = in[l]; 2187 if (!A->structure_only) { 2188 if (roworiented) { 2189 value = v + (k * (stepval + bs) + l) * bs; 2190 } else { 2191 value = v + (l * (stepval + bs) + k) * bs; 2192 } 2193 } 2194 if (col <= lastcol) low = 0; 2195 else high = nrow; 2196 lastcol = col; 2197 while (high - low > 7) { 2198 t = (low + high) / 2; 2199 if (rp[t] > col) high = t; 2200 else low = t; 2201 } 2202 for (i = low; i < high; i++) { 2203 if (rp[i] > col) break; 2204 if (rp[i] == col) { 2205 if (A->structure_only) goto noinsert2; 2206 bap = ap + bs2 * i; 2207 if (roworiented) { 2208 if (is == ADD_VALUES) { 2209 for (ii = 0; ii < bs; ii++, value += stepval) { 2210 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2211 } 2212 } else { 2213 for (ii = 0; ii < bs; ii++, value += stepval) { 2214 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2215 } 2216 } 2217 } else { 2218 if (is == ADD_VALUES) { 2219 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2220 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2221 bap += bs; 2222 } 2223 } else { 2224 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2225 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2226 bap += bs; 2227 } 2228 } 2229 } 2230 goto noinsert2; 2231 } 2232 } 2233 if (nonew == 1) goto noinsert2; 2234 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2235 if (A->structure_only) { 2236 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2237 } else { 2238 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2239 } 2240 N = nrow++ - 1; 2241 high++; 2242 /* shift up all the later entries in this row */ 2243 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2244 rp[i] = col; 2245 if (!A->structure_only) { 2246 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2247 bap = ap + bs2 * i; 2248 if (roworiented) { 2249 for (ii = 0; ii < bs; ii++, value += stepval) { 2250 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2251 } 2252 } else { 2253 for (ii = 0; ii < bs; ii++, value += stepval) { 2254 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2255 } 2256 } 2257 } 2258 noinsert2:; 2259 low = i; 2260 } 2261 ailen[row] = nrow; 2262 } 2263 PetscFunctionReturn(PETSC_SUCCESS); 2264 } 2265 2266 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2267 { 2268 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2269 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2270 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2271 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2272 MatScalar *aa = a->a, *ap; 2273 PetscReal ratio = 0.6; 2274 2275 PetscFunctionBegin; 2276 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2277 2278 if (m) rmax = ailen[0]; 2279 for (i = 1; i < mbs; i++) { 2280 /* move each row back by the amount of empty slots (fshift) before it*/ 2281 fshift += imax[i - 1] - ailen[i - 1]; 2282 rmax = PetscMax(rmax, ailen[i]); 2283 if (fshift) { 2284 ip = aj + ai[i]; 2285 ap = aa + bs2 * ai[i]; 2286 N = ailen[i]; 2287 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2288 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2289 } 2290 ai[i] = ai[i - 1] + ailen[i - 1]; 2291 } 2292 if (mbs) { 2293 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2294 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2295 } 2296 2297 /* reset ilen and imax for each row */ 2298 a->nonzerorowcnt = 0; 2299 if (A->structure_only) { 2300 PetscCall(PetscFree2(a->imax, a->ilen)); 2301 } else { /* !A->structure_only */ 2302 for (i = 0; i < mbs; i++) { 2303 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2304 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2305 } 2306 } 2307 a->nz = ai[mbs]; 2308 2309 /* diagonals may have moved, so kill the diagonal pointers */ 2310 a->idiagvalid = PETSC_FALSE; 2311 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2312 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2313 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2314 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2315 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2316 2317 A->info.mallocs += a->reallocs; 2318 a->reallocs = 0; 2319 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2320 a->rmax = rmax; 2321 2322 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2323 PetscFunctionReturn(PETSC_SUCCESS); 2324 } 2325 2326 /* 2327 This function returns an array of flags which indicate the locations of contiguous 2328 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2329 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2330 Assume: sizes should be long enough to hold all the values. 2331 */ 2332 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2333 { 2334 PetscInt j = 0; 2335 2336 PetscFunctionBegin; 2337 for (PetscInt i = 0; i < n; j++) { 2338 PetscInt row = idx[i]; 2339 if (row % bs != 0) { /* Not the beginning of a block */ 2340 sizes[j] = 1; 2341 i++; 2342 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2343 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2344 i++; 2345 } else { /* Beginning of the block, so check if the complete block exists */ 2346 PetscBool flg = PETSC_TRUE; 2347 for (PetscInt k = 1; k < bs; k++) { 2348 if (row + k != idx[i + k]) { /* break in the block */ 2349 flg = PETSC_FALSE; 2350 break; 2351 } 2352 } 2353 if (flg) { /* No break in the bs */ 2354 sizes[j] = bs; 2355 i += bs; 2356 } else { 2357 sizes[j] = 1; 2358 i++; 2359 } 2360 } 2361 } 2362 *bs_max = j; 2363 PetscFunctionReturn(PETSC_SUCCESS); 2364 } 2365 2366 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2367 { 2368 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2369 PetscInt i, j, k, count, *rows; 2370 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2371 PetscScalar zero = 0.0; 2372 MatScalar *aa; 2373 const PetscScalar *xx; 2374 PetscScalar *bb; 2375 2376 PetscFunctionBegin; 2377 /* fix right-hand side if needed */ 2378 if (x && b) { 2379 PetscCall(VecGetArrayRead(x, &xx)); 2380 PetscCall(VecGetArray(b, &bb)); 2381 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2382 PetscCall(VecRestoreArrayRead(x, &xx)); 2383 PetscCall(VecRestoreArray(b, &bb)); 2384 } 2385 2386 /* Make a copy of the IS and sort it */ 2387 /* allocate memory for rows,sizes */ 2388 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2389 2390 /* copy IS values to rows, and sort them */ 2391 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2392 PetscCall(PetscSortInt(is_n, rows)); 2393 2394 if (baij->keepnonzeropattern) { 2395 for (i = 0; i < is_n; i++) sizes[i] = 1; 2396 bs_max = is_n; 2397 } else { 2398 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2399 A->nonzerostate++; 2400 } 2401 2402 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2403 row = rows[j]; 2404 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2405 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2406 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2407 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2408 if (diag != (PetscScalar)0.0) { 2409 if (baij->ilen[row / bs] > 0) { 2410 baij->ilen[row / bs] = 1; 2411 baij->j[baij->i[row / bs]] = row / bs; 2412 2413 PetscCall(PetscArrayzero(aa, count * bs)); 2414 } 2415 /* Now insert all the diagonal values for this bs */ 2416 for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2417 } else { /* (diag == 0.0) */ 2418 baij->ilen[row / bs] = 0; 2419 } /* end (diag == 0.0) */ 2420 } else { /* (sizes[i] != bs) */ 2421 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2422 for (k = 0; k < count; k++) { 2423 aa[0] = zero; 2424 aa += bs; 2425 } 2426 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2427 } 2428 } 2429 2430 PetscCall(PetscFree2(rows, sizes)); 2431 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2432 PetscFunctionReturn(PETSC_SUCCESS); 2433 } 2434 2435 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2436 { 2437 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2438 PetscInt i, j, k, count; 2439 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2440 PetscScalar zero = 0.0; 2441 MatScalar *aa; 2442 const PetscScalar *xx; 2443 PetscScalar *bb; 2444 PetscBool *zeroed, vecs = PETSC_FALSE; 2445 2446 PetscFunctionBegin; 2447 /* fix right-hand side if needed */ 2448 if (x && b) { 2449 PetscCall(VecGetArrayRead(x, &xx)); 2450 PetscCall(VecGetArray(b, &bb)); 2451 vecs = PETSC_TRUE; 2452 } 2453 2454 /* zero the columns */ 2455 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2456 for (i = 0; i < is_n; i++) { 2457 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2458 zeroed[is_idx[i]] = PETSC_TRUE; 2459 } 2460 for (i = 0; i < A->rmap->N; i++) { 2461 if (!zeroed[i]) { 2462 row = i / bs; 2463 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2464 for (k = 0; k < bs; k++) { 2465 col = bs * baij->j[j] + k; 2466 if (zeroed[col]) { 2467 aa = baij->a + j * bs2 + (i % bs) + bs * k; 2468 if (vecs) bb[i] -= aa[0] * xx[col]; 2469 aa[0] = 0.0; 2470 } 2471 } 2472 } 2473 } else if (vecs) bb[i] = diag * xx[i]; 2474 } 2475 PetscCall(PetscFree(zeroed)); 2476 if (vecs) { 2477 PetscCall(VecRestoreArrayRead(x, &xx)); 2478 PetscCall(VecRestoreArray(b, &bb)); 2479 } 2480 2481 /* zero the rows */ 2482 for (i = 0; i < is_n; i++) { 2483 row = is_idx[i]; 2484 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2485 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2486 for (k = 0; k < count; k++) { 2487 aa[0] = zero; 2488 aa += bs; 2489 } 2490 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2491 } 2492 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2493 PetscFunctionReturn(PETSC_SUCCESS); 2494 } 2495 2496 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2497 { 2498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2499 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2500 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2501 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2502 PetscInt ridx, cidx, bs2 = a->bs2; 2503 PetscBool roworiented = a->roworiented; 2504 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2505 2506 PetscFunctionBegin; 2507 for (k = 0; k < m; k++) { /* loop over added rows */ 2508 row = im[k]; 2509 brow = row / bs; 2510 if (row < 0) continue; 2511 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2512 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2513 if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2514 rmax = imax[brow]; 2515 nrow = ailen[brow]; 2516 low = 0; 2517 high = nrow; 2518 for (l = 0; l < n; l++) { /* loop over added columns */ 2519 if (in[l] < 0) continue; 2520 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2521 col = in[l]; 2522 bcol = col / bs; 2523 ridx = row % bs; 2524 cidx = col % bs; 2525 if (!A->structure_only) { 2526 if (roworiented) { 2527 value = v[l + k * n]; 2528 } else { 2529 value = v[k + l * m]; 2530 } 2531 } 2532 if (col <= lastcol) low = 0; 2533 else high = nrow; 2534 lastcol = col; 2535 while (high - low > 7) { 2536 t = (low + high) / 2; 2537 if (rp[t] > bcol) high = t; 2538 else low = t; 2539 } 2540 for (i = low; i < high; i++) { 2541 if (rp[i] > bcol) break; 2542 if (rp[i] == bcol) { 2543 bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2544 if (!A->structure_only) { 2545 if (is == ADD_VALUES) *bap += value; 2546 else *bap = value; 2547 } 2548 goto noinsert1; 2549 } 2550 } 2551 if (nonew == 1) goto noinsert1; 2552 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2553 if (A->structure_only) { 2554 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2555 } else { 2556 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2557 } 2558 N = nrow++ - 1; 2559 high++; 2560 /* shift up all the later entries in this row */ 2561 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2562 rp[i] = bcol; 2563 if (!A->structure_only) { 2564 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2565 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2566 ap[bs2 * i + bs * cidx + ridx] = value; 2567 } 2568 a->nz++; 2569 noinsert1:; 2570 low = i; 2571 } 2572 ailen[brow] = nrow; 2573 } 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2578 { 2579 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2580 Mat outA; 2581 PetscBool row_identity, col_identity; 2582 2583 PetscFunctionBegin; 2584 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2585 PetscCall(ISIdentity(row, &row_identity)); 2586 PetscCall(ISIdentity(col, &col_identity)); 2587 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2588 2589 outA = inA; 2590 inA->factortype = MAT_FACTOR_LU; 2591 PetscCall(PetscFree(inA->solvertype)); 2592 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2593 2594 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2595 2596 PetscCall(PetscObjectReference((PetscObject)row)); 2597 PetscCall(ISDestroy(&a->row)); 2598 a->row = row; 2599 PetscCall(PetscObjectReference((PetscObject)col)); 2600 PetscCall(ISDestroy(&a->col)); 2601 a->col = col; 2602 2603 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2604 PetscCall(ISDestroy(&a->icol)); 2605 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2606 2607 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2608 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2609 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2610 PetscFunctionReturn(PETSC_SUCCESS); 2611 } 2612 2613 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2614 { 2615 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2616 2617 PetscFunctionBegin; 2618 baij->nz = baij->maxnz; 2619 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2620 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2621 PetscFunctionReturn(PETSC_SUCCESS); 2622 } 2623 2624 /*@ 2625 MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 2626 2627 Input Parameters: 2628 + mat - the `MATSEQBAIJ` matrix 2629 - indices - the block column indices 2630 2631 Level: advanced 2632 2633 Notes: 2634 This can be called if you have precomputed the nonzero structure of the 2635 matrix and want to provide it to the matrix object to improve the performance 2636 of the `MatSetValues()` operation. 2637 2638 You MUST have set the correct numbers of nonzeros per row in the call to 2639 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2640 2641 MUST be called before any calls to `MatSetValues()` 2642 2643 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2644 @*/ 2645 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2646 { 2647 PetscFunctionBegin; 2648 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2649 PetscAssertPointer(indices, 2); 2650 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 2651 PetscFunctionReturn(PETSC_SUCCESS); 2652 } 2653 2654 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2655 { 2656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2657 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2658 PetscReal atmp; 2659 PetscScalar *x, zero = 0.0; 2660 MatScalar *aa; 2661 PetscInt ncols, brow, krow, kcol; 2662 2663 PetscFunctionBegin; 2664 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2665 bs = A->rmap->bs; 2666 aa = a->a; 2667 ai = a->i; 2668 aj = a->j; 2669 mbs = a->mbs; 2670 2671 PetscCall(VecSet(v, zero)); 2672 PetscCall(VecGetArray(v, &x)); 2673 PetscCall(VecGetLocalSize(v, &n)); 2674 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2675 for (i = 0; i < mbs; i++) { 2676 ncols = ai[1] - ai[0]; 2677 ai++; 2678 brow = bs * i; 2679 for (j = 0; j < ncols; j++) { 2680 for (kcol = 0; kcol < bs; kcol++) { 2681 for (krow = 0; krow < bs; krow++) { 2682 atmp = PetscAbsScalar(*aa); 2683 aa++; 2684 row = brow + krow; /* row index */ 2685 if (PetscAbsScalar(x[row]) < atmp) { 2686 x[row] = atmp; 2687 if (idx) idx[row] = bs * (*aj) + kcol; 2688 } 2689 } 2690 } 2691 aj++; 2692 } 2693 } 2694 PetscCall(VecRestoreArray(v, &x)); 2695 PetscFunctionReturn(PETSC_SUCCESS); 2696 } 2697 2698 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2699 { 2700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2701 PetscInt i, j, n, row, bs, *ai, mbs; 2702 PetscReal atmp; 2703 PetscScalar *x, zero = 0.0; 2704 MatScalar *aa; 2705 PetscInt ncols, brow, krow, kcol; 2706 2707 PetscFunctionBegin; 2708 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2709 bs = A->rmap->bs; 2710 aa = a->a; 2711 ai = a->i; 2712 mbs = a->mbs; 2713 2714 PetscCall(VecSet(v, zero)); 2715 PetscCall(VecGetArrayWrite(v, &x)); 2716 PetscCall(VecGetLocalSize(v, &n)); 2717 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2718 for (i = 0; i < mbs; i++) { 2719 ncols = ai[1] - ai[0]; 2720 ai++; 2721 brow = bs * i; 2722 for (j = 0; j < ncols; j++) { 2723 for (kcol = 0; kcol < bs; kcol++) { 2724 for (krow = 0; krow < bs; krow++) { 2725 atmp = PetscAbsScalar(*aa); 2726 aa++; 2727 row = brow + krow; /* row index */ 2728 x[row] += atmp; 2729 } 2730 } 2731 } 2732 } 2733 PetscCall(VecRestoreArrayWrite(v, &x)); 2734 PetscFunctionReturn(PETSC_SUCCESS); 2735 } 2736 2737 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2738 { 2739 PetscFunctionBegin; 2740 /* If the two matrices have the same copy implementation, use fast copy. */ 2741 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2742 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2743 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2744 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2745 2746 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2747 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2748 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2749 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2750 } else { 2751 PetscCall(MatCopy_Basic(A, B, str)); 2752 } 2753 PetscFunctionReturn(PETSC_SUCCESS); 2754 } 2755 2756 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2757 { 2758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2759 2760 PetscFunctionBegin; 2761 *array = a->a; 2762 PetscFunctionReturn(PETSC_SUCCESS); 2763 } 2764 2765 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2766 { 2767 PetscFunctionBegin; 2768 *array = NULL; 2769 PetscFunctionReturn(PETSC_SUCCESS); 2770 } 2771 2772 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2773 { 2774 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2775 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2776 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2777 2778 PetscFunctionBegin; 2779 /* Set the number of nonzeros in the new matrix */ 2780 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2781 PetscFunctionReturn(PETSC_SUCCESS); 2782 } 2783 2784 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2785 { 2786 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2787 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2788 PetscBLASInt one = 1; 2789 2790 PetscFunctionBegin; 2791 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2792 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2793 if (e) { 2794 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2795 if (e) { 2796 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2797 if (e) str = SAME_NONZERO_PATTERN; 2798 } 2799 } 2800 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2801 } 2802 if (str == SAME_NONZERO_PATTERN) { 2803 PetscScalar alpha = a; 2804 PetscBLASInt bnz; 2805 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2806 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2807 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2808 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2809 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2810 } else { 2811 Mat B; 2812 PetscInt *nnz; 2813 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2814 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2815 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2816 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2817 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2818 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2819 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2820 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2821 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2822 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2823 PetscCall(MatHeaderMerge(Y, &B)); 2824 PetscCall(PetscFree(nnz)); 2825 } 2826 PetscFunctionReturn(PETSC_SUCCESS); 2827 } 2828 2829 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2830 { 2831 #if PetscDefined(USE_COMPLEX) 2832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2833 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2834 MatScalar *aa = a->a; 2835 2836 PetscFunctionBegin; 2837 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2838 PetscFunctionReturn(PETSC_SUCCESS); 2839 #else 2840 (void)A; 2841 return PETSC_SUCCESS; 2842 #endif 2843 } 2844 2845 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2846 { 2847 #if PetscDefined(USE_COMPLEX) 2848 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2849 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2850 MatScalar *aa = a->a; 2851 2852 PetscFunctionBegin; 2853 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2854 PetscFunctionReturn(PETSC_SUCCESS); 2855 #else 2856 (void)A; 2857 return PETSC_SUCCESS; 2858 #endif 2859 } 2860 2861 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2862 { 2863 #if PetscDefined(USE_COMPLEX) 2864 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2865 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2866 MatScalar *aa = a->a; 2867 2868 PetscFunctionBegin; 2869 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2870 PetscFunctionReturn(PETSC_SUCCESS); 2871 #else 2872 (void)A; 2873 return PETSC_SUCCESS; 2874 #endif 2875 } 2876 2877 /* 2878 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2879 */ 2880 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2881 { 2882 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2883 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2884 PetscInt nz = a->i[m], row, *jj, mr, col; 2885 2886 PetscFunctionBegin; 2887 *nn = n; 2888 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2889 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2890 PetscCall(PetscCalloc1(n, &collengths)); 2891 PetscCall(PetscMalloc1(n + 1, &cia)); 2892 PetscCall(PetscMalloc1(nz, &cja)); 2893 jj = a->j; 2894 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2895 cia[0] = oshift; 2896 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2897 PetscCall(PetscArrayzero(collengths, n)); 2898 jj = a->j; 2899 for (row = 0; row < m; row++) { 2900 mr = a->i[row + 1] - a->i[row]; 2901 for (i = 0; i < mr; i++) { 2902 col = *jj++; 2903 2904 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2905 } 2906 } 2907 PetscCall(PetscFree(collengths)); 2908 *ia = cia; 2909 *ja = cja; 2910 PetscFunctionReturn(PETSC_SUCCESS); 2911 } 2912 2913 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2914 { 2915 PetscFunctionBegin; 2916 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2917 PetscCall(PetscFree(*ia)); 2918 PetscCall(PetscFree(*ja)); 2919 PetscFunctionReturn(PETSC_SUCCESS); 2920 } 2921 2922 /* 2923 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2924 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2925 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2926 */ 2927 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2928 { 2929 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2930 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2931 PetscInt nz = a->i[m], row, *jj, mr, col; 2932 PetscInt *cspidx; 2933 2934 PetscFunctionBegin; 2935 *nn = n; 2936 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2937 2938 PetscCall(PetscCalloc1(n, &collengths)); 2939 PetscCall(PetscMalloc1(n + 1, &cia)); 2940 PetscCall(PetscMalloc1(nz, &cja)); 2941 PetscCall(PetscMalloc1(nz, &cspidx)); 2942 jj = a->j; 2943 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2944 cia[0] = oshift; 2945 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2946 PetscCall(PetscArrayzero(collengths, n)); 2947 jj = a->j; 2948 for (row = 0; row < m; row++) { 2949 mr = a->i[row + 1] - a->i[row]; 2950 for (i = 0; i < mr; i++) { 2951 col = *jj++; 2952 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2953 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2954 } 2955 } 2956 PetscCall(PetscFree(collengths)); 2957 *ia = cia; 2958 *ja = cja; 2959 *spidx = cspidx; 2960 PetscFunctionReturn(PETSC_SUCCESS); 2961 } 2962 2963 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2964 { 2965 PetscFunctionBegin; 2966 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2967 PetscCall(PetscFree(*spidx)); 2968 PetscFunctionReturn(PETSC_SUCCESS); 2969 } 2970 2971 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2972 { 2973 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2974 2975 PetscFunctionBegin; 2976 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2977 PetscCall(MatShift_Basic(Y, a)); 2978 PetscFunctionReturn(PETSC_SUCCESS); 2979 } 2980 2981 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2982 { 2983 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2984 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2985 PetscInt m = A->rmap->N, *ailen = a->ilen; 2986 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2987 MatScalar *aa = a->a, *ap; 2988 PetscBool zero; 2989 2990 PetscFunctionBegin; 2991 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2992 if (m) rmax = ailen[0]; 2993 for (i = 1; i <= mbs; i++) { 2994 for (k = ai[i - 1]; k < ai[i]; k++) { 2995 zero = PETSC_TRUE; 2996 ap = aa + bs2 * k; 2997 for (j = 0; j < bs2 && zero; j++) { 2998 if (ap[j] != 0.0) zero = PETSC_FALSE; 2999 } 3000 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 3001 else { 3002 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 3003 aj[k - fshift] = aj[k]; 3004 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 3005 } 3006 } 3007 ai[i - 1] -= fshift_prev; 3008 fshift_prev = fshift; 3009 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 3010 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 3011 rmax = PetscMax(rmax, ailen[i - 1]); 3012 } 3013 if (fshift) { 3014 if (mbs) { 3015 ai[mbs] -= fshift; 3016 a->nz = ai[mbs]; 3017 } 3018 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 3019 A->nonzerostate++; 3020 A->info.nz_unneeded += (PetscReal)fshift; 3021 a->rmax = rmax; 3022 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 3023 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 3024 } 3025 PetscFunctionReturn(PETSC_SUCCESS); 3026 } 3027 3028 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 3029 MatGetRow_SeqBAIJ, 3030 MatRestoreRow_SeqBAIJ, 3031 MatMult_SeqBAIJ_N, 3032 /* 4*/ MatMultAdd_SeqBAIJ_N, 3033 MatMultTranspose_SeqBAIJ, 3034 MatMultTransposeAdd_SeqBAIJ, 3035 NULL, 3036 NULL, 3037 NULL, 3038 /* 10*/ NULL, 3039 MatLUFactor_SeqBAIJ, 3040 NULL, 3041 NULL, 3042 MatTranspose_SeqBAIJ, 3043 /* 15*/ MatGetInfo_SeqBAIJ, 3044 MatEqual_SeqBAIJ, 3045 MatGetDiagonal_SeqBAIJ, 3046 MatDiagonalScale_SeqBAIJ, 3047 MatNorm_SeqBAIJ, 3048 /* 20*/ NULL, 3049 MatAssemblyEnd_SeqBAIJ, 3050 MatSetOption_SeqBAIJ, 3051 MatZeroEntries_SeqBAIJ, 3052 /* 24*/ MatZeroRows_SeqBAIJ, 3053 NULL, 3054 NULL, 3055 NULL, 3056 NULL, 3057 /* 29*/ MatSetUp_Seq_Hash, 3058 NULL, 3059 NULL, 3060 NULL, 3061 NULL, 3062 /* 34*/ MatDuplicate_SeqBAIJ, 3063 NULL, 3064 NULL, 3065 MatILUFactor_SeqBAIJ, 3066 NULL, 3067 /* 39*/ MatAXPY_SeqBAIJ, 3068 MatCreateSubMatrices_SeqBAIJ, 3069 MatIncreaseOverlap_SeqBAIJ, 3070 MatGetValues_SeqBAIJ, 3071 MatCopy_SeqBAIJ, 3072 /* 44*/ NULL, 3073 MatScale_SeqBAIJ, 3074 MatShift_SeqBAIJ, 3075 NULL, 3076 MatZeroRowsColumns_SeqBAIJ, 3077 /* 49*/ NULL, 3078 MatGetRowIJ_SeqBAIJ, 3079 MatRestoreRowIJ_SeqBAIJ, 3080 MatGetColumnIJ_SeqBAIJ, 3081 MatRestoreColumnIJ_SeqBAIJ, 3082 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3083 NULL, 3084 NULL, 3085 NULL, 3086 MatSetValuesBlocked_SeqBAIJ, 3087 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3088 MatDestroy_SeqBAIJ, 3089 MatView_SeqBAIJ, 3090 NULL, 3091 NULL, 3092 /* 64*/ NULL, 3093 NULL, 3094 NULL, 3095 NULL, 3096 MatGetRowMaxAbs_SeqBAIJ, 3097 /* 69*/ NULL, 3098 MatConvert_Basic, 3099 NULL, 3100 MatFDColoringApply_BAIJ, 3101 NULL, 3102 /* 74*/ NULL, 3103 NULL, 3104 NULL, 3105 NULL, 3106 MatLoad_SeqBAIJ, 3107 /* 79*/ NULL, 3108 NULL, 3109 NULL, 3110 NULL, 3111 NULL, 3112 /* 84*/ NULL, 3113 NULL, 3114 NULL, 3115 NULL, 3116 NULL, 3117 /* 89*/ NULL, 3118 NULL, 3119 NULL, 3120 NULL, 3121 MatConjugate_SeqBAIJ, 3122 /* 94*/ NULL, 3123 NULL, 3124 MatRealPart_SeqBAIJ, 3125 MatImaginaryPart_SeqBAIJ, 3126 NULL, 3127 /* 99*/ NULL, 3128 NULL, 3129 NULL, 3130 NULL, 3131 NULL, 3132 /*104*/ MatMissingDiagonal_SeqBAIJ, 3133 NULL, 3134 NULL, 3135 NULL, 3136 NULL, 3137 /*109*/ NULL, 3138 NULL, 3139 NULL, 3140 MatMultHermitianTranspose_SeqBAIJ, 3141 MatMultHermitianTransposeAdd_SeqBAIJ, 3142 /*114*/ NULL, 3143 NULL, 3144 MatGetColumnReductions_SeqBAIJ, 3145 MatInvertBlockDiagonal_SeqBAIJ, 3146 NULL, 3147 /*119*/ NULL, 3148 NULL, 3149 NULL, 3150 NULL, 3151 NULL, 3152 /*124*/ NULL, 3153 NULL, 3154 NULL, 3155 MatSetBlockSizes_Default, 3156 NULL, 3157 /*129*/ MatFDColoringSetUp_SeqXAIJ, 3158 NULL, 3159 MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3160 MatDestroySubMatrices_SeqBAIJ, 3161 NULL, 3162 /*134*/ NULL, 3163 NULL, 3164 NULL, 3165 MatEliminateZeros_SeqBAIJ, 3166 MatGetRowSumAbs_SeqBAIJ, 3167 /*139*/ NULL, 3168 NULL, 3169 NULL, 3170 MatCopyHashToXAIJ_Seq_Hash, 3171 NULL, 3172 NULL}; 3173 3174 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3175 { 3176 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3177 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3178 3179 PetscFunctionBegin; 3180 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3181 3182 /* allocate space for values if not already there */ 3183 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3184 3185 /* copy values over */ 3186 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3187 PetscFunctionReturn(PETSC_SUCCESS); 3188 } 3189 3190 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3191 { 3192 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3193 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3194 3195 PetscFunctionBegin; 3196 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3197 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3198 3199 /* copy values over */ 3200 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3201 PetscFunctionReturn(PETSC_SUCCESS); 3202 } 3203 3204 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3205 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3206 3207 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3208 { 3209 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3210 PetscInt i, mbs, nbs, bs2; 3211 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3212 3213 PetscFunctionBegin; 3214 if (B->hash_active) { 3215 PetscInt bs; 3216 B->ops[0] = b->cops; 3217 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3218 PetscCall(MatGetBlockSize(B, &bs)); 3219 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3220 PetscCall(PetscFree(b->dnz)); 3221 PetscCall(PetscFree(b->bdnz)); 3222 B->hash_active = PETSC_FALSE; 3223 } 3224 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3225 if (nz == MAT_SKIP_ALLOCATION) { 3226 skipallocation = PETSC_TRUE; 3227 nz = 0; 3228 } 3229 3230 PetscCall(MatSetBlockSize(B, bs)); 3231 PetscCall(PetscLayoutSetUp(B->rmap)); 3232 PetscCall(PetscLayoutSetUp(B->cmap)); 3233 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3234 3235 B->preallocated = PETSC_TRUE; 3236 3237 mbs = B->rmap->n / bs; 3238 nbs = B->cmap->n / bs; 3239 bs2 = bs * bs; 3240 3241 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3242 3243 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3244 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3245 if (nnz) { 3246 for (i = 0; i < mbs; i++) { 3247 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3248 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3249 } 3250 } 3251 3252 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3253 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3254 PetscOptionsEnd(); 3255 3256 if (!flg) { 3257 switch (bs) { 3258 case 1: 3259 B->ops->mult = MatMult_SeqBAIJ_1; 3260 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3261 break; 3262 case 2: 3263 B->ops->mult = MatMult_SeqBAIJ_2; 3264 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3265 break; 3266 case 3: 3267 B->ops->mult = MatMult_SeqBAIJ_3; 3268 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3269 break; 3270 case 4: 3271 B->ops->mult = MatMult_SeqBAIJ_4; 3272 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3273 break; 3274 case 5: 3275 B->ops->mult = MatMult_SeqBAIJ_5; 3276 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3277 break; 3278 case 6: 3279 B->ops->mult = MatMult_SeqBAIJ_6; 3280 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3281 break; 3282 case 7: 3283 B->ops->mult = MatMult_SeqBAIJ_7; 3284 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3285 break; 3286 case 9: { 3287 PetscInt version = 1; 3288 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3289 switch (version) { 3290 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3291 case 1: 3292 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3293 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3294 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3295 break; 3296 #endif 3297 default: 3298 B->ops->mult = MatMult_SeqBAIJ_N; 3299 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3300 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3301 break; 3302 } 3303 break; 3304 } 3305 case 11: 3306 B->ops->mult = MatMult_SeqBAIJ_11; 3307 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3308 break; 3309 case 12: { 3310 PetscInt version = 1; 3311 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3312 switch (version) { 3313 case 1: 3314 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3315 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3316 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3317 break; 3318 case 2: 3319 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3320 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3321 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3322 break; 3323 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3324 case 3: 3325 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3326 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3327 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3328 break; 3329 #endif 3330 default: 3331 B->ops->mult = MatMult_SeqBAIJ_N; 3332 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3333 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3334 break; 3335 } 3336 break; 3337 } 3338 case 15: { 3339 PetscInt version = 1; 3340 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3341 switch (version) { 3342 case 1: 3343 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3344 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3345 break; 3346 case 2: 3347 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3348 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3349 break; 3350 case 3: 3351 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3352 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3353 break; 3354 case 4: 3355 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3356 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3357 break; 3358 default: 3359 B->ops->mult = MatMult_SeqBAIJ_N; 3360 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3361 break; 3362 } 3363 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3364 break; 3365 } 3366 default: 3367 B->ops->mult = MatMult_SeqBAIJ_N; 3368 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3369 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3370 break; 3371 } 3372 } 3373 B->ops->sor = MatSOR_SeqBAIJ; 3374 b->mbs = mbs; 3375 b->nbs = nbs; 3376 if (!skipallocation) { 3377 if (!b->imax) { 3378 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3379 3380 b->free_imax_ilen = PETSC_TRUE; 3381 } 3382 /* b->ilen will count nonzeros in each block row so far. */ 3383 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3384 if (!nnz) { 3385 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3386 else if (nz < 0) nz = 1; 3387 nz = PetscMin(nz, nbs); 3388 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3389 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3390 } else { 3391 PetscInt64 nz64 = 0; 3392 for (i = 0; i < mbs; i++) { 3393 b->imax[i] = nnz[i]; 3394 nz64 += nnz[i]; 3395 } 3396 PetscCall(PetscIntCast(nz64, &nz)); 3397 } 3398 3399 /* allocate the matrix space */ 3400 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3401 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 3402 PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3403 if (B->structure_only) { 3404 b->free_a = PETSC_FALSE; 3405 } else { 3406 PetscInt nzbs2 = 0; 3407 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3408 PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 3409 b->free_a = PETSC_TRUE; 3410 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3411 } 3412 b->free_ij = PETSC_TRUE; 3413 PetscCall(PetscArrayzero(b->j, nz)); 3414 3415 b->i[0] = 0; 3416 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3417 } else { 3418 b->free_a = PETSC_FALSE; 3419 b->free_ij = PETSC_FALSE; 3420 } 3421 3422 b->bs2 = bs2; 3423 b->mbs = mbs; 3424 b->nz = 0; 3425 b->maxnz = nz; 3426 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3427 B->was_assembled = PETSC_FALSE; 3428 B->assembled = PETSC_FALSE; 3429 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 3433 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3434 { 3435 PetscInt i, m, nz, nz_max = 0, *nnz; 3436 PetscScalar *values = NULL; 3437 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3438 3439 PetscFunctionBegin; 3440 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3441 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3442 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3443 PetscCall(PetscLayoutSetUp(B->rmap)); 3444 PetscCall(PetscLayoutSetUp(B->cmap)); 3445 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3446 m = B->rmap->n / bs; 3447 3448 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3449 PetscCall(PetscMalloc1(m + 1, &nnz)); 3450 for (i = 0; i < m; i++) { 3451 nz = ii[i + 1] - ii[i]; 3452 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3453 nz_max = PetscMax(nz_max, nz); 3454 nnz[i] = nz; 3455 } 3456 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3457 PetscCall(PetscFree(nnz)); 3458 3459 values = (PetscScalar *)V; 3460 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3461 for (i = 0; i < m; i++) { 3462 PetscInt ncols = ii[i + 1] - ii[i]; 3463 const PetscInt *icols = jj + ii[i]; 3464 if (bs == 1 || !roworiented) { 3465 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3466 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3467 } else { 3468 PetscInt j; 3469 for (j = 0; j < ncols; j++) { 3470 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3471 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3472 } 3473 } 3474 } 3475 if (!V) PetscCall(PetscFree(values)); 3476 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3477 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3478 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3479 PetscFunctionReturn(PETSC_SUCCESS); 3480 } 3481 3482 /*@C 3483 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3484 3485 Not Collective 3486 3487 Input Parameter: 3488 . A - a `MATSEQBAIJ` matrix 3489 3490 Output Parameter: 3491 . array - pointer to the data 3492 3493 Level: intermediate 3494 3495 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3496 @*/ 3497 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3498 { 3499 PetscFunctionBegin; 3500 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3501 PetscFunctionReturn(PETSC_SUCCESS); 3502 } 3503 3504 /*@C 3505 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3506 3507 Not Collective 3508 3509 Input Parameters: 3510 + A - a `MATSEQBAIJ` matrix 3511 - array - pointer to the data 3512 3513 Level: intermediate 3514 3515 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3516 @*/ 3517 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3518 { 3519 PetscFunctionBegin; 3520 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3521 PetscFunctionReturn(PETSC_SUCCESS); 3522 } 3523 3524 /*MC 3525 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3526 block sparse compressed row format. 3527 3528 Options Database Keys: 3529 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3530 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3531 3532 Level: beginner 3533 3534 Notes: 3535 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3536 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3537 3538 Run with `-info` to see what version of the matrix-vector product is being used 3539 3540 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3541 M*/ 3542 3543 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3544 3545 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3546 { 3547 PetscMPIInt size; 3548 Mat_SeqBAIJ *b; 3549 3550 PetscFunctionBegin; 3551 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3552 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3553 3554 PetscCall(PetscNew(&b)); 3555 B->data = (void *)b; 3556 B->ops[0] = MatOps_Values; 3557 3558 b->row = NULL; 3559 b->col = NULL; 3560 b->icol = NULL; 3561 b->reallocs = 0; 3562 b->saved_values = NULL; 3563 3564 b->roworiented = PETSC_TRUE; 3565 b->nonew = 0; 3566 b->diag = NULL; 3567 B->spptr = NULL; 3568 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3569 b->keepnonzeropattern = PETSC_FALSE; 3570 3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3574 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3575 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3576 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3577 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3578 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3579 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3580 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3581 #if defined(PETSC_HAVE_HYPRE) 3582 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3583 #endif 3584 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3585 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3586 PetscFunctionReturn(PETSC_SUCCESS); 3587 } 3588 3589 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3590 { 3591 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3592 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3593 3594 PetscFunctionBegin; 3595 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3596 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3597 3598 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3599 c->imax = a->imax; 3600 c->ilen = a->ilen; 3601 c->free_imax_ilen = PETSC_FALSE; 3602 } else { 3603 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3604 for (i = 0; i < mbs; i++) { 3605 c->imax[i] = a->imax[i]; 3606 c->ilen[i] = a->ilen[i]; 3607 } 3608 c->free_imax_ilen = PETSC_TRUE; 3609 } 3610 3611 /* allocate the matrix space */ 3612 if (mallocmatspace) { 3613 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3614 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3615 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3616 c->free_a = PETSC_TRUE; 3617 c->i = a->i; 3618 c->j = a->j; 3619 c->free_ij = PETSC_FALSE; 3620 c->parent = A; 3621 C->preallocated = PETSC_TRUE; 3622 C->assembled = PETSC_TRUE; 3623 3624 PetscCall(PetscObjectReference((PetscObject)A)); 3625 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3626 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3627 } else { 3628 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3629 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 3630 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3631 c->free_a = PETSC_TRUE; 3632 c->free_ij = PETSC_TRUE; 3633 3634 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3635 if (mbs > 0) { 3636 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3637 if (cpvalues == MAT_COPY_VALUES) { 3638 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3639 } else { 3640 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3641 } 3642 } 3643 C->preallocated = PETSC_TRUE; 3644 C->assembled = PETSC_TRUE; 3645 } 3646 } 3647 3648 c->roworiented = a->roworiented; 3649 c->nonew = a->nonew; 3650 3651 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3652 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3653 3654 c->bs2 = a->bs2; 3655 c->mbs = a->mbs; 3656 c->nbs = a->nbs; 3657 3658 if (a->diag) { 3659 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3660 c->diag = a->diag; 3661 c->free_diag = PETSC_FALSE; 3662 } else { 3663 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3664 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3665 c->free_diag = PETSC_TRUE; 3666 } 3667 } else c->diag = NULL; 3668 3669 c->nz = a->nz; 3670 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3671 c->solve_work = NULL; 3672 c->mult_work = NULL; 3673 c->sor_workt = NULL; 3674 c->sor_work = NULL; 3675 3676 c->compressedrow.use = a->compressedrow.use; 3677 c->compressedrow.nrows = a->compressedrow.nrows; 3678 if (a->compressedrow.use) { 3679 i = a->compressedrow.nrows; 3680 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3681 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3682 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3683 } else { 3684 c->compressedrow.use = PETSC_FALSE; 3685 c->compressedrow.i = NULL; 3686 c->compressedrow.rindex = NULL; 3687 } 3688 c->nonzerorowcnt = a->nonzerorowcnt; 3689 C->nonzerostate = A->nonzerostate; 3690 3691 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3692 PetscFunctionReturn(PETSC_SUCCESS); 3693 } 3694 3695 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3696 { 3697 PetscFunctionBegin; 3698 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3699 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3700 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3701 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3702 PetscFunctionReturn(PETSC_SUCCESS); 3703 } 3704 3705 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3706 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3707 { 3708 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3709 PetscInt *rowidxs, *colidxs; 3710 PetscScalar *matvals; 3711 3712 PetscFunctionBegin; 3713 PetscCall(PetscViewerSetUp(viewer)); 3714 3715 /* read matrix header */ 3716 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3717 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3718 M = header[1]; 3719 N = header[2]; 3720 nz = header[3]; 3721 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3722 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3723 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3724 3725 /* set block sizes from the viewer's .info file */ 3726 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3727 /* set local and global sizes if not set already */ 3728 if (mat->rmap->n < 0) mat->rmap->n = M; 3729 if (mat->cmap->n < 0) mat->cmap->n = N; 3730 if (mat->rmap->N < 0) mat->rmap->N = M; 3731 if (mat->cmap->N < 0) mat->cmap->N = N; 3732 PetscCall(PetscLayoutSetUp(mat->rmap)); 3733 PetscCall(PetscLayoutSetUp(mat->cmap)); 3734 3735 /* check if the matrix sizes are correct */ 3736 PetscCall(MatGetSize(mat, &rows, &cols)); 3737 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3738 PetscCall(MatGetBlockSize(mat, &bs)); 3739 PetscCall(MatGetLocalSize(mat, &m, &n)); 3740 mbs = m / bs; 3741 nbs = n / bs; 3742 3743 /* read in row lengths, column indices and nonzero values */ 3744 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3745 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3746 rowidxs[0] = 0; 3747 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3748 sum = rowidxs[m]; 3749 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3750 3751 /* read in column indices and nonzero values */ 3752 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3753 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3754 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3755 3756 { /* preallocate matrix storage */ 3757 PetscBT bt; /* helper bit set to count nonzeros */ 3758 PetscInt *nnz; 3759 PetscBool sbaij; 3760 3761 PetscCall(PetscBTCreate(nbs, &bt)); 3762 PetscCall(PetscCalloc1(mbs, &nnz)); 3763 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3764 for (i = 0; i < mbs; i++) { 3765 PetscCall(PetscBTMemzero(nbs, bt)); 3766 for (k = 0; k < bs; k++) { 3767 PetscInt row = bs * i + k; 3768 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3769 PetscInt col = colidxs[j]; 3770 if (!sbaij || col >= row) 3771 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3772 } 3773 } 3774 } 3775 PetscCall(PetscBTDestroy(&bt)); 3776 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3777 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3778 PetscCall(PetscFree(nnz)); 3779 } 3780 3781 /* store matrix values */ 3782 for (i = 0; i < m; i++) { 3783 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3784 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3785 } 3786 3787 PetscCall(PetscFree(rowidxs)); 3788 PetscCall(PetscFree2(colidxs, matvals)); 3789 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3790 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3791 PetscFunctionReturn(PETSC_SUCCESS); 3792 } 3793 3794 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3795 { 3796 PetscBool isbinary; 3797 3798 PetscFunctionBegin; 3799 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3800 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3801 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3802 PetscFunctionReturn(PETSC_SUCCESS); 3803 } 3804 3805 /*@ 3806 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3807 compressed row) format. For good matrix assembly performance the 3808 user should preallocate the matrix storage by setting the parameter `nz` 3809 (or the array `nnz`). 3810 3811 Collective 3812 3813 Input Parameters: 3814 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3815 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3816 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3817 . m - number of rows 3818 . n - number of columns 3819 . nz - number of nonzero blocks per block row (same for all rows) 3820 - nnz - array containing the number of nonzero blocks in the various block rows 3821 (possibly different for each block row) or `NULL` 3822 3823 Output Parameter: 3824 . A - the matrix 3825 3826 Options Database Keys: 3827 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3828 - -mat_block_size - size of the blocks to use 3829 3830 Level: intermediate 3831 3832 Notes: 3833 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3834 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3835 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3836 3837 The number of rows and columns must be divisible by blocksize. 3838 3839 If the `nnz` parameter is given then the `nz` parameter is ignored 3840 3841 A nonzero block is any block that as 1 or more nonzeros in it 3842 3843 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3844 storage. That is, the stored row and column indices can begin at 3845 either one (as in Fortran) or zero. 3846 3847 Specify the preallocated storage with either `nz` or `nnz` (not both). 3848 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3849 allocation. See [Sparse Matrices](sec_matsparse) for details. 3850 matrices. 3851 3852 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3853 @*/ 3854 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3855 { 3856 PetscFunctionBegin; 3857 PetscCall(MatCreate(comm, A)); 3858 PetscCall(MatSetSizes(*A, m, n, m, n)); 3859 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3860 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3861 PetscFunctionReturn(PETSC_SUCCESS); 3862 } 3863 3864 /*@ 3865 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3866 per row in the matrix. For good matrix assembly performance the 3867 user should preallocate the matrix storage by setting the parameter `nz` 3868 (or the array `nnz`). 3869 3870 Collective 3871 3872 Input Parameters: 3873 + B - the matrix 3874 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3875 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3876 . nz - number of block nonzeros per block row (same for all rows) 3877 - nnz - array containing the number of block nonzeros in the various block rows 3878 (possibly different for each block row) or `NULL` 3879 3880 Options Database Keys: 3881 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3882 - -mat_block_size - size of the blocks to use 3883 3884 Level: intermediate 3885 3886 Notes: 3887 If the `nnz` parameter is given then the `nz` parameter is ignored 3888 3889 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3890 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3891 You can also run with the option `-info` and look for messages with the string 3892 malloc in them to see if additional memory allocation was needed. 3893 3894 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3895 storage. That is, the stored row and column indices can begin at 3896 either one (as in Fortran) or zero. 3897 3898 Specify the preallocated storage with either `nz` or `nnz` (not both). 3899 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3900 allocation. See [Sparse Matrices](sec_matsparse) for details. 3901 3902 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3903 @*/ 3904 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3905 { 3906 PetscFunctionBegin; 3907 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3908 PetscValidType(B, 1); 3909 PetscValidLogicalCollectiveInt(B, bs, 2); 3910 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3911 PetscFunctionReturn(PETSC_SUCCESS); 3912 } 3913 3914 /*@C 3915 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3916 3917 Collective 3918 3919 Input Parameters: 3920 + B - the matrix 3921 . bs - the blocksize 3922 . i - the indices into `j` for the start of each local row (indices start with zero) 3923 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 3924 - v - optional values in the matrix, use `NULL` if not provided 3925 3926 Level: advanced 3927 3928 Notes: 3929 The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3930 3931 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3932 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3933 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3934 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3935 block column and the second index is over columns within a block. 3936 3937 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3938 3939 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3940 @*/ 3941 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3942 { 3943 PetscFunctionBegin; 3944 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3945 PetscValidType(B, 1); 3946 PetscValidLogicalCollectiveInt(B, bs, 2); 3947 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3948 PetscFunctionReturn(PETSC_SUCCESS); 3949 } 3950 3951 /*@ 3952 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3953 3954 Collective 3955 3956 Input Parameters: 3957 + comm - must be an MPI communicator of size 1 3958 . bs - size of block 3959 . m - number of rows 3960 . n - number of columns 3961 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3962 . j - column indices 3963 - a - matrix values 3964 3965 Output Parameter: 3966 . mat - the matrix 3967 3968 Level: advanced 3969 3970 Notes: 3971 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3972 once the matrix is destroyed 3973 3974 You cannot set new nonzero locations into this matrix, that will generate an error. 3975 3976 The `i` and `j` indices are 0 based 3977 3978 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3979 3980 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3981 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3982 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3983 with column-major ordering within blocks. 3984 3985 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3986 @*/ 3987 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3988 { 3989 Mat_SeqBAIJ *baij; 3990 3991 PetscFunctionBegin; 3992 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3993 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3994 3995 PetscCall(MatCreate(comm, mat)); 3996 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3997 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3998 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3999 baij = (Mat_SeqBAIJ *)(*mat)->data; 4000 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 4001 4002 baij->i = i; 4003 baij->j = j; 4004 baij->a = a; 4005 4006 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 4007 baij->free_a = PETSC_FALSE; 4008 baij->free_ij = PETSC_FALSE; 4009 baij->free_imax_ilen = PETSC_TRUE; 4010 4011 for (PetscInt ii = 0; ii < m; ii++) { 4012 const PetscInt row_len = i[ii + 1] - i[ii]; 4013 4014 baij->ilen[ii] = baij->imax[ii] = row_len; 4015 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 4016 } 4017 if (PetscDefined(USE_DEBUG)) { 4018 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 4019 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 4020 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 4021 } 4022 } 4023 4024 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 4025 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 4026 PetscFunctionReturn(PETSC_SUCCESS); 4027 } 4028 4029 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4030 { 4031 PetscFunctionBegin; 4032 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 4033 PetscFunctionReturn(PETSC_SUCCESS); 4034 } 4035