1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 19 20 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 21 { 22 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 23 PetscInt m, n, ib, jb, bs = A->rmap->bs; 24 MatScalar *a_val = a_aij->a; 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &m, &n)); 28 PetscCall(PetscArrayzero(reductions, n)); 29 if (type == NORM_2) { 30 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 31 for (jb = 0; jb < bs; jb++) { 32 for (ib = 0; ib < bs; ib++) { 33 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 34 a_val++; 35 } 36 } 37 } 38 } else if (type == NORM_1) { 39 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 40 for (jb = 0; jb < bs; jb++) { 41 for (ib = 0; ib < bs; ib++) { 42 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 43 a_val++; 44 } 45 } 46 } 47 } else if (type == NORM_INFINITY) { 48 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 49 for (jb = 0; jb < bs; jb++) { 50 for (ib = 0; ib < bs; ib++) { 51 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 52 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 53 a_val++; 54 } 55 } 56 } 57 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 58 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 59 for (jb = 0; jb < bs; jb++) { 60 for (ib = 0; ib < bs; ib++) { 61 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 62 a_val++; 63 } 64 } 65 } 66 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 67 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 68 for (jb = 0; jb < bs; jb++) { 69 for (ib = 0; ib < bs; ib++) { 70 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 71 a_val++; 72 } 73 } 74 } 75 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 76 if (type == NORM_2) { 77 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 78 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 80 } 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 85 { 86 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 87 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 88 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 89 PetscReal shift = 0.0; 90 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 91 92 PetscFunctionBegin; 93 allowzeropivot = PetscNot(A->erroriffailure); 94 95 if (a->idiagvalid) { 96 if (values) *values = a->idiag; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 100 diag_offset = a->diag; 101 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 102 diag = a->idiag; 103 if (values) *values = a->idiag; 104 /* factor and invert each block */ 105 switch (bs) { 106 case 1: 107 for (i = 0; i < mbs; i++) { 108 odiag = v + 1 * diag_offset[i]; 109 diag[0] = odiag[0]; 110 111 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 112 if (allowzeropivot) { 113 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 114 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 115 A->factorerror_zeropivot_row = i; 116 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 117 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 118 } 119 120 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 121 diag += 1; 122 } 123 break; 124 case 2: 125 for (i = 0; i < mbs; i++) { 126 odiag = v + 4 * diag_offset[i]; 127 diag[0] = odiag[0]; 128 diag[1] = odiag[1]; 129 diag[2] = odiag[2]; 130 diag[3] = odiag[3]; 131 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 132 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 133 diag += 4; 134 } 135 break; 136 case 3: 137 for (i = 0; i < mbs; i++) { 138 odiag = v + 9 * diag_offset[i]; 139 diag[0] = odiag[0]; 140 diag[1] = odiag[1]; 141 diag[2] = odiag[2]; 142 diag[3] = odiag[3]; 143 diag[4] = odiag[4]; 144 diag[5] = odiag[5]; 145 diag[6] = odiag[6]; 146 diag[7] = odiag[7]; 147 diag[8] = odiag[8]; 148 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 149 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 150 diag += 9; 151 } 152 break; 153 case 4: 154 for (i = 0; i < mbs; i++) { 155 odiag = v + 16 * diag_offset[i]; 156 PetscCall(PetscArraycpy(diag, odiag, 16)); 157 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 158 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 159 diag += 16; 160 } 161 break; 162 case 5: 163 for (i = 0; i < mbs; i++) { 164 odiag = v + 25 * diag_offset[i]; 165 PetscCall(PetscArraycpy(diag, odiag, 25)); 166 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 167 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 168 diag += 25; 169 } 170 break; 171 case 6: 172 for (i = 0; i < mbs; i++) { 173 odiag = v + 36 * diag_offset[i]; 174 PetscCall(PetscArraycpy(diag, odiag, 36)); 175 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 176 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 177 diag += 36; 178 } 179 break; 180 case 7: 181 for (i = 0; i < mbs; i++) { 182 odiag = v + 49 * diag_offset[i]; 183 PetscCall(PetscArraycpy(diag, odiag, 49)); 184 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 185 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 186 diag += 49; 187 } 188 break; 189 default: 190 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 191 for (i = 0; i < mbs; i++) { 192 odiag = v + bs2 * diag_offset[i]; 193 PetscCall(PetscArraycpy(diag, odiag, bs2)); 194 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 195 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 196 diag += bs2; 197 } 198 PetscCall(PetscFree2(v_work, v_pivots)); 199 } 200 a->idiagvalid = PETSC_TRUE; 201 PetscFunctionReturn(PETSC_SUCCESS); 202 } 203 204 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 205 { 206 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 207 PetscScalar *x, *work, *w, *workt, *t; 208 const MatScalar *v, *aa = a->a, *idiag; 209 const PetscScalar *b, *xb; 210 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 211 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 212 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 213 214 PetscFunctionBegin; 215 its = its * lits; 216 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 217 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 218 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 219 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 220 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 221 222 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 223 224 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 225 diag = a->diag; 226 idiag = a->idiag; 227 k = PetscMax(A->rmap->n, A->cmap->n); 228 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 229 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 230 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 231 work = a->mult_work; 232 t = a->sor_workt; 233 w = a->sor_work; 234 235 PetscCall(VecGetArray(xx, &x)); 236 PetscCall(VecGetArrayRead(bb, &b)); 237 238 if (flag & SOR_ZERO_INITIAL_GUESS) { 239 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 240 switch (bs) { 241 case 1: 242 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 243 t[0] = b[0]; 244 i2 = 1; 245 idiag += 1; 246 for (i = 1; i < m; i++) { 247 v = aa + ai[i]; 248 vi = aj + ai[i]; 249 nz = diag[i] - ai[i]; 250 s[0] = b[i2]; 251 for (j = 0; j < nz; j++) { 252 xw[0] = x[vi[j]]; 253 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 254 } 255 t[i2] = s[0]; 256 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 257 x[i2] = xw[0]; 258 idiag += 1; 259 i2 += 1; 260 } 261 break; 262 case 2: 263 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 264 t[0] = b[0]; 265 t[1] = b[1]; 266 i2 = 2; 267 idiag += 4; 268 for (i = 1; i < m; i++) { 269 v = aa + 4 * ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s[0] = b[i2]; 273 s[1] = b[i2 + 1]; 274 for (j = 0; j < nz; j++) { 275 idx = 2 * vi[j]; 276 it = 4 * j; 277 xw[0] = x[idx]; 278 xw[1] = x[1 + idx]; 279 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 280 } 281 t[i2] = s[0]; 282 t[i2 + 1] = s[1]; 283 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 284 x[i2] = xw[0]; 285 x[i2 + 1] = xw[1]; 286 idiag += 4; 287 i2 += 2; 288 } 289 break; 290 case 3: 291 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 292 t[0] = b[0]; 293 t[1] = b[1]; 294 t[2] = b[2]; 295 i2 = 3; 296 idiag += 9; 297 for (i = 1; i < m; i++) { 298 v = aa + 9 * ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; 302 s[1] = b[i2 + 1]; 303 s[2] = b[i2 + 2]; 304 while (nz--) { 305 idx = 3 * (*vi++); 306 xw[0] = x[idx]; 307 xw[1] = x[1 + idx]; 308 xw[2] = x[2 + idx]; 309 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 310 v += 9; 311 } 312 t[i2] = s[0]; 313 t[i2 + 1] = s[1]; 314 t[i2 + 2] = s[2]; 315 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 316 x[i2] = xw[0]; 317 x[i2 + 1] = xw[1]; 318 x[i2 + 2] = xw[2]; 319 idiag += 9; 320 i2 += 3; 321 } 322 break; 323 case 4: 324 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 325 t[0] = b[0]; 326 t[1] = b[1]; 327 t[2] = b[2]; 328 t[3] = b[3]; 329 i2 = 4; 330 idiag += 16; 331 for (i = 1; i < m; i++) { 332 v = aa + 16 * ai[i]; 333 vi = aj + ai[i]; 334 nz = diag[i] - ai[i]; 335 s[0] = b[i2]; 336 s[1] = b[i2 + 1]; 337 s[2] = b[i2 + 2]; 338 s[3] = b[i2 + 3]; 339 while (nz--) { 340 idx = 4 * (*vi++); 341 xw[0] = x[idx]; 342 xw[1] = x[1 + idx]; 343 xw[2] = x[2 + idx]; 344 xw[3] = x[3 + idx]; 345 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 346 v += 16; 347 } 348 t[i2] = s[0]; 349 t[i2 + 1] = s[1]; 350 t[i2 + 2] = s[2]; 351 t[i2 + 3] = s[3]; 352 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 353 x[i2] = xw[0]; 354 x[i2 + 1] = xw[1]; 355 x[i2 + 2] = xw[2]; 356 x[i2 + 3] = xw[3]; 357 idiag += 16; 358 i2 += 4; 359 } 360 break; 361 case 5: 362 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 363 t[0] = b[0]; 364 t[1] = b[1]; 365 t[2] = b[2]; 366 t[3] = b[3]; 367 t[4] = b[4]; 368 i2 = 5; 369 idiag += 25; 370 for (i = 1; i < m; i++) { 371 v = aa + 25 * ai[i]; 372 vi = aj + ai[i]; 373 nz = diag[i] - ai[i]; 374 s[0] = b[i2]; 375 s[1] = b[i2 + 1]; 376 s[2] = b[i2 + 2]; 377 s[3] = b[i2 + 3]; 378 s[4] = b[i2 + 4]; 379 while (nz--) { 380 idx = 5 * (*vi++); 381 xw[0] = x[idx]; 382 xw[1] = x[1 + idx]; 383 xw[2] = x[2 + idx]; 384 xw[3] = x[3 + idx]; 385 xw[4] = x[4 + idx]; 386 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 387 v += 25; 388 } 389 t[i2] = s[0]; 390 t[i2 + 1] = s[1]; 391 t[i2 + 2] = s[2]; 392 t[i2 + 3] = s[3]; 393 t[i2 + 4] = s[4]; 394 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 395 x[i2] = xw[0]; 396 x[i2 + 1] = xw[1]; 397 x[i2 + 2] = xw[2]; 398 x[i2 + 3] = xw[3]; 399 x[i2 + 4] = xw[4]; 400 idiag += 25; 401 i2 += 5; 402 } 403 break; 404 case 6: 405 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 406 t[0] = b[0]; 407 t[1] = b[1]; 408 t[2] = b[2]; 409 t[3] = b[3]; 410 t[4] = b[4]; 411 t[5] = b[5]; 412 i2 = 6; 413 idiag += 36; 414 for (i = 1; i < m; i++) { 415 v = aa + 36 * ai[i]; 416 vi = aj + ai[i]; 417 nz = diag[i] - ai[i]; 418 s[0] = b[i2]; 419 s[1] = b[i2 + 1]; 420 s[2] = b[i2 + 2]; 421 s[3] = b[i2 + 3]; 422 s[4] = b[i2 + 4]; 423 s[5] = b[i2 + 5]; 424 while (nz--) { 425 idx = 6 * (*vi++); 426 xw[0] = x[idx]; 427 xw[1] = x[1 + idx]; 428 xw[2] = x[2 + idx]; 429 xw[3] = x[3 + idx]; 430 xw[4] = x[4 + idx]; 431 xw[5] = x[5 + idx]; 432 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 433 v += 36; 434 } 435 t[i2] = s[0]; 436 t[i2 + 1] = s[1]; 437 t[i2 + 2] = s[2]; 438 t[i2 + 3] = s[3]; 439 t[i2 + 4] = s[4]; 440 t[i2 + 5] = s[5]; 441 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 442 x[i2] = xw[0]; 443 x[i2 + 1] = xw[1]; 444 x[i2 + 2] = xw[2]; 445 x[i2 + 3] = xw[3]; 446 x[i2 + 4] = xw[4]; 447 x[i2 + 5] = xw[5]; 448 idiag += 36; 449 i2 += 6; 450 } 451 break; 452 case 7: 453 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 454 t[0] = b[0]; 455 t[1] = b[1]; 456 t[2] = b[2]; 457 t[3] = b[3]; 458 t[4] = b[4]; 459 t[5] = b[5]; 460 t[6] = b[6]; 461 i2 = 7; 462 idiag += 49; 463 for (i = 1; i < m; i++) { 464 v = aa + 49 * ai[i]; 465 vi = aj + ai[i]; 466 nz = diag[i] - ai[i]; 467 s[0] = b[i2]; 468 s[1] = b[i2 + 1]; 469 s[2] = b[i2 + 2]; 470 s[3] = b[i2 + 3]; 471 s[4] = b[i2 + 4]; 472 s[5] = b[i2 + 5]; 473 s[6] = b[i2 + 6]; 474 while (nz--) { 475 idx = 7 * (*vi++); 476 xw[0] = x[idx]; 477 xw[1] = x[1 + idx]; 478 xw[2] = x[2 + idx]; 479 xw[3] = x[3 + idx]; 480 xw[4] = x[4 + idx]; 481 xw[5] = x[5 + idx]; 482 xw[6] = x[6 + idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 484 v += 49; 485 } 486 t[i2] = s[0]; 487 t[i2 + 1] = s[1]; 488 t[i2 + 2] = s[2]; 489 t[i2 + 3] = s[3]; 490 t[i2 + 4] = s[4]; 491 t[i2 + 5] = s[5]; 492 t[i2 + 6] = s[6]; 493 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 494 x[i2] = xw[0]; 495 x[i2 + 1] = xw[1]; 496 x[i2 + 2] = xw[2]; 497 x[i2 + 3] = xw[3]; 498 x[i2 + 4] = xw[4]; 499 x[i2 + 5] = xw[5]; 500 x[i2 + 6] = xw[6]; 501 idiag += 49; 502 i2 += 7; 503 } 504 break; 505 default: 506 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 507 PetscCall(PetscArraycpy(t, b, bs)); 508 i2 = bs; 509 idiag += bs2; 510 for (i = 1; i < m; i++) { 511 v = aa + bs2 * ai[i]; 512 vi = aj + ai[i]; 513 nz = diag[i] - ai[i]; 514 515 PetscCall(PetscArraycpy(w, b + i2, bs)); 516 /* copy all rows of x that are needed into contiguous space */ 517 workt = work; 518 for (j = 0; j < nz; j++) { 519 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 520 workt += bs; 521 } 522 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 523 PetscCall(PetscArraycpy(t + i2, w, bs)); 524 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 525 526 idiag += bs2; 527 i2 += bs; 528 } 529 break; 530 } 531 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 532 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 533 xb = t; 534 } else xb = b; 535 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 536 idiag = a->idiag + bs2 * (a->mbs - 1); 537 i2 = bs * (m - 1); 538 switch (bs) { 539 case 1: 540 s[0] = xb[i2]; 541 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 542 x[i2] = xw[0]; 543 i2 -= 1; 544 for (i = m - 2; i >= 0; i--) { 545 v = aa + (diag[i] + 1); 546 vi = aj + diag[i] + 1; 547 nz = ai[i + 1] - diag[i] - 1; 548 s[0] = xb[i2]; 549 for (j = 0; j < nz; j++) { 550 xw[0] = x[vi[j]]; 551 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 552 } 553 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 554 x[i2] = xw[0]; 555 idiag -= 1; 556 i2 -= 1; 557 } 558 break; 559 case 2: 560 s[0] = xb[i2]; 561 s[1] = xb[i2 + 1]; 562 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 563 x[i2] = xw[0]; 564 x[i2 + 1] = xw[1]; 565 i2 -= 2; 566 idiag -= 4; 567 for (i = m - 2; i >= 0; i--) { 568 v = aa + 4 * (diag[i] + 1); 569 vi = aj + diag[i] + 1; 570 nz = ai[i + 1] - diag[i] - 1; 571 s[0] = xb[i2]; 572 s[1] = xb[i2 + 1]; 573 for (j = 0; j < nz; j++) { 574 idx = 2 * vi[j]; 575 it = 4 * j; 576 xw[0] = x[idx]; 577 xw[1] = x[1 + idx]; 578 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 579 } 580 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 581 x[i2] = xw[0]; 582 x[i2 + 1] = xw[1]; 583 idiag -= 4; 584 i2 -= 2; 585 } 586 break; 587 case 3: 588 s[0] = xb[i2]; 589 s[1] = xb[i2 + 1]; 590 s[2] = xb[i2 + 2]; 591 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 592 x[i2] = xw[0]; 593 x[i2 + 1] = xw[1]; 594 x[i2 + 2] = xw[2]; 595 i2 -= 3; 596 idiag -= 9; 597 for (i = m - 2; i >= 0; i--) { 598 v = aa + 9 * (diag[i] + 1); 599 vi = aj + diag[i] + 1; 600 nz = ai[i + 1] - diag[i] - 1; 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 while (nz--) { 605 idx = 3 * (*vi++); 606 xw[0] = x[idx]; 607 xw[1] = x[1 + idx]; 608 xw[2] = x[2 + idx]; 609 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 610 v += 9; 611 } 612 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 613 x[i2] = xw[0]; 614 x[i2 + 1] = xw[1]; 615 x[i2 + 2] = xw[2]; 616 idiag -= 9; 617 i2 -= 3; 618 } 619 break; 620 case 4: 621 s[0] = xb[i2]; 622 s[1] = xb[i2 + 1]; 623 s[2] = xb[i2 + 2]; 624 s[3] = xb[i2 + 3]; 625 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 x[i2 + 3] = xw[3]; 630 i2 -= 4; 631 idiag -= 16; 632 for (i = m - 2; i >= 0; i--) { 633 v = aa + 16 * (diag[i] + 1); 634 vi = aj + diag[i] + 1; 635 nz = ai[i + 1] - diag[i] - 1; 636 s[0] = xb[i2]; 637 s[1] = xb[i2 + 1]; 638 s[2] = xb[i2 + 2]; 639 s[3] = xb[i2 + 3]; 640 while (nz--) { 641 idx = 4 * (*vi++); 642 xw[0] = x[idx]; 643 xw[1] = x[1 + idx]; 644 xw[2] = x[2 + idx]; 645 xw[3] = x[3 + idx]; 646 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 647 v += 16; 648 } 649 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 650 x[i2] = xw[0]; 651 x[i2 + 1] = xw[1]; 652 x[i2 + 2] = xw[2]; 653 x[i2 + 3] = xw[3]; 654 idiag -= 16; 655 i2 -= 4; 656 } 657 break; 658 case 5: 659 s[0] = xb[i2]; 660 s[1] = xb[i2 + 1]; 661 s[2] = xb[i2 + 2]; 662 s[3] = xb[i2 + 3]; 663 s[4] = xb[i2 + 4]; 664 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 665 x[i2] = xw[0]; 666 x[i2 + 1] = xw[1]; 667 x[i2 + 2] = xw[2]; 668 x[i2 + 3] = xw[3]; 669 x[i2 + 4] = xw[4]; 670 i2 -= 5; 671 idiag -= 25; 672 for (i = m - 2; i >= 0; i--) { 673 v = aa + 25 * (diag[i] + 1); 674 vi = aj + diag[i] + 1; 675 nz = ai[i + 1] - diag[i] - 1; 676 s[0] = xb[i2]; 677 s[1] = xb[i2 + 1]; 678 s[2] = xb[i2 + 2]; 679 s[3] = xb[i2 + 3]; 680 s[4] = xb[i2 + 4]; 681 while (nz--) { 682 idx = 5 * (*vi++); 683 xw[0] = x[idx]; 684 xw[1] = x[1 + idx]; 685 xw[2] = x[2 + idx]; 686 xw[3] = x[3 + idx]; 687 xw[4] = x[4 + idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 689 v += 25; 690 } 691 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 692 x[i2] = xw[0]; 693 x[i2 + 1] = xw[1]; 694 x[i2 + 2] = xw[2]; 695 x[i2 + 3] = xw[3]; 696 x[i2 + 4] = xw[4]; 697 idiag -= 25; 698 i2 -= 5; 699 } 700 break; 701 case 6: 702 s[0] = xb[i2]; 703 s[1] = xb[i2 + 1]; 704 s[2] = xb[i2 + 2]; 705 s[3] = xb[i2 + 3]; 706 s[4] = xb[i2 + 4]; 707 s[5] = xb[i2 + 5]; 708 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 709 x[i2] = xw[0]; 710 x[i2 + 1] = xw[1]; 711 x[i2 + 2] = xw[2]; 712 x[i2 + 3] = xw[3]; 713 x[i2 + 4] = xw[4]; 714 x[i2 + 5] = xw[5]; 715 i2 -= 6; 716 idiag -= 36; 717 for (i = m - 2; i >= 0; i--) { 718 v = aa + 36 * (diag[i] + 1); 719 vi = aj + diag[i] + 1; 720 nz = ai[i + 1] - diag[i] - 1; 721 s[0] = xb[i2]; 722 s[1] = xb[i2 + 1]; 723 s[2] = xb[i2 + 2]; 724 s[3] = xb[i2 + 3]; 725 s[4] = xb[i2 + 4]; 726 s[5] = xb[i2 + 5]; 727 while (nz--) { 728 idx = 6 * (*vi++); 729 xw[0] = x[idx]; 730 xw[1] = x[1 + idx]; 731 xw[2] = x[2 + idx]; 732 xw[3] = x[3 + idx]; 733 xw[4] = x[4 + idx]; 734 xw[5] = x[5 + idx]; 735 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 736 v += 36; 737 } 738 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 739 x[i2] = xw[0]; 740 x[i2 + 1] = xw[1]; 741 x[i2 + 2] = xw[2]; 742 x[i2 + 3] = xw[3]; 743 x[i2 + 4] = xw[4]; 744 x[i2 + 5] = xw[5]; 745 idiag -= 36; 746 i2 -= 6; 747 } 748 break; 749 case 7: 750 s[0] = xb[i2]; 751 s[1] = xb[i2 + 1]; 752 s[2] = xb[i2 + 2]; 753 s[3] = xb[i2 + 3]; 754 s[4] = xb[i2 + 4]; 755 s[5] = xb[i2 + 5]; 756 s[6] = xb[i2 + 6]; 757 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 758 x[i2] = xw[0]; 759 x[i2 + 1] = xw[1]; 760 x[i2 + 2] = xw[2]; 761 x[i2 + 3] = xw[3]; 762 x[i2 + 4] = xw[4]; 763 x[i2 + 5] = xw[5]; 764 x[i2 + 6] = xw[6]; 765 i2 -= 7; 766 idiag -= 49; 767 for (i = m - 2; i >= 0; i--) { 768 v = aa + 49 * (diag[i] + 1); 769 vi = aj + diag[i] + 1; 770 nz = ai[i + 1] - diag[i] - 1; 771 s[0] = xb[i2]; 772 s[1] = xb[i2 + 1]; 773 s[2] = xb[i2 + 2]; 774 s[3] = xb[i2 + 3]; 775 s[4] = xb[i2 + 4]; 776 s[5] = xb[i2 + 5]; 777 s[6] = xb[i2 + 6]; 778 while (nz--) { 779 idx = 7 * (*vi++); 780 xw[0] = x[idx]; 781 xw[1] = x[1 + idx]; 782 xw[2] = x[2 + idx]; 783 xw[3] = x[3 + idx]; 784 xw[4] = x[4 + idx]; 785 xw[5] = x[5 + idx]; 786 xw[6] = x[6 + idx]; 787 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 788 v += 49; 789 } 790 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 791 x[i2] = xw[0]; 792 x[i2 + 1] = xw[1]; 793 x[i2 + 2] = xw[2]; 794 x[i2 + 3] = xw[3]; 795 x[i2 + 4] = xw[4]; 796 x[i2 + 5] = xw[5]; 797 x[i2 + 6] = xw[6]; 798 idiag -= 49; 799 i2 -= 7; 800 } 801 break; 802 default: 803 PetscCall(PetscArraycpy(w, xb + i2, bs)); 804 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 805 i2 -= bs; 806 idiag -= bs2; 807 for (i = m - 2; i >= 0; i--) { 808 v = aa + bs2 * (diag[i] + 1); 809 vi = aj + diag[i] + 1; 810 nz = ai[i + 1] - diag[i] - 1; 811 812 PetscCall(PetscArraycpy(w, xb + i2, bs)); 813 /* copy all rows of x that are needed into contiguous space */ 814 workt = work; 815 for (j = 0; j < nz; j++) { 816 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 817 workt += bs; 818 } 819 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 820 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 821 822 idiag -= bs2; 823 i2 -= bs; 824 } 825 break; 826 } 827 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 828 } 829 its--; 830 } 831 while (its--) { 832 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 833 idiag = a->idiag; 834 i2 = 0; 835 switch (bs) { 836 case 1: 837 for (i = 0; i < m; i++) { 838 v = aa + ai[i]; 839 vi = aj + ai[i]; 840 nz = ai[i + 1] - ai[i]; 841 s[0] = b[i2]; 842 for (j = 0; j < nz; j++) { 843 xw[0] = x[vi[j]]; 844 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 845 } 846 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 847 x[i2] += xw[0]; 848 idiag += 1; 849 i2 += 1; 850 } 851 break; 852 case 2: 853 for (i = 0; i < m; i++) { 854 v = aa + 4 * ai[i]; 855 vi = aj + ai[i]; 856 nz = ai[i + 1] - ai[i]; 857 s[0] = b[i2]; 858 s[1] = b[i2 + 1]; 859 for (j = 0; j < nz; j++) { 860 idx = 2 * vi[j]; 861 it = 4 * j; 862 xw[0] = x[idx]; 863 xw[1] = x[1 + idx]; 864 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 865 } 866 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 867 x[i2] += xw[0]; 868 x[i2 + 1] += xw[1]; 869 idiag += 4; 870 i2 += 2; 871 } 872 break; 873 case 3: 874 for (i = 0; i < m; i++) { 875 v = aa + 9 * ai[i]; 876 vi = aj + ai[i]; 877 nz = ai[i + 1] - ai[i]; 878 s[0] = b[i2]; 879 s[1] = b[i2 + 1]; 880 s[2] = b[i2 + 2]; 881 while (nz--) { 882 idx = 3 * (*vi++); 883 xw[0] = x[idx]; 884 xw[1] = x[1 + idx]; 885 xw[2] = x[2 + idx]; 886 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 887 v += 9; 888 } 889 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 890 x[i2] += xw[0]; 891 x[i2 + 1] += xw[1]; 892 x[i2 + 2] += xw[2]; 893 idiag += 9; 894 i2 += 3; 895 } 896 break; 897 case 4: 898 for (i = 0; i < m; i++) { 899 v = aa + 16 * ai[i]; 900 vi = aj + ai[i]; 901 nz = ai[i + 1] - ai[i]; 902 s[0] = b[i2]; 903 s[1] = b[i2 + 1]; 904 s[2] = b[i2 + 2]; 905 s[3] = b[i2 + 3]; 906 while (nz--) { 907 idx = 4 * (*vi++); 908 xw[0] = x[idx]; 909 xw[1] = x[1 + idx]; 910 xw[2] = x[2 + idx]; 911 xw[3] = x[3 + idx]; 912 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 913 v += 16; 914 } 915 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 916 x[i2] += xw[0]; 917 x[i2 + 1] += xw[1]; 918 x[i2 + 2] += xw[2]; 919 x[i2 + 3] += xw[3]; 920 idiag += 16; 921 i2 += 4; 922 } 923 break; 924 case 5: 925 for (i = 0; i < m; i++) { 926 v = aa + 25 * ai[i]; 927 vi = aj + ai[i]; 928 nz = ai[i + 1] - ai[i]; 929 s[0] = b[i2]; 930 s[1] = b[i2 + 1]; 931 s[2] = b[i2 + 2]; 932 s[3] = b[i2 + 3]; 933 s[4] = b[i2 + 4]; 934 while (nz--) { 935 idx = 5 * (*vi++); 936 xw[0] = x[idx]; 937 xw[1] = x[1 + idx]; 938 xw[2] = x[2 + idx]; 939 xw[3] = x[3 + idx]; 940 xw[4] = x[4 + idx]; 941 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 942 v += 25; 943 } 944 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 945 x[i2] += xw[0]; 946 x[i2 + 1] += xw[1]; 947 x[i2 + 2] += xw[2]; 948 x[i2 + 3] += xw[3]; 949 x[i2 + 4] += xw[4]; 950 idiag += 25; 951 i2 += 5; 952 } 953 break; 954 case 6: 955 for (i = 0; i < m; i++) { 956 v = aa + 36 * ai[i]; 957 vi = aj + ai[i]; 958 nz = ai[i + 1] - ai[i]; 959 s[0] = b[i2]; 960 s[1] = b[i2 + 1]; 961 s[2] = b[i2 + 2]; 962 s[3] = b[i2 + 3]; 963 s[4] = b[i2 + 4]; 964 s[5] = b[i2 + 5]; 965 while (nz--) { 966 idx = 6 * (*vi++); 967 xw[0] = x[idx]; 968 xw[1] = x[1 + idx]; 969 xw[2] = x[2 + idx]; 970 xw[3] = x[3 + idx]; 971 xw[4] = x[4 + idx]; 972 xw[5] = x[5 + idx]; 973 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 974 v += 36; 975 } 976 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 977 x[i2] += xw[0]; 978 x[i2 + 1] += xw[1]; 979 x[i2 + 2] += xw[2]; 980 x[i2 + 3] += xw[3]; 981 x[i2 + 4] += xw[4]; 982 x[i2 + 5] += xw[5]; 983 idiag += 36; 984 i2 += 6; 985 } 986 break; 987 case 7: 988 for (i = 0; i < m; i++) { 989 v = aa + 49 * ai[i]; 990 vi = aj + ai[i]; 991 nz = ai[i + 1] - ai[i]; 992 s[0] = b[i2]; 993 s[1] = b[i2 + 1]; 994 s[2] = b[i2 + 2]; 995 s[3] = b[i2 + 3]; 996 s[4] = b[i2 + 4]; 997 s[5] = b[i2 + 5]; 998 s[6] = b[i2 + 6]; 999 while (nz--) { 1000 idx = 7 * (*vi++); 1001 xw[0] = x[idx]; 1002 xw[1] = x[1 + idx]; 1003 xw[2] = x[2 + idx]; 1004 xw[3] = x[3 + idx]; 1005 xw[4] = x[4 + idx]; 1006 xw[5] = x[5 + idx]; 1007 xw[6] = x[6 + idx]; 1008 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1009 v += 49; 1010 } 1011 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1012 x[i2] += xw[0]; 1013 x[i2 + 1] += xw[1]; 1014 x[i2 + 2] += xw[2]; 1015 x[i2 + 3] += xw[3]; 1016 x[i2 + 4] += xw[4]; 1017 x[i2 + 5] += xw[5]; 1018 x[i2 + 6] += xw[6]; 1019 idiag += 49; 1020 i2 += 7; 1021 } 1022 break; 1023 default: 1024 for (i = 0; i < m; i++) { 1025 v = aa + bs2 * ai[i]; 1026 vi = aj + ai[i]; 1027 nz = ai[i + 1] - ai[i]; 1028 1029 PetscCall(PetscArraycpy(w, b + i2, bs)); 1030 /* copy all rows of x that are needed into contiguous space */ 1031 workt = work; 1032 for (j = 0; j < nz; j++) { 1033 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1034 workt += bs; 1035 } 1036 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1037 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1038 1039 idiag += bs2; 1040 i2 += bs; 1041 } 1042 break; 1043 } 1044 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1047 idiag = a->idiag + bs2 * (a->mbs - 1); 1048 i2 = bs * (m - 1); 1049 switch (bs) { 1050 case 1: 1051 for (i = m - 1; i >= 0; i--) { 1052 v = aa + ai[i]; 1053 vi = aj + ai[i]; 1054 nz = ai[i + 1] - ai[i]; 1055 s[0] = b[i2]; 1056 for (j = 0; j < nz; j++) { 1057 xw[0] = x[vi[j]]; 1058 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1059 } 1060 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1061 x[i2] += xw[0]; 1062 idiag -= 1; 1063 i2 -= 1; 1064 } 1065 break; 1066 case 2: 1067 for (i = m - 1; i >= 0; i--) { 1068 v = aa + 4 * ai[i]; 1069 vi = aj + ai[i]; 1070 nz = ai[i + 1] - ai[i]; 1071 s[0] = b[i2]; 1072 s[1] = b[i2 + 1]; 1073 for (j = 0; j < nz; j++) { 1074 idx = 2 * vi[j]; 1075 it = 4 * j; 1076 xw[0] = x[idx]; 1077 xw[1] = x[1 + idx]; 1078 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1079 } 1080 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1081 x[i2] += xw[0]; 1082 x[i2 + 1] += xw[1]; 1083 idiag -= 4; 1084 i2 -= 2; 1085 } 1086 break; 1087 case 3: 1088 for (i = m - 1; i >= 0; i--) { 1089 v = aa + 9 * ai[i]; 1090 vi = aj + ai[i]; 1091 nz = ai[i + 1] - ai[i]; 1092 s[0] = b[i2]; 1093 s[1] = b[i2 + 1]; 1094 s[2] = b[i2 + 2]; 1095 while (nz--) { 1096 idx = 3 * (*vi++); 1097 xw[0] = x[idx]; 1098 xw[1] = x[1 + idx]; 1099 xw[2] = x[2 + idx]; 1100 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1101 v += 9; 1102 } 1103 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1104 x[i2] += xw[0]; 1105 x[i2 + 1] += xw[1]; 1106 x[i2 + 2] += xw[2]; 1107 idiag -= 9; 1108 i2 -= 3; 1109 } 1110 break; 1111 case 4: 1112 for (i = m - 1; i >= 0; i--) { 1113 v = aa + 16 * ai[i]; 1114 vi = aj + ai[i]; 1115 nz = ai[i + 1] - ai[i]; 1116 s[0] = b[i2]; 1117 s[1] = b[i2 + 1]; 1118 s[2] = b[i2 + 2]; 1119 s[3] = b[i2 + 3]; 1120 while (nz--) { 1121 idx = 4 * (*vi++); 1122 xw[0] = x[idx]; 1123 xw[1] = x[1 + idx]; 1124 xw[2] = x[2 + idx]; 1125 xw[3] = x[3 + idx]; 1126 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1127 v += 16; 1128 } 1129 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1130 x[i2] += xw[0]; 1131 x[i2 + 1] += xw[1]; 1132 x[i2 + 2] += xw[2]; 1133 x[i2 + 3] += xw[3]; 1134 idiag -= 16; 1135 i2 -= 4; 1136 } 1137 break; 1138 case 5: 1139 for (i = m - 1; i >= 0; i--) { 1140 v = aa + 25 * ai[i]; 1141 vi = aj + ai[i]; 1142 nz = ai[i + 1] - ai[i]; 1143 s[0] = b[i2]; 1144 s[1] = b[i2 + 1]; 1145 s[2] = b[i2 + 2]; 1146 s[3] = b[i2 + 3]; 1147 s[4] = b[i2 + 4]; 1148 while (nz--) { 1149 idx = 5 * (*vi++); 1150 xw[0] = x[idx]; 1151 xw[1] = x[1 + idx]; 1152 xw[2] = x[2 + idx]; 1153 xw[3] = x[3 + idx]; 1154 xw[4] = x[4 + idx]; 1155 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1156 v += 25; 1157 } 1158 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1159 x[i2] += xw[0]; 1160 x[i2 + 1] += xw[1]; 1161 x[i2 + 2] += xw[2]; 1162 x[i2 + 3] += xw[3]; 1163 x[i2 + 4] += xw[4]; 1164 idiag -= 25; 1165 i2 -= 5; 1166 } 1167 break; 1168 case 6: 1169 for (i = m - 1; i >= 0; i--) { 1170 v = aa + 36 * ai[i]; 1171 vi = aj + ai[i]; 1172 nz = ai[i + 1] - ai[i]; 1173 s[0] = b[i2]; 1174 s[1] = b[i2 + 1]; 1175 s[2] = b[i2 + 2]; 1176 s[3] = b[i2 + 3]; 1177 s[4] = b[i2 + 4]; 1178 s[5] = b[i2 + 5]; 1179 while (nz--) { 1180 idx = 6 * (*vi++); 1181 xw[0] = x[idx]; 1182 xw[1] = x[1 + idx]; 1183 xw[2] = x[2 + idx]; 1184 xw[3] = x[3 + idx]; 1185 xw[4] = x[4 + idx]; 1186 xw[5] = x[5 + idx]; 1187 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1188 v += 36; 1189 } 1190 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1191 x[i2] += xw[0]; 1192 x[i2 + 1] += xw[1]; 1193 x[i2 + 2] += xw[2]; 1194 x[i2 + 3] += xw[3]; 1195 x[i2 + 4] += xw[4]; 1196 x[i2 + 5] += xw[5]; 1197 idiag -= 36; 1198 i2 -= 6; 1199 } 1200 break; 1201 case 7: 1202 for (i = m - 1; i >= 0; i--) { 1203 v = aa + 49 * ai[i]; 1204 vi = aj + ai[i]; 1205 nz = ai[i + 1] - ai[i]; 1206 s[0] = b[i2]; 1207 s[1] = b[i2 + 1]; 1208 s[2] = b[i2 + 2]; 1209 s[3] = b[i2 + 3]; 1210 s[4] = b[i2 + 4]; 1211 s[5] = b[i2 + 5]; 1212 s[6] = b[i2 + 6]; 1213 while (nz--) { 1214 idx = 7 * (*vi++); 1215 xw[0] = x[idx]; 1216 xw[1] = x[1 + idx]; 1217 xw[2] = x[2 + idx]; 1218 xw[3] = x[3 + idx]; 1219 xw[4] = x[4 + idx]; 1220 xw[5] = x[5 + idx]; 1221 xw[6] = x[6 + idx]; 1222 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1223 v += 49; 1224 } 1225 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1226 x[i2] += xw[0]; 1227 x[i2 + 1] += xw[1]; 1228 x[i2 + 2] += xw[2]; 1229 x[i2 + 3] += xw[3]; 1230 x[i2 + 4] += xw[4]; 1231 x[i2 + 5] += xw[5]; 1232 x[i2 + 6] += xw[6]; 1233 idiag -= 49; 1234 i2 -= 7; 1235 } 1236 break; 1237 default: 1238 for (i = m - 1; i >= 0; i--) { 1239 v = aa + bs2 * ai[i]; 1240 vi = aj + ai[i]; 1241 nz = ai[i + 1] - ai[i]; 1242 1243 PetscCall(PetscArraycpy(w, b + i2, bs)); 1244 /* copy all rows of x that are needed into contiguous space */ 1245 workt = work; 1246 for (j = 0; j < nz; j++) { 1247 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1248 workt += bs; 1249 } 1250 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1251 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1252 1253 idiag -= bs2; 1254 i2 -= bs; 1255 } 1256 break; 1257 } 1258 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1259 } 1260 } 1261 PetscCall(VecRestoreArray(xx, &x)); 1262 PetscCall(VecRestoreArrayRead(bb, &b)); 1263 PetscFunctionReturn(PETSC_SUCCESS); 1264 } 1265 1266 /* 1267 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1268 */ 1269 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1270 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1271 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1272 #define matsetvaluesblocked4_ matsetvaluesblocked4 1273 #endif 1274 1275 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1276 { 1277 Mat A = *AA; 1278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1279 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1280 PetscInt *ai = a->i, *ailen = a->ilen; 1281 PetscInt *aj = a->j, stepval, lastcol = -1; 1282 const PetscScalar *value = v; 1283 MatScalar *ap, *aa = a->a, *bap; 1284 1285 PetscFunctionBegin; 1286 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1287 stepval = (n - 1) * 4; 1288 for (k = 0; k < m; k++) { /* loop over added rows */ 1289 row = im[k]; 1290 rp = aj + ai[row]; 1291 ap = aa + 16 * ai[row]; 1292 nrow = ailen[row]; 1293 low = 0; 1294 high = nrow; 1295 for (l = 0; l < n; l++) { /* loop over added columns */ 1296 col = in[l]; 1297 if (col <= lastcol) low = 0; 1298 else high = nrow; 1299 lastcol = col; 1300 value = v + k * (stepval + 4 + l) * 4; 1301 while (high - low > 7) { 1302 t = (low + high) / 2; 1303 if (rp[t] > col) high = t; 1304 else low = t; 1305 } 1306 for (i = low; i < high; i++) { 1307 if (rp[i] > col) break; 1308 if (rp[i] == col) { 1309 bap = ap + 16 * i; 1310 for (ii = 0; ii < 4; ii++, value += stepval) { 1311 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1312 } 1313 goto noinsert2; 1314 } 1315 } 1316 N = nrow++ - 1; 1317 high++; /* added new column index thus must search to one higher than before */ 1318 /* shift up all the later entries in this row */ 1319 for (ii = N; ii >= i; ii--) { 1320 rp[ii + 1] = rp[ii]; 1321 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1322 } 1323 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1324 rp[i] = col; 1325 bap = ap + 16 * i; 1326 for (ii = 0; ii < 4; ii++, value += stepval) { 1327 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1328 } 1329 noinsert2:; 1330 low = i; 1331 } 1332 ailen[row] = nrow; 1333 } 1334 PetscFunctionReturnVoid(); 1335 } 1336 1337 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1338 #define matsetvalues4_ MATSETVALUES4 1339 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1340 #define matsetvalues4_ matsetvalues4 1341 #endif 1342 1343 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1344 { 1345 Mat A = *AA; 1346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1347 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1348 PetscInt *ai = a->i, *ailen = a->ilen; 1349 PetscInt *aj = a->j, brow, bcol; 1350 PetscInt ridx, cidx, lastcol = -1; 1351 MatScalar *ap, value, *aa = a->a, *bap; 1352 1353 PetscFunctionBegin; 1354 for (k = 0; k < m; k++) { /* loop over added rows */ 1355 row = im[k]; 1356 brow = row / 4; 1357 rp = aj + ai[brow]; 1358 ap = aa + 16 * ai[brow]; 1359 nrow = ailen[brow]; 1360 low = 0; 1361 high = nrow; 1362 for (l = 0; l < n; l++) { /* loop over added columns */ 1363 col = in[l]; 1364 bcol = col / 4; 1365 ridx = row % 4; 1366 cidx = col % 4; 1367 value = v[l + k * n]; 1368 if (col <= lastcol) low = 0; 1369 else high = nrow; 1370 lastcol = col; 1371 while (high - low > 7) { 1372 t = (low + high) / 2; 1373 if (rp[t] > bcol) high = t; 1374 else low = t; 1375 } 1376 for (i = low; i < high; i++) { 1377 if (rp[i] > bcol) break; 1378 if (rp[i] == bcol) { 1379 bap = ap + 16 * i + 4 * cidx + ridx; 1380 *bap += value; 1381 goto noinsert1; 1382 } 1383 } 1384 N = nrow++ - 1; 1385 high++; /* added new column thus must search to one higher than before */ 1386 /* shift up all the later entries in this row */ 1387 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1388 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1389 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1390 rp[i] = bcol; 1391 ap[16 * i + 4 * cidx + ridx] = value; 1392 noinsert1:; 1393 low = i; 1394 } 1395 ailen[brow] = nrow; 1396 } 1397 PetscFunctionReturnVoid(); 1398 } 1399 1400 /* 1401 Checks for missing diagonals 1402 */ 1403 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1404 { 1405 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1406 PetscInt *diag, *ii = a->i, i; 1407 1408 PetscFunctionBegin; 1409 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1410 *missing = PETSC_FALSE; 1411 if (A->rmap->n > 0 && !ii) { 1412 *missing = PETSC_TRUE; 1413 if (d) *d = 0; 1414 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1415 } else { 1416 PetscInt n; 1417 n = PetscMin(a->mbs, a->nbs); 1418 diag = a->diag; 1419 for (i = 0; i < n; i++) { 1420 if (diag[i] >= ii[i + 1]) { 1421 *missing = PETSC_TRUE; 1422 if (d) *d = i; 1423 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1424 break; 1425 } 1426 } 1427 } 1428 PetscFunctionReturn(PETSC_SUCCESS); 1429 } 1430 1431 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1432 { 1433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1434 PetscInt i, j, m = a->mbs; 1435 1436 PetscFunctionBegin; 1437 if (!a->diag) { 1438 PetscCall(PetscMalloc1(m, &a->diag)); 1439 a->free_diag = PETSC_TRUE; 1440 } 1441 for (i = 0; i < m; i++) { 1442 a->diag[i] = a->i[i + 1]; 1443 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1444 if (a->j[j] == i) { 1445 a->diag[i] = j; 1446 break; 1447 } 1448 } 1449 } 1450 PetscFunctionReturn(PETSC_SUCCESS); 1451 } 1452 1453 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1454 { 1455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1456 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1457 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1458 1459 PetscFunctionBegin; 1460 *nn = n; 1461 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1462 if (symmetric) { 1463 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1464 nz = tia[n]; 1465 } else { 1466 tia = a->i; 1467 tja = a->j; 1468 } 1469 1470 if (!blockcompressed && bs > 1) { 1471 (*nn) *= bs; 1472 /* malloc & create the natural set of indices */ 1473 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1474 if (n) { 1475 (*ia)[0] = oshift; 1476 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1477 } 1478 1479 for (i = 1; i < n; i++) { 1480 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1481 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1482 } 1483 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1484 1485 if (inja) { 1486 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1487 cnt = 0; 1488 for (i = 0; i < n; i++) { 1489 for (j = 0; j < bs; j++) { 1490 for (k = tia[i]; k < tia[i + 1]; k++) { 1491 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1492 } 1493 } 1494 } 1495 } 1496 1497 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1498 PetscCall(PetscFree(tia)); 1499 PetscCall(PetscFree(tja)); 1500 } 1501 } else if (oshift == 1) { 1502 if (symmetric) { 1503 nz = tia[A->rmap->n / bs]; 1504 /* add 1 to i and j indices */ 1505 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1506 *ia = tia; 1507 if (ja) { 1508 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1509 *ja = tja; 1510 } 1511 } else { 1512 nz = a->i[A->rmap->n / bs]; 1513 /* malloc space and add 1 to i and j indices */ 1514 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1515 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1516 if (ja) { 1517 PetscCall(PetscMalloc1(nz, ja)); 1518 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1519 } 1520 } 1521 } else { 1522 *ia = tia; 1523 if (ja) *ja = tja; 1524 } 1525 PetscFunctionReturn(PETSC_SUCCESS); 1526 } 1527 1528 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1529 { 1530 PetscFunctionBegin; 1531 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1532 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1533 PetscCall(PetscFree(*ia)); 1534 if (ja) PetscCall(PetscFree(*ja)); 1535 } 1536 PetscFunctionReturn(PETSC_SUCCESS); 1537 } 1538 1539 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1540 { 1541 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1542 1543 PetscFunctionBegin; 1544 #if defined(PETSC_USE_LOG) 1545 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1546 #endif 1547 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1548 PetscCall(ISDestroy(&a->row)); 1549 PetscCall(ISDestroy(&a->col)); 1550 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1551 PetscCall(PetscFree(a->idiag)); 1552 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1553 PetscCall(PetscFree(a->solve_work)); 1554 PetscCall(PetscFree(a->mult_work)); 1555 PetscCall(PetscFree(a->sor_workt)); 1556 PetscCall(PetscFree(a->sor_work)); 1557 PetscCall(ISDestroy(&a->icol)); 1558 PetscCall(PetscFree(a->saved_values)); 1559 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1560 1561 PetscCall(MatDestroy(&a->sbaijMat)); 1562 PetscCall(MatDestroy(&a->parent)); 1563 PetscCall(PetscFree(A->data)); 1564 1565 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1566 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1567 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1568 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1569 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1570 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1571 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1572 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1573 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1574 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1575 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1576 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1577 #if defined(PETSC_HAVE_HYPRE) 1578 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1579 #endif 1580 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1581 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1582 PetscFunctionReturn(PETSC_SUCCESS); 1583 } 1584 1585 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1586 { 1587 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1588 1589 PetscFunctionBegin; 1590 switch (op) { 1591 case MAT_ROW_ORIENTED: 1592 a->roworiented = flg; 1593 break; 1594 case MAT_KEEP_NONZERO_PATTERN: 1595 a->keepnonzeropattern = flg; 1596 break; 1597 case MAT_NEW_NONZERO_LOCATIONS: 1598 a->nonew = (flg ? 0 : 1); 1599 break; 1600 case MAT_NEW_NONZERO_LOCATION_ERR: 1601 a->nonew = (flg ? -1 : 0); 1602 break; 1603 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1604 a->nonew = (flg ? -2 : 0); 1605 break; 1606 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1607 a->nounused = (flg ? -1 : 0); 1608 break; 1609 case MAT_FORCE_DIAGONAL_ENTRIES: 1610 case MAT_IGNORE_OFF_PROC_ENTRIES: 1611 case MAT_USE_HASH_TABLE: 1612 case MAT_SORTED_FULL: 1613 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1614 break; 1615 case MAT_SPD: 1616 case MAT_SYMMETRIC: 1617 case MAT_STRUCTURALLY_SYMMETRIC: 1618 case MAT_HERMITIAN: 1619 case MAT_SYMMETRY_ETERNAL: 1620 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1621 case MAT_SUBMAT_SINGLEIS: 1622 case MAT_STRUCTURE_ONLY: 1623 case MAT_SPD_ETERNAL: 1624 /* if the diagonal matrix is square it inherits some of the properties above */ 1625 break; 1626 default: 1627 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1628 } 1629 PetscFunctionReturn(PETSC_SUCCESS); 1630 } 1631 1632 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1633 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1634 { 1635 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1636 MatScalar *aa_i; 1637 PetscScalar *v_i; 1638 1639 PetscFunctionBegin; 1640 bs = A->rmap->bs; 1641 bs2 = bs * bs; 1642 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1643 1644 bn = row / bs; /* Block number */ 1645 bp = row % bs; /* Block Position */ 1646 M = ai[bn + 1] - ai[bn]; 1647 *nz = bs * M; 1648 1649 if (v) { 1650 *v = NULL; 1651 if (*nz) { 1652 PetscCall(PetscMalloc1(*nz, v)); 1653 for (i = 0; i < M; i++) { /* for each block in the block row */ 1654 v_i = *v + i * bs; 1655 aa_i = aa + bs2 * (ai[bn] + i); 1656 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1657 } 1658 } 1659 } 1660 1661 if (idx) { 1662 *idx = NULL; 1663 if (*nz) { 1664 PetscCall(PetscMalloc1(*nz, idx)); 1665 for (i = 0; i < M; i++) { /* for each block in the block row */ 1666 idx_i = *idx + i * bs; 1667 itmp = bs * aj[ai[bn] + i]; 1668 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1669 } 1670 } 1671 } 1672 PetscFunctionReturn(PETSC_SUCCESS); 1673 } 1674 1675 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1676 { 1677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1678 1679 PetscFunctionBegin; 1680 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1681 PetscFunctionReturn(PETSC_SUCCESS); 1682 } 1683 1684 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1685 { 1686 PetscFunctionBegin; 1687 if (nz) *nz = 0; 1688 if (idx) PetscCall(PetscFree(*idx)); 1689 if (v) PetscCall(PetscFree(*v)); 1690 PetscFunctionReturn(PETSC_SUCCESS); 1691 } 1692 1693 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1694 { 1695 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1696 Mat C; 1697 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1698 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1699 MatScalar *ata, *aa = a->a; 1700 1701 PetscFunctionBegin; 1702 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1703 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1704 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1705 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1706 1707 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1708 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1709 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1710 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1711 1712 at = (Mat_SeqBAIJ *)C->data; 1713 ati = at->i; 1714 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1715 } else { 1716 C = *B; 1717 at = (Mat_SeqBAIJ *)C->data; 1718 ati = at->i; 1719 } 1720 1721 atj = at->j; 1722 ata = at->a; 1723 1724 /* Copy ati into atfill so we have locations of the next free space in atj */ 1725 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1726 1727 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1728 for (i = 0; i < mbs; i++) { 1729 anzj = ai[i + 1] - ai[i]; 1730 for (j = 0; j < anzj; j++) { 1731 atj[atfill[*aj]] = i; 1732 for (kr = 0; kr < bs; kr++) { 1733 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1734 } 1735 atfill[*aj++] += 1; 1736 } 1737 } 1738 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1739 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1740 1741 /* Clean up temporary space and complete requests. */ 1742 PetscCall(PetscFree(atfill)); 1743 1744 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1745 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1746 *B = C; 1747 } else { 1748 PetscCall(MatHeaderMerge(A, &C)); 1749 } 1750 PetscFunctionReturn(PETSC_SUCCESS); 1751 } 1752 1753 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1754 { 1755 Mat Btrans; 1756 1757 PetscFunctionBegin; 1758 *f = PETSC_FALSE; 1759 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1760 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1761 PetscCall(MatDestroy(&Btrans)); 1762 PetscFunctionReturn(PETSC_SUCCESS); 1763 } 1764 1765 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1766 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1767 { 1768 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1769 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1770 PetscInt *rowlens, *colidxs; 1771 PetscScalar *matvals; 1772 1773 PetscFunctionBegin; 1774 PetscCall(PetscViewerSetUp(viewer)); 1775 1776 M = mat->rmap->N; 1777 N = mat->cmap->N; 1778 m = mat->rmap->n; 1779 bs = mat->rmap->bs; 1780 nz = bs * bs * A->nz; 1781 1782 /* write matrix header */ 1783 header[0] = MAT_FILE_CLASSID; 1784 header[1] = M; 1785 header[2] = N; 1786 header[3] = nz; 1787 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1788 1789 /* store row lengths */ 1790 PetscCall(PetscMalloc1(m, &rowlens)); 1791 for (cnt = 0, i = 0; i < A->mbs; i++) 1792 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1793 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1794 PetscCall(PetscFree(rowlens)); 1795 1796 /* store column indices */ 1797 PetscCall(PetscMalloc1(nz, &colidxs)); 1798 for (cnt = 0, i = 0; i < A->mbs; i++) 1799 for (k = 0; k < bs; k++) 1800 for (j = A->i[i]; j < A->i[i + 1]; j++) 1801 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1802 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1803 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1804 PetscCall(PetscFree(colidxs)); 1805 1806 /* store nonzero values */ 1807 PetscCall(PetscMalloc1(nz, &matvals)); 1808 for (cnt = 0, i = 0; i < A->mbs; i++) 1809 for (k = 0; k < bs; k++) 1810 for (j = A->i[i]; j < A->i[i + 1]; j++) 1811 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1812 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1813 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1814 PetscCall(PetscFree(matvals)); 1815 1816 /* write block size option to the viewer's .info file */ 1817 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1818 PetscFunctionReturn(PETSC_SUCCESS); 1819 } 1820 1821 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1822 { 1823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1824 PetscInt i, bs = A->rmap->bs, k; 1825 1826 PetscFunctionBegin; 1827 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1828 for (i = 0; i < a->mbs; i++) { 1829 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1830 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1831 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1832 } 1833 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1834 PetscFunctionReturn(PETSC_SUCCESS); 1835 } 1836 1837 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1838 { 1839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1840 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1841 PetscViewerFormat format; 1842 1843 PetscFunctionBegin; 1844 if (A->structure_only) { 1845 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1846 PetscFunctionReturn(PETSC_SUCCESS); 1847 } 1848 1849 PetscCall(PetscViewerGetFormat(viewer, &format)); 1850 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1851 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1852 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1853 const char *matname; 1854 Mat aij; 1855 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1856 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1857 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1858 PetscCall(MatView(aij, viewer)); 1859 PetscCall(MatDestroy(&aij)); 1860 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1861 PetscFunctionReturn(PETSC_SUCCESS); 1862 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1863 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1864 for (i = 0; i < a->mbs; i++) { 1865 for (j = 0; j < bs; j++) { 1866 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1867 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1868 for (l = 0; l < bs; l++) { 1869 #if defined(PETSC_USE_COMPLEX) 1870 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1871 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1872 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1873 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1874 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1875 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1876 } 1877 #else 1878 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1879 #endif 1880 } 1881 } 1882 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1883 } 1884 } 1885 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1886 } else { 1887 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1888 for (i = 0; i < a->mbs; i++) { 1889 for (j = 0; j < bs; j++) { 1890 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1891 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1892 for (l = 0; l < bs; l++) { 1893 #if defined(PETSC_USE_COMPLEX) 1894 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1895 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1896 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1897 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1898 } else { 1899 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1900 } 1901 #else 1902 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1903 #endif 1904 } 1905 } 1906 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1907 } 1908 } 1909 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1910 } 1911 PetscCall(PetscViewerFlush(viewer)); 1912 PetscFunctionReturn(PETSC_SUCCESS); 1913 } 1914 1915 #include <petscdraw.h> 1916 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1917 { 1918 Mat A = (Mat)Aa; 1919 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1920 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1921 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1922 MatScalar *aa; 1923 PetscViewer viewer; 1924 PetscViewerFormat format; 1925 1926 PetscFunctionBegin; 1927 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1928 PetscCall(PetscViewerGetFormat(viewer, &format)); 1929 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1930 1931 /* loop over matrix elements drawing boxes */ 1932 1933 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1934 PetscDrawCollectiveBegin(draw); 1935 /* Blue for negative, Cyan for zero and Red for positive */ 1936 color = PETSC_DRAW_BLUE; 1937 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1938 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1939 y_l = A->rmap->N - row - 1.0; 1940 y_r = y_l + 1.0; 1941 x_l = a->j[j] * bs; 1942 x_r = x_l + 1.0; 1943 aa = a->a + j * bs2; 1944 for (k = 0; k < bs; k++) { 1945 for (l = 0; l < bs; l++) { 1946 if (PetscRealPart(*aa++) >= 0.) continue; 1947 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1948 } 1949 } 1950 } 1951 } 1952 color = PETSC_DRAW_CYAN; 1953 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1954 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1955 y_l = A->rmap->N - row - 1.0; 1956 y_r = y_l + 1.0; 1957 x_l = a->j[j] * bs; 1958 x_r = x_l + 1.0; 1959 aa = a->a + j * bs2; 1960 for (k = 0; k < bs; k++) { 1961 for (l = 0; l < bs; l++) { 1962 if (PetscRealPart(*aa++) != 0.) continue; 1963 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1964 } 1965 } 1966 } 1967 } 1968 color = PETSC_DRAW_RED; 1969 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1970 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1971 y_l = A->rmap->N - row - 1.0; 1972 y_r = y_l + 1.0; 1973 x_l = a->j[j] * bs; 1974 x_r = x_l + 1.0; 1975 aa = a->a + j * bs2; 1976 for (k = 0; k < bs; k++) { 1977 for (l = 0; l < bs; l++) { 1978 if (PetscRealPart(*aa++) <= 0.) continue; 1979 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1980 } 1981 } 1982 } 1983 } 1984 PetscDrawCollectiveEnd(draw); 1985 } else { 1986 /* use contour shading to indicate magnitude of values */ 1987 /* first determine max of all nonzero values */ 1988 PetscReal minv = 0.0, maxv = 0.0; 1989 PetscDraw popup; 1990 1991 for (i = 0; i < a->nz * a->bs2; i++) { 1992 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1993 } 1994 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1995 PetscCall(PetscDrawGetPopup(draw, &popup)); 1996 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1997 1998 PetscDrawCollectiveBegin(draw); 1999 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2000 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2001 y_l = A->rmap->N - row - 1.0; 2002 y_r = y_l + 1.0; 2003 x_l = a->j[j] * bs; 2004 x_r = x_l + 1.0; 2005 aa = a->a + j * bs2; 2006 for (k = 0; k < bs; k++) { 2007 for (l = 0; l < bs; l++) { 2008 MatScalar v = *aa++; 2009 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2010 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2011 } 2012 } 2013 } 2014 } 2015 PetscDrawCollectiveEnd(draw); 2016 } 2017 PetscFunctionReturn(PETSC_SUCCESS); 2018 } 2019 2020 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2021 { 2022 PetscReal xl, yl, xr, yr, w, h; 2023 PetscDraw draw; 2024 PetscBool isnull; 2025 2026 PetscFunctionBegin; 2027 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2028 PetscCall(PetscDrawIsNull(draw, &isnull)); 2029 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2030 2031 xr = A->cmap->n; 2032 yr = A->rmap->N; 2033 h = yr / 10.0; 2034 w = xr / 10.0; 2035 xr += w; 2036 yr += h; 2037 xl = -w; 2038 yl = -h; 2039 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2040 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2041 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2042 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2043 PetscCall(PetscDrawSave(draw)); 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2048 { 2049 PetscBool iascii, isbinary, isdraw; 2050 2051 PetscFunctionBegin; 2052 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2053 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2054 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2055 if (iascii) { 2056 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2057 } else if (isbinary) { 2058 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2059 } else if (isdraw) { 2060 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2061 } else { 2062 Mat B; 2063 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2064 PetscCall(MatView(B, viewer)); 2065 PetscCall(MatDestroy(&B)); 2066 } 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2071 { 2072 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2073 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2074 PetscInt *ai = a->i, *ailen = a->ilen; 2075 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2076 MatScalar *ap, *aa = a->a; 2077 2078 PetscFunctionBegin; 2079 for (k = 0; k < m; k++) { /* loop over rows */ 2080 row = im[k]; 2081 brow = row / bs; 2082 if (row < 0) { 2083 v += n; 2084 continue; 2085 } /* negative row */ 2086 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2087 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2088 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2089 nrow = ailen[brow]; 2090 for (l = 0; l < n; l++) { /* loop over columns */ 2091 if (in[l] < 0) { 2092 v++; 2093 continue; 2094 } /* negative column */ 2095 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2096 col = in[l]; 2097 bcol = col / bs; 2098 cidx = col % bs; 2099 ridx = row % bs; 2100 high = nrow; 2101 low = 0; /* assume unsorted */ 2102 while (high - low > 5) { 2103 t = (low + high) / 2; 2104 if (rp[t] > bcol) high = t; 2105 else low = t; 2106 } 2107 for (i = low; i < high; i++) { 2108 if (rp[i] > bcol) break; 2109 if (rp[i] == bcol) { 2110 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2111 goto finished; 2112 } 2113 } 2114 *v++ = 0.0; 2115 finished:; 2116 } 2117 } 2118 PetscFunctionReturn(PETSC_SUCCESS); 2119 } 2120 2121 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2122 { 2123 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2124 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2125 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2126 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2127 PetscBool roworiented = a->roworiented; 2128 const PetscScalar *value = v; 2129 MatScalar *ap = NULL, *aa = a->a, *bap; 2130 2131 PetscFunctionBegin; 2132 if (roworiented) { 2133 stepval = (n - 1) * bs; 2134 } else { 2135 stepval = (m - 1) * bs; 2136 } 2137 for (k = 0; k < m; k++) { /* loop over added rows */ 2138 row = im[k]; 2139 if (row < 0) continue; 2140 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2141 rp = aj + ai[row]; 2142 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2143 rmax = imax[row]; 2144 nrow = ailen[row]; 2145 low = 0; 2146 high = nrow; 2147 for (l = 0; l < n; l++) { /* loop over added columns */ 2148 if (in[l] < 0) continue; 2149 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2150 col = in[l]; 2151 if (!A->structure_only) { 2152 if (roworiented) { 2153 value = v + (k * (stepval + bs) + l) * bs; 2154 } else { 2155 value = v + (l * (stepval + bs) + k) * bs; 2156 } 2157 } 2158 if (col <= lastcol) low = 0; 2159 else high = nrow; 2160 lastcol = col; 2161 while (high - low > 7) { 2162 t = (low + high) / 2; 2163 if (rp[t] > col) high = t; 2164 else low = t; 2165 } 2166 for (i = low; i < high; i++) { 2167 if (rp[i] > col) break; 2168 if (rp[i] == col) { 2169 if (A->structure_only) goto noinsert2; 2170 bap = ap + bs2 * i; 2171 if (roworiented) { 2172 if (is == ADD_VALUES) { 2173 for (ii = 0; ii < bs; ii++, value += stepval) { 2174 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2175 } 2176 } else { 2177 for (ii = 0; ii < bs; ii++, value += stepval) { 2178 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2179 } 2180 } 2181 } else { 2182 if (is == ADD_VALUES) { 2183 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2184 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2185 bap += bs; 2186 } 2187 } else { 2188 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2189 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2190 bap += bs; 2191 } 2192 } 2193 } 2194 goto noinsert2; 2195 } 2196 } 2197 if (nonew == 1) goto noinsert2; 2198 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2199 if (A->structure_only) { 2200 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2201 } else { 2202 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2203 } 2204 N = nrow++ - 1; 2205 high++; 2206 /* shift up all the later entries in this row */ 2207 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2208 rp[i] = col; 2209 if (!A->structure_only) { 2210 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2211 bap = ap + bs2 * i; 2212 if (roworiented) { 2213 for (ii = 0; ii < bs; ii++, value += stepval) { 2214 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2215 } 2216 } else { 2217 for (ii = 0; ii < bs; ii++, value += stepval) { 2218 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2219 } 2220 } 2221 } 2222 noinsert2:; 2223 low = i; 2224 } 2225 ailen[row] = nrow; 2226 } 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2231 { 2232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2233 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2234 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2235 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2236 MatScalar *aa = a->a, *ap; 2237 PetscReal ratio = 0.6; 2238 2239 PetscFunctionBegin; 2240 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 2241 2242 if (m) rmax = ailen[0]; 2243 for (i = 1; i < mbs; i++) { 2244 /* move each row back by the amount of empty slots (fshift) before it*/ 2245 fshift += imax[i - 1] - ailen[i - 1]; 2246 rmax = PetscMax(rmax, ailen[i]); 2247 if (fshift) { 2248 ip = aj + ai[i]; 2249 ap = aa + bs2 * ai[i]; 2250 N = ailen[i]; 2251 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2252 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2253 } 2254 ai[i] = ai[i - 1] + ailen[i - 1]; 2255 } 2256 if (mbs) { 2257 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2258 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2259 } 2260 2261 /* reset ilen and imax for each row */ 2262 a->nonzerorowcnt = 0; 2263 if (A->structure_only) { 2264 PetscCall(PetscFree2(a->imax, a->ilen)); 2265 } else { /* !A->structure_only */ 2266 for (i = 0; i < mbs; i++) { 2267 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2268 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2269 } 2270 } 2271 a->nz = ai[mbs]; 2272 2273 /* diagonals may have moved, so kill the diagonal pointers */ 2274 a->idiagvalid = PETSC_FALSE; 2275 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2276 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2277 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2278 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2279 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2280 2281 A->info.mallocs += a->reallocs; 2282 a->reallocs = 0; 2283 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2284 a->rmax = rmax; 2285 2286 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2287 PetscFunctionReturn(PETSC_SUCCESS); 2288 } 2289 2290 /* 2291 This function returns an array of flags which indicate the locations of contiguous 2292 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2293 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2294 Assume: sizes should be long enough to hold all the values. 2295 */ 2296 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2297 { 2298 PetscInt j = 0; 2299 2300 PetscFunctionBegin; 2301 for (PetscInt i = 0; i < n; j++) { 2302 PetscInt row = idx[i]; 2303 if (row % bs != 0) { /* Not the beginning of a block */ 2304 sizes[j] = 1; 2305 i++; 2306 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2307 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2308 i++; 2309 } else { /* Beginning of the block, so check if the complete block exists */ 2310 PetscBool flg = PETSC_TRUE; 2311 for (PetscInt k = 1; k < bs; k++) { 2312 if (row + k != idx[i + k]) { /* break in the block */ 2313 flg = PETSC_FALSE; 2314 break; 2315 } 2316 } 2317 if (flg) { /* No break in the bs */ 2318 sizes[j] = bs; 2319 i += bs; 2320 } else { 2321 sizes[j] = 1; 2322 i++; 2323 } 2324 } 2325 } 2326 *bs_max = j; 2327 PetscFunctionReturn(PETSC_SUCCESS); 2328 } 2329 2330 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2331 { 2332 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2333 PetscInt i, j, k, count, *rows; 2334 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2335 PetscScalar zero = 0.0; 2336 MatScalar *aa; 2337 const PetscScalar *xx; 2338 PetscScalar *bb; 2339 2340 PetscFunctionBegin; 2341 /* fix right hand side if needed */ 2342 if (x && b) { 2343 PetscCall(VecGetArrayRead(x, &xx)); 2344 PetscCall(VecGetArray(b, &bb)); 2345 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2346 PetscCall(VecRestoreArrayRead(x, &xx)); 2347 PetscCall(VecRestoreArray(b, &bb)); 2348 } 2349 2350 /* Make a copy of the IS and sort it */ 2351 /* allocate memory for rows,sizes */ 2352 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2353 2354 /* copy IS values to rows, and sort them */ 2355 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2356 PetscCall(PetscSortInt(is_n, rows)); 2357 2358 if (baij->keepnonzeropattern) { 2359 for (i = 0; i < is_n; i++) sizes[i] = 1; 2360 bs_max = is_n; 2361 } else { 2362 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2363 A->nonzerostate++; 2364 } 2365 2366 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2367 row = rows[j]; 2368 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2369 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2370 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2371 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2372 if (diag != (PetscScalar)0.0) { 2373 if (baij->ilen[row / bs] > 0) { 2374 baij->ilen[row / bs] = 1; 2375 baij->j[baij->i[row / bs]] = row / bs; 2376 2377 PetscCall(PetscArrayzero(aa, count * bs)); 2378 } 2379 /* Now insert all the diagonal values for this bs */ 2380 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2381 } else { /* (diag == 0.0) */ 2382 baij->ilen[row / bs] = 0; 2383 } /* end (diag == 0.0) */ 2384 } else { /* (sizes[i] != bs) */ 2385 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2386 for (k = 0; k < count; k++) { 2387 aa[0] = zero; 2388 aa += bs; 2389 } 2390 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2391 } 2392 } 2393 2394 PetscCall(PetscFree2(rows, sizes)); 2395 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2396 PetscFunctionReturn(PETSC_SUCCESS); 2397 } 2398 2399 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2400 { 2401 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2402 PetscInt i, j, k, count; 2403 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2404 PetscScalar zero = 0.0; 2405 MatScalar *aa; 2406 const PetscScalar *xx; 2407 PetscScalar *bb; 2408 PetscBool *zeroed, vecs = PETSC_FALSE; 2409 2410 PetscFunctionBegin; 2411 /* fix right hand side if needed */ 2412 if (x && b) { 2413 PetscCall(VecGetArrayRead(x, &xx)); 2414 PetscCall(VecGetArray(b, &bb)); 2415 vecs = PETSC_TRUE; 2416 } 2417 2418 /* zero the columns */ 2419 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2420 for (i = 0; i < is_n; i++) { 2421 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2422 zeroed[is_idx[i]] = PETSC_TRUE; 2423 } 2424 for (i = 0; i < A->rmap->N; i++) { 2425 if (!zeroed[i]) { 2426 row = i / bs; 2427 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2428 for (k = 0; k < bs; k++) { 2429 col = bs * baij->j[j] + k; 2430 if (zeroed[col]) { 2431 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2432 if (vecs) bb[i] -= aa[0] * xx[col]; 2433 aa[0] = 0.0; 2434 } 2435 } 2436 } 2437 } else if (vecs) bb[i] = diag * xx[i]; 2438 } 2439 PetscCall(PetscFree(zeroed)); 2440 if (vecs) { 2441 PetscCall(VecRestoreArrayRead(x, &xx)); 2442 PetscCall(VecRestoreArray(b, &bb)); 2443 } 2444 2445 /* zero the rows */ 2446 for (i = 0; i < is_n; i++) { 2447 row = is_idx[i]; 2448 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2449 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2450 for (k = 0; k < count; k++) { 2451 aa[0] = zero; 2452 aa += bs; 2453 } 2454 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2455 } 2456 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2457 PetscFunctionReturn(PETSC_SUCCESS); 2458 } 2459 2460 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2461 { 2462 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2463 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2464 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2465 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2466 PetscInt ridx, cidx, bs2 = a->bs2; 2467 PetscBool roworiented = a->roworiented; 2468 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2469 2470 PetscFunctionBegin; 2471 for (k = 0; k < m; k++) { /* loop over added rows */ 2472 row = im[k]; 2473 brow = row / bs; 2474 if (row < 0) continue; 2475 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2476 rp = aj + ai[brow]; 2477 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2478 rmax = imax[brow]; 2479 nrow = ailen[brow]; 2480 low = 0; 2481 high = nrow; 2482 for (l = 0; l < n; l++) { /* loop over added columns */ 2483 if (in[l] < 0) continue; 2484 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2485 col = in[l]; 2486 bcol = col / bs; 2487 ridx = row % bs; 2488 cidx = col % bs; 2489 if (!A->structure_only) { 2490 if (roworiented) { 2491 value = v[l + k * n]; 2492 } else { 2493 value = v[k + l * m]; 2494 } 2495 } 2496 if (col <= lastcol) low = 0; 2497 else high = nrow; 2498 lastcol = col; 2499 while (high - low > 7) { 2500 t = (low + high) / 2; 2501 if (rp[t] > bcol) high = t; 2502 else low = t; 2503 } 2504 for (i = low; i < high; i++) { 2505 if (rp[i] > bcol) break; 2506 if (rp[i] == bcol) { 2507 bap = ap + bs2 * i + bs * cidx + ridx; 2508 if (!A->structure_only) { 2509 if (is == ADD_VALUES) *bap += value; 2510 else *bap = value; 2511 } 2512 goto noinsert1; 2513 } 2514 } 2515 if (nonew == 1) goto noinsert1; 2516 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2517 if (A->structure_only) { 2518 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2519 } else { 2520 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2521 } 2522 N = nrow++ - 1; 2523 high++; 2524 /* shift up all the later entries in this row */ 2525 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2526 rp[i] = bcol; 2527 if (!A->structure_only) { 2528 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2529 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2530 ap[bs2 * i + bs * cidx + ridx] = value; 2531 } 2532 a->nz++; 2533 A->nonzerostate++; 2534 noinsert1:; 2535 low = i; 2536 } 2537 ailen[brow] = nrow; 2538 } 2539 PetscFunctionReturn(PETSC_SUCCESS); 2540 } 2541 2542 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2543 { 2544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2545 Mat outA; 2546 PetscBool row_identity, col_identity; 2547 2548 PetscFunctionBegin; 2549 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2550 PetscCall(ISIdentity(row, &row_identity)); 2551 PetscCall(ISIdentity(col, &col_identity)); 2552 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2553 2554 outA = inA; 2555 inA->factortype = MAT_FACTOR_LU; 2556 PetscCall(PetscFree(inA->solvertype)); 2557 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2558 2559 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2560 2561 PetscCall(PetscObjectReference((PetscObject)row)); 2562 PetscCall(ISDestroy(&a->row)); 2563 a->row = row; 2564 PetscCall(PetscObjectReference((PetscObject)col)); 2565 PetscCall(ISDestroy(&a->col)); 2566 a->col = col; 2567 2568 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2569 PetscCall(ISDestroy(&a->icol)); 2570 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2571 2572 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2573 if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); } 2574 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2575 PetscFunctionReturn(PETSC_SUCCESS); 2576 } 2577 2578 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2579 { 2580 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2581 2582 PetscFunctionBegin; 2583 baij->nz = baij->maxnz; 2584 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2585 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2586 PetscFunctionReturn(PETSC_SUCCESS); 2587 } 2588 2589 /*@ 2590 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2591 2592 Input Parameters: 2593 + mat - the `MATSEQBAIJ` matrix 2594 - indices - the column indices 2595 2596 Level: advanced 2597 2598 Notes: 2599 This can be called if you have precomputed the nonzero structure of the 2600 matrix and want to provide it to the matrix object to improve the performance 2601 of the `MatSetValues()` operation. 2602 2603 You MUST have set the correct numbers of nonzeros per row in the call to 2604 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2605 2606 MUST be called before any calls to `MatSetValues()` 2607 2608 .seealso: `MATSEQBAIJ`, `MatSetValues()` 2609 @*/ 2610 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2611 { 2612 PetscFunctionBegin; 2613 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2614 PetscValidIntPointer(indices, 2); 2615 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2616 PetscFunctionReturn(PETSC_SUCCESS); 2617 } 2618 2619 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2620 { 2621 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2622 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2623 PetscReal atmp; 2624 PetscScalar *x, zero = 0.0; 2625 MatScalar *aa; 2626 PetscInt ncols, brow, krow, kcol; 2627 2628 PetscFunctionBegin; 2629 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2630 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2631 bs = A->rmap->bs; 2632 aa = a->a; 2633 ai = a->i; 2634 aj = a->j; 2635 mbs = a->mbs; 2636 2637 PetscCall(VecSet(v, zero)); 2638 PetscCall(VecGetArray(v, &x)); 2639 PetscCall(VecGetLocalSize(v, &n)); 2640 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2641 for (i = 0; i < mbs; i++) { 2642 ncols = ai[1] - ai[0]; 2643 ai++; 2644 brow = bs * i; 2645 for (j = 0; j < ncols; j++) { 2646 for (kcol = 0; kcol < bs; kcol++) { 2647 for (krow = 0; krow < bs; krow++) { 2648 atmp = PetscAbsScalar(*aa); 2649 aa++; 2650 row = brow + krow; /* row index */ 2651 if (PetscAbsScalar(x[row]) < atmp) { 2652 x[row] = atmp; 2653 if (idx) idx[row] = bs * (*aj) + kcol; 2654 } 2655 } 2656 } 2657 aj++; 2658 } 2659 } 2660 PetscCall(VecRestoreArray(v, &x)); 2661 PetscFunctionReturn(PETSC_SUCCESS); 2662 } 2663 2664 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2665 { 2666 PetscFunctionBegin; 2667 /* If the two matrices have the same copy implementation, use fast copy. */ 2668 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2669 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2670 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2671 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2672 2673 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2674 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2675 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2676 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2677 } else { 2678 PetscCall(MatCopy_Basic(A, B, str)); 2679 } 2680 PetscFunctionReturn(PETSC_SUCCESS); 2681 } 2682 2683 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2684 { 2685 PetscFunctionBegin; 2686 PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2691 { 2692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 *array = a->a; 2696 PetscFunctionReturn(PETSC_SUCCESS); 2697 } 2698 2699 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2700 { 2701 PetscFunctionBegin; 2702 *array = NULL; 2703 PetscFunctionReturn(PETSC_SUCCESS); 2704 } 2705 2706 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2707 { 2708 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2709 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2710 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2711 2712 PetscFunctionBegin; 2713 /* Set the number of nonzeros in the new matrix */ 2714 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2715 PetscFunctionReturn(PETSC_SUCCESS); 2716 } 2717 2718 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2719 { 2720 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2721 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2722 PetscBLASInt one = 1; 2723 2724 PetscFunctionBegin; 2725 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2726 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2727 if (e) { 2728 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2729 if (e) { 2730 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2731 if (e) str = SAME_NONZERO_PATTERN; 2732 } 2733 } 2734 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2735 } 2736 if (str == SAME_NONZERO_PATTERN) { 2737 PetscScalar alpha = a; 2738 PetscBLASInt bnz; 2739 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2740 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2741 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2742 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2743 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2744 } else { 2745 Mat B; 2746 PetscInt *nnz; 2747 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2748 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2749 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2750 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2751 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2752 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2753 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2754 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2755 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2756 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2757 PetscCall(MatHeaderMerge(Y, &B)); 2758 PetscCall(PetscFree(nnz)); 2759 } 2760 PetscFunctionReturn(PETSC_SUCCESS); 2761 } 2762 2763 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2764 { 2765 #if PetscDefined(USE_COMPLEX) 2766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2767 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2768 MatScalar *aa = a->a; 2769 2770 PetscFunctionBegin; 2771 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2772 PetscFunctionReturn(PETSC_SUCCESS); 2773 #else 2774 (void)A; 2775 return PETSC_SUCCESS; 2776 #endif 2777 } 2778 2779 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2780 { 2781 #if PetscDefined(USE_COMPLEX) 2782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2783 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2784 MatScalar *aa = a->a; 2785 2786 PetscFunctionBegin; 2787 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2788 PetscFunctionReturn(PETSC_SUCCESS); 2789 #else 2790 (void)A; 2791 return PETSC_SUCCESS; 2792 #endif 2793 } 2794 2795 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2796 { 2797 #if PetscDefined(USE_COMPLEX) 2798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2799 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2800 MatScalar *aa = a->a; 2801 2802 PetscFunctionBegin; 2803 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2804 PetscFunctionReturn(PETSC_SUCCESS); 2805 #else 2806 (void)A; 2807 return PETSC_SUCCESS; 2808 #endif 2809 } 2810 2811 /* 2812 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2813 */ 2814 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2815 { 2816 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2817 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2818 PetscInt nz = a->i[m], row, *jj, mr, col; 2819 2820 PetscFunctionBegin; 2821 *nn = n; 2822 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2823 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2824 PetscCall(PetscCalloc1(n, &collengths)); 2825 PetscCall(PetscMalloc1(n + 1, &cia)); 2826 PetscCall(PetscMalloc1(nz, &cja)); 2827 jj = a->j; 2828 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2829 cia[0] = oshift; 2830 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2831 PetscCall(PetscArrayzero(collengths, n)); 2832 jj = a->j; 2833 for (row = 0; row < m; row++) { 2834 mr = a->i[row + 1] - a->i[row]; 2835 for (i = 0; i < mr; i++) { 2836 col = *jj++; 2837 2838 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2839 } 2840 } 2841 PetscCall(PetscFree(collengths)); 2842 *ia = cia; 2843 *ja = cja; 2844 PetscFunctionReturn(PETSC_SUCCESS); 2845 } 2846 2847 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2848 { 2849 PetscFunctionBegin; 2850 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2851 PetscCall(PetscFree(*ia)); 2852 PetscCall(PetscFree(*ja)); 2853 PetscFunctionReturn(PETSC_SUCCESS); 2854 } 2855 2856 /* 2857 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2858 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2859 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2860 */ 2861 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2862 { 2863 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2864 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2865 PetscInt nz = a->i[m], row, *jj, mr, col; 2866 PetscInt *cspidx; 2867 2868 PetscFunctionBegin; 2869 *nn = n; 2870 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2871 2872 PetscCall(PetscCalloc1(n, &collengths)); 2873 PetscCall(PetscMalloc1(n + 1, &cia)); 2874 PetscCall(PetscMalloc1(nz, &cja)); 2875 PetscCall(PetscMalloc1(nz, &cspidx)); 2876 jj = a->j; 2877 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2878 cia[0] = oshift; 2879 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2880 PetscCall(PetscArrayzero(collengths, n)); 2881 jj = a->j; 2882 for (row = 0; row < m; row++) { 2883 mr = a->i[row + 1] - a->i[row]; 2884 for (i = 0; i < mr; i++) { 2885 col = *jj++; 2886 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2887 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2888 } 2889 } 2890 PetscCall(PetscFree(collengths)); 2891 *ia = cia; 2892 *ja = cja; 2893 *spidx = cspidx; 2894 PetscFunctionReturn(PETSC_SUCCESS); 2895 } 2896 2897 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2898 { 2899 PetscFunctionBegin; 2900 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2901 PetscCall(PetscFree(*spidx)); 2902 PetscFunctionReturn(PETSC_SUCCESS); 2903 } 2904 2905 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2906 { 2907 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2908 2909 PetscFunctionBegin; 2910 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2911 PetscCall(MatShift_Basic(Y, a)); 2912 PetscFunctionReturn(PETSC_SUCCESS); 2913 } 2914 2915 /* -------------------------------------------------------------------*/ 2916 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2917 MatGetRow_SeqBAIJ, 2918 MatRestoreRow_SeqBAIJ, 2919 MatMult_SeqBAIJ_N, 2920 /* 4*/ MatMultAdd_SeqBAIJ_N, 2921 MatMultTranspose_SeqBAIJ, 2922 MatMultTransposeAdd_SeqBAIJ, 2923 NULL, 2924 NULL, 2925 NULL, 2926 /* 10*/ NULL, 2927 MatLUFactor_SeqBAIJ, 2928 NULL, 2929 NULL, 2930 MatTranspose_SeqBAIJ, 2931 /* 15*/ MatGetInfo_SeqBAIJ, 2932 MatEqual_SeqBAIJ, 2933 MatGetDiagonal_SeqBAIJ, 2934 MatDiagonalScale_SeqBAIJ, 2935 MatNorm_SeqBAIJ, 2936 /* 20*/ NULL, 2937 MatAssemblyEnd_SeqBAIJ, 2938 MatSetOption_SeqBAIJ, 2939 MatZeroEntries_SeqBAIJ, 2940 /* 24*/ MatZeroRows_SeqBAIJ, 2941 NULL, 2942 NULL, 2943 NULL, 2944 NULL, 2945 /* 29*/ MatSetUp_SeqBAIJ, 2946 NULL, 2947 NULL, 2948 NULL, 2949 NULL, 2950 /* 34*/ MatDuplicate_SeqBAIJ, 2951 NULL, 2952 NULL, 2953 MatILUFactor_SeqBAIJ, 2954 NULL, 2955 /* 39*/ MatAXPY_SeqBAIJ, 2956 MatCreateSubMatrices_SeqBAIJ, 2957 MatIncreaseOverlap_SeqBAIJ, 2958 MatGetValues_SeqBAIJ, 2959 MatCopy_SeqBAIJ, 2960 /* 44*/ NULL, 2961 MatScale_SeqBAIJ, 2962 MatShift_SeqBAIJ, 2963 NULL, 2964 MatZeroRowsColumns_SeqBAIJ, 2965 /* 49*/ NULL, 2966 MatGetRowIJ_SeqBAIJ, 2967 MatRestoreRowIJ_SeqBAIJ, 2968 MatGetColumnIJ_SeqBAIJ, 2969 MatRestoreColumnIJ_SeqBAIJ, 2970 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2971 NULL, 2972 NULL, 2973 NULL, 2974 MatSetValuesBlocked_SeqBAIJ, 2975 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2976 MatDestroy_SeqBAIJ, 2977 MatView_SeqBAIJ, 2978 NULL, 2979 NULL, 2980 /* 64*/ NULL, 2981 NULL, 2982 NULL, 2983 NULL, 2984 NULL, 2985 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2986 NULL, 2987 MatConvert_Basic, 2988 NULL, 2989 NULL, 2990 /* 74*/ NULL, 2991 MatFDColoringApply_BAIJ, 2992 NULL, 2993 NULL, 2994 NULL, 2995 /* 79*/ NULL, 2996 NULL, 2997 NULL, 2998 NULL, 2999 MatLoad_SeqBAIJ, 3000 /* 84*/ NULL, 3001 NULL, 3002 NULL, 3003 NULL, 3004 NULL, 3005 /* 89*/ NULL, 3006 NULL, 3007 NULL, 3008 NULL, 3009 NULL, 3010 /* 94*/ NULL, 3011 NULL, 3012 NULL, 3013 NULL, 3014 NULL, 3015 /* 99*/ NULL, 3016 NULL, 3017 NULL, 3018 MatConjugate_SeqBAIJ, 3019 NULL, 3020 /*104*/ NULL, 3021 MatRealPart_SeqBAIJ, 3022 MatImaginaryPart_SeqBAIJ, 3023 NULL, 3024 NULL, 3025 /*109*/ NULL, 3026 NULL, 3027 NULL, 3028 NULL, 3029 MatMissingDiagonal_SeqBAIJ, 3030 /*114*/ NULL, 3031 NULL, 3032 NULL, 3033 NULL, 3034 NULL, 3035 /*119*/ NULL, 3036 NULL, 3037 MatMultHermitianTranspose_SeqBAIJ, 3038 MatMultHermitianTransposeAdd_SeqBAIJ, 3039 NULL, 3040 /*124*/ NULL, 3041 MatGetColumnReductions_SeqBAIJ, 3042 MatInvertBlockDiagonal_SeqBAIJ, 3043 NULL, 3044 NULL, 3045 /*129*/ NULL, 3046 NULL, 3047 NULL, 3048 NULL, 3049 NULL, 3050 /*134*/ NULL, 3051 NULL, 3052 NULL, 3053 NULL, 3054 NULL, 3055 /*139*/ MatSetBlockSizes_Default, 3056 NULL, 3057 NULL, 3058 MatFDColoringSetUp_SeqXAIJ, 3059 NULL, 3060 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3061 MatDestroySubMatrices_SeqBAIJ, 3062 NULL, 3063 NULL, 3064 NULL, 3065 NULL, 3066 /*150*/ NULL, 3067 NULL}; 3068 3069 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3070 { 3071 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3072 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3073 3074 PetscFunctionBegin; 3075 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3076 3077 /* allocate space for values if not already there */ 3078 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3079 3080 /* copy values over */ 3081 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3082 PetscFunctionReturn(PETSC_SUCCESS); 3083 } 3084 3085 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3086 { 3087 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3088 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3089 3090 PetscFunctionBegin; 3091 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3092 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3093 3094 /* copy values over */ 3095 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3096 PetscFunctionReturn(PETSC_SUCCESS); 3097 } 3098 3099 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3100 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3101 3102 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3103 { 3104 Mat_SeqBAIJ *b; 3105 PetscInt i, mbs, nbs, bs2; 3106 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3107 3108 PetscFunctionBegin; 3109 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3110 if (nz == MAT_SKIP_ALLOCATION) { 3111 skipallocation = PETSC_TRUE; 3112 nz = 0; 3113 } 3114 3115 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3116 PetscCall(PetscLayoutSetUp(B->rmap)); 3117 PetscCall(PetscLayoutSetUp(B->cmap)); 3118 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3119 3120 B->preallocated = PETSC_TRUE; 3121 3122 mbs = B->rmap->n / bs; 3123 nbs = B->cmap->n / bs; 3124 bs2 = bs * bs; 3125 3126 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3127 3128 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3129 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3130 if (nnz) { 3131 for (i = 0; i < mbs; i++) { 3132 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3133 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3134 } 3135 } 3136 3137 b = (Mat_SeqBAIJ *)B->data; 3138 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3139 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3140 PetscOptionsEnd(); 3141 3142 if (!flg) { 3143 switch (bs) { 3144 case 1: 3145 B->ops->mult = MatMult_SeqBAIJ_1; 3146 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3147 break; 3148 case 2: 3149 B->ops->mult = MatMult_SeqBAIJ_2; 3150 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3151 break; 3152 case 3: 3153 B->ops->mult = MatMult_SeqBAIJ_3; 3154 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3155 break; 3156 case 4: 3157 B->ops->mult = MatMult_SeqBAIJ_4; 3158 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3159 break; 3160 case 5: 3161 B->ops->mult = MatMult_SeqBAIJ_5; 3162 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3163 break; 3164 case 6: 3165 B->ops->mult = MatMult_SeqBAIJ_6; 3166 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3167 break; 3168 case 7: 3169 B->ops->mult = MatMult_SeqBAIJ_7; 3170 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3171 break; 3172 case 9: { 3173 PetscInt version = 1; 3174 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3175 switch (version) { 3176 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3177 case 1: 3178 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3179 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3180 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3181 break; 3182 #endif 3183 default: 3184 B->ops->mult = MatMult_SeqBAIJ_N; 3185 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3186 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3187 break; 3188 } 3189 break; 3190 } 3191 case 11: 3192 B->ops->mult = MatMult_SeqBAIJ_11; 3193 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3194 break; 3195 case 12: { 3196 PetscInt version = 1; 3197 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3198 switch (version) { 3199 case 1: 3200 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3201 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3202 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3203 break; 3204 case 2: 3205 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3206 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3207 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3208 break; 3209 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3210 case 3: 3211 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3212 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3213 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3214 break; 3215 #endif 3216 default: 3217 B->ops->mult = MatMult_SeqBAIJ_N; 3218 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3219 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3220 break; 3221 } 3222 break; 3223 } 3224 case 15: { 3225 PetscInt version = 1; 3226 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3227 switch (version) { 3228 case 1: 3229 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3230 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3231 break; 3232 case 2: 3233 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3234 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3235 break; 3236 case 3: 3237 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3238 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3239 break; 3240 case 4: 3241 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3242 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3243 break; 3244 default: 3245 B->ops->mult = MatMult_SeqBAIJ_N; 3246 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3247 break; 3248 } 3249 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3250 break; 3251 } 3252 default: 3253 B->ops->mult = MatMult_SeqBAIJ_N; 3254 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3255 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3256 break; 3257 } 3258 } 3259 B->ops->sor = MatSOR_SeqBAIJ; 3260 b->mbs = mbs; 3261 b->nbs = nbs; 3262 if (!skipallocation) { 3263 if (!b->imax) { 3264 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3265 3266 b->free_imax_ilen = PETSC_TRUE; 3267 } 3268 /* b->ilen will count nonzeros in each block row so far. */ 3269 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3270 if (!nnz) { 3271 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3272 else if (nz < 0) nz = 1; 3273 nz = PetscMin(nz, nbs); 3274 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3275 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3276 } else { 3277 PetscInt64 nz64 = 0; 3278 for (i = 0; i < mbs; i++) { 3279 b->imax[i] = nnz[i]; 3280 nz64 += nnz[i]; 3281 } 3282 PetscCall(PetscIntCast(nz64, &nz)); 3283 } 3284 3285 /* allocate the matrix space */ 3286 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3287 if (B->structure_only) { 3288 PetscCall(PetscMalloc1(nz, &b->j)); 3289 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3290 } else { 3291 PetscInt nzbs2 = 0; 3292 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3293 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3294 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3295 } 3296 PetscCall(PetscArrayzero(b->j, nz)); 3297 3298 if (B->structure_only) { 3299 b->singlemalloc = PETSC_FALSE; 3300 b->free_a = PETSC_FALSE; 3301 } else { 3302 b->singlemalloc = PETSC_TRUE; 3303 b->free_a = PETSC_TRUE; 3304 } 3305 b->free_ij = PETSC_TRUE; 3306 3307 b->i[0] = 0; 3308 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3309 3310 } else { 3311 b->free_a = PETSC_FALSE; 3312 b->free_ij = PETSC_FALSE; 3313 } 3314 3315 b->bs2 = bs2; 3316 b->mbs = mbs; 3317 b->nz = 0; 3318 b->maxnz = nz; 3319 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3320 B->was_assembled = PETSC_FALSE; 3321 B->assembled = PETSC_FALSE; 3322 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3323 PetscFunctionReturn(PETSC_SUCCESS); 3324 } 3325 3326 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3327 { 3328 PetscInt i, m, nz, nz_max = 0, *nnz; 3329 PetscScalar *values = NULL; 3330 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3331 3332 PetscFunctionBegin; 3333 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3334 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3335 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3336 PetscCall(PetscLayoutSetUp(B->rmap)); 3337 PetscCall(PetscLayoutSetUp(B->cmap)); 3338 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3339 m = B->rmap->n / bs; 3340 3341 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3342 PetscCall(PetscMalloc1(m + 1, &nnz)); 3343 for (i = 0; i < m; i++) { 3344 nz = ii[i + 1] - ii[i]; 3345 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3346 nz_max = PetscMax(nz_max, nz); 3347 nnz[i] = nz; 3348 } 3349 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3350 PetscCall(PetscFree(nnz)); 3351 3352 values = (PetscScalar *)V; 3353 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3354 for (i = 0; i < m; i++) { 3355 PetscInt ncols = ii[i + 1] - ii[i]; 3356 const PetscInt *icols = jj + ii[i]; 3357 if (bs == 1 || !roworiented) { 3358 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3359 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3360 } else { 3361 PetscInt j; 3362 for (j = 0; j < ncols; j++) { 3363 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3364 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3365 } 3366 } 3367 } 3368 if (!V) PetscCall(PetscFree(values)); 3369 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3370 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3371 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3372 PetscFunctionReturn(PETSC_SUCCESS); 3373 } 3374 3375 /*@C 3376 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3377 3378 Not Collective 3379 3380 Input Parameter: 3381 . mat - a `MATSEQBAIJ` matrix 3382 3383 Output Parameter: 3384 . array - pointer to the data 3385 3386 Level: intermediate 3387 3388 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3389 @*/ 3390 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3391 { 3392 PetscFunctionBegin; 3393 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3394 PetscFunctionReturn(PETSC_SUCCESS); 3395 } 3396 3397 /*@C 3398 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3399 3400 Not Collective 3401 3402 Input Parameters: 3403 + mat - a `MATSEQBAIJ` matrix 3404 - array - pointer to the data 3405 3406 Level: intermediate 3407 3408 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3409 @*/ 3410 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3411 { 3412 PetscFunctionBegin; 3413 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3414 PetscFunctionReturn(PETSC_SUCCESS); 3415 } 3416 3417 /*MC 3418 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3419 block sparse compressed row format. 3420 3421 Options Database Keys: 3422 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3423 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3424 3425 Level: beginner 3426 3427 Notes: 3428 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3429 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3430 3431 Run with -info to see what version of the matrix-vector product is being used 3432 3433 .seealso: `MatCreateSeqBAIJ()` 3434 M*/ 3435 3436 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3437 3438 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3439 { 3440 PetscMPIInt size; 3441 Mat_SeqBAIJ *b; 3442 3443 PetscFunctionBegin; 3444 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3445 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3446 3447 PetscCall(PetscNew(&b)); 3448 B->data = (void *)b; 3449 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3450 3451 b->row = NULL; 3452 b->col = NULL; 3453 b->icol = NULL; 3454 b->reallocs = 0; 3455 b->saved_values = NULL; 3456 3457 b->roworiented = PETSC_TRUE; 3458 b->nonew = 0; 3459 b->diag = NULL; 3460 B->spptr = NULL; 3461 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3462 b->keepnonzeropattern = PETSC_FALSE; 3463 3464 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3465 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3466 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3467 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3468 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3469 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3470 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3471 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3472 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3473 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3474 #if defined(PETSC_HAVE_HYPRE) 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3476 #endif 3477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3478 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3479 PetscFunctionReturn(PETSC_SUCCESS); 3480 } 3481 3482 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3483 { 3484 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3485 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3486 3487 PetscFunctionBegin; 3488 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3489 3490 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3491 c->imax = a->imax; 3492 c->ilen = a->ilen; 3493 c->free_imax_ilen = PETSC_FALSE; 3494 } else { 3495 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3496 for (i = 0; i < mbs; i++) { 3497 c->imax[i] = a->imax[i]; 3498 c->ilen[i] = a->ilen[i]; 3499 } 3500 c->free_imax_ilen = PETSC_TRUE; 3501 } 3502 3503 /* allocate the matrix space */ 3504 if (mallocmatspace) { 3505 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3506 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3507 3508 c->i = a->i; 3509 c->j = a->j; 3510 c->singlemalloc = PETSC_FALSE; 3511 c->free_a = PETSC_TRUE; 3512 c->free_ij = PETSC_FALSE; 3513 c->parent = A; 3514 C->preallocated = PETSC_TRUE; 3515 C->assembled = PETSC_TRUE; 3516 3517 PetscCall(PetscObjectReference((PetscObject)A)); 3518 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3519 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3520 } else { 3521 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3522 3523 c->singlemalloc = PETSC_TRUE; 3524 c->free_a = PETSC_TRUE; 3525 c->free_ij = PETSC_TRUE; 3526 3527 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3528 if (mbs > 0) { 3529 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3530 if (cpvalues == MAT_COPY_VALUES) { 3531 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3532 } else { 3533 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3534 } 3535 } 3536 C->preallocated = PETSC_TRUE; 3537 C->assembled = PETSC_TRUE; 3538 } 3539 } 3540 3541 c->roworiented = a->roworiented; 3542 c->nonew = a->nonew; 3543 3544 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3545 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3546 3547 c->bs2 = a->bs2; 3548 c->mbs = a->mbs; 3549 c->nbs = a->nbs; 3550 3551 if (a->diag) { 3552 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3553 c->diag = a->diag; 3554 c->free_diag = PETSC_FALSE; 3555 } else { 3556 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3557 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3558 c->free_diag = PETSC_TRUE; 3559 } 3560 } else c->diag = NULL; 3561 3562 c->nz = a->nz; 3563 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3564 c->solve_work = NULL; 3565 c->mult_work = NULL; 3566 c->sor_workt = NULL; 3567 c->sor_work = NULL; 3568 3569 c->compressedrow.use = a->compressedrow.use; 3570 c->compressedrow.nrows = a->compressedrow.nrows; 3571 if (a->compressedrow.use) { 3572 i = a->compressedrow.nrows; 3573 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3574 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3575 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3576 } else { 3577 c->compressedrow.use = PETSC_FALSE; 3578 c->compressedrow.i = NULL; 3579 c->compressedrow.rindex = NULL; 3580 } 3581 C->nonzerostate = A->nonzerostate; 3582 3583 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3584 PetscFunctionReturn(PETSC_SUCCESS); 3585 } 3586 3587 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3588 { 3589 PetscFunctionBegin; 3590 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3591 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3592 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3593 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3594 PetscFunctionReturn(PETSC_SUCCESS); 3595 } 3596 3597 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3598 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3599 { 3600 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3601 PetscInt *rowidxs, *colidxs; 3602 PetscScalar *matvals; 3603 3604 PetscFunctionBegin; 3605 PetscCall(PetscViewerSetUp(viewer)); 3606 3607 /* read matrix header */ 3608 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3609 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3610 M = header[1]; 3611 N = header[2]; 3612 nz = header[3]; 3613 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3614 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3615 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3616 3617 /* set block sizes from the viewer's .info file */ 3618 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3619 /* set local and global sizes if not set already */ 3620 if (mat->rmap->n < 0) mat->rmap->n = M; 3621 if (mat->cmap->n < 0) mat->cmap->n = N; 3622 if (mat->rmap->N < 0) mat->rmap->N = M; 3623 if (mat->cmap->N < 0) mat->cmap->N = N; 3624 PetscCall(PetscLayoutSetUp(mat->rmap)); 3625 PetscCall(PetscLayoutSetUp(mat->cmap)); 3626 3627 /* check if the matrix sizes are correct */ 3628 PetscCall(MatGetSize(mat, &rows, &cols)); 3629 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3630 PetscCall(MatGetBlockSize(mat, &bs)); 3631 PetscCall(MatGetLocalSize(mat, &m, &n)); 3632 mbs = m / bs; 3633 nbs = n / bs; 3634 3635 /* read in row lengths, column indices and nonzero values */ 3636 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3637 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3638 rowidxs[0] = 0; 3639 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3640 sum = rowidxs[m]; 3641 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3642 3643 /* read in column indices and nonzero values */ 3644 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3645 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3646 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3647 3648 { /* preallocate matrix storage */ 3649 PetscBT bt; /* helper bit set to count nonzeros */ 3650 PetscInt *nnz; 3651 PetscBool sbaij; 3652 3653 PetscCall(PetscBTCreate(nbs, &bt)); 3654 PetscCall(PetscCalloc1(mbs, &nnz)); 3655 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3656 for (i = 0; i < mbs; i++) { 3657 PetscCall(PetscBTMemzero(nbs, bt)); 3658 for (k = 0; k < bs; k++) { 3659 PetscInt row = bs * i + k; 3660 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3661 PetscInt col = colidxs[j]; 3662 if (!sbaij || col >= row) 3663 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3664 } 3665 } 3666 } 3667 PetscCall(PetscBTDestroy(&bt)); 3668 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3669 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3670 PetscCall(PetscFree(nnz)); 3671 } 3672 3673 /* store matrix values */ 3674 for (i = 0; i < m; i++) { 3675 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3676 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3677 } 3678 3679 PetscCall(PetscFree(rowidxs)); 3680 PetscCall(PetscFree2(colidxs, matvals)); 3681 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3682 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3683 PetscFunctionReturn(PETSC_SUCCESS); 3684 } 3685 3686 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3687 { 3688 PetscBool isbinary; 3689 3690 PetscFunctionBegin; 3691 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3692 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3693 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3694 PetscFunctionReturn(PETSC_SUCCESS); 3695 } 3696 3697 /*@C 3698 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3699 compressed row) format. For good matrix assembly performance the 3700 user should preallocate the matrix storage by setting the parameter nz 3701 (or the array nnz). By setting these parameters accurately, performance 3702 during matrix assembly can be increased by more than a factor of 50. 3703 3704 Collective 3705 3706 Input Parameters: 3707 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3708 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3709 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3710 . m - number of rows 3711 . n - number of columns 3712 . nz - number of nonzero blocks per block row (same for all rows) 3713 - nnz - array containing the number of nonzero blocks in the various block rows 3714 (possibly different for each block row) or NULL 3715 3716 Output Parameter: 3717 . A - the matrix 3718 3719 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3720 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3721 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3722 3723 Options Database Keys: 3724 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3725 - -mat_block_size - size of the blocks to use 3726 3727 Level: intermediate 3728 3729 Notes: 3730 The number of rows and columns must be divisible by blocksize. 3731 3732 If the nnz parameter is given then the nz parameter is ignored 3733 3734 A nonzero block is any block that as 1 or more nonzeros in it 3735 3736 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3737 storage. That is, the stored row and column indices can begin at 3738 either one (as in Fortran) or zero. See the users' manual for details. 3739 3740 Specify the preallocated storage with either nz or nnz (not both). 3741 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3742 allocation. See [Sparse Matrices](sec_matsparse) for details. 3743 matrices. 3744 3745 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3746 @*/ 3747 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3748 { 3749 PetscFunctionBegin; 3750 PetscCall(MatCreate(comm, A)); 3751 PetscCall(MatSetSizes(*A, m, n, m, n)); 3752 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3753 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3754 PetscFunctionReturn(PETSC_SUCCESS); 3755 } 3756 3757 /*@C 3758 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3759 per row in the matrix. For good matrix assembly performance the 3760 user should preallocate the matrix storage by setting the parameter nz 3761 (or the array nnz). By setting these parameters accurately, performance 3762 during matrix assembly can be increased by more than a factor of 50. 3763 3764 Collective 3765 3766 Input Parameters: 3767 + B - the matrix 3768 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3769 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3770 . nz - number of block nonzeros per block row (same for all rows) 3771 - nnz - array containing the number of block nonzeros in the various block rows 3772 (possibly different for each block row) or NULL 3773 3774 Options Database Keys: 3775 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3776 - -mat_block_size - size of the blocks to use 3777 3778 Level: intermediate 3779 3780 Notes: 3781 If the nnz parameter is given then the nz parameter is ignored 3782 3783 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3784 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3785 You can also run with the option -info and look for messages with the string 3786 malloc in them to see if additional memory allocation was needed. 3787 3788 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3789 storage. That is, the stored row and column indices can begin at 3790 either one (as in Fortran) or zero. See the users' manual for details. 3791 3792 Specify the preallocated storage with either nz or nnz (not both). 3793 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3794 allocation. See [Sparse Matrices](sec_matsparse) for details. 3795 3796 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3797 @*/ 3798 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3799 { 3800 PetscFunctionBegin; 3801 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3802 PetscValidType(B, 1); 3803 PetscValidLogicalCollectiveInt(B, bs, 2); 3804 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3805 PetscFunctionReturn(PETSC_SUCCESS); 3806 } 3807 3808 /*@C 3809 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3810 3811 Collective 3812 3813 Input Parameters: 3814 + B - the matrix 3815 . i - the indices into j for the start of each local row (starts with zero) 3816 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3817 - v - optional values in the matrix 3818 3819 Level: advanced 3820 3821 Notes: 3822 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3823 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3824 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3825 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3826 block column and the second index is over columns within a block. 3827 3828 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3829 3830 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3831 @*/ 3832 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3833 { 3834 PetscFunctionBegin; 3835 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3836 PetscValidType(B, 1); 3837 PetscValidLogicalCollectiveInt(B, bs, 2); 3838 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3839 PetscFunctionReturn(PETSC_SUCCESS); 3840 } 3841 3842 /*@ 3843 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3844 3845 Collective 3846 3847 Input Parameters: 3848 + comm - must be an MPI communicator of size 1 3849 . bs - size of block 3850 . m - number of rows 3851 . n - number of columns 3852 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3853 . j - column indices 3854 - a - matrix values 3855 3856 Output Parameter: 3857 . mat - the matrix 3858 3859 Level: advanced 3860 3861 Notes: 3862 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3863 once the matrix is destroyed 3864 3865 You cannot set new nonzero locations into this matrix, that will generate an error. 3866 3867 The i and j indices are 0 based 3868 3869 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3870 3871 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3872 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3873 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3874 with column-major ordering within blocks. 3875 3876 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3877 @*/ 3878 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3879 { 3880 Mat_SeqBAIJ *baij; 3881 3882 PetscFunctionBegin; 3883 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3884 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3885 3886 PetscCall(MatCreate(comm, mat)); 3887 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3888 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3889 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3890 baij = (Mat_SeqBAIJ *)(*mat)->data; 3891 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3892 3893 baij->i = i; 3894 baij->j = j; 3895 baij->a = a; 3896 3897 baij->singlemalloc = PETSC_FALSE; 3898 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3899 baij->free_a = PETSC_FALSE; 3900 baij->free_ij = PETSC_FALSE; 3901 baij->free_imax_ilen = PETSC_TRUE; 3902 3903 for (PetscInt ii = 0; ii < m; ii++) { 3904 const PetscInt row_len = i[ii + 1] - i[ii]; 3905 3906 baij->ilen[ii] = baij->imax[ii] = row_len; 3907 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3908 } 3909 if (PetscDefined(USE_DEBUG)) { 3910 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3911 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3912 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3913 } 3914 } 3915 3916 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3917 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3918 PetscFunctionReturn(PETSC_SUCCESS); 3919 } 3920 3921 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3922 { 3923 PetscFunctionBegin; 3924 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3925 PetscFunctionReturn(PETSC_SUCCESS); 3926 } 3927