1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) { 21 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 22 PetscInt m, n, i; 23 PetscInt ib, jb, bs = A->rmap->bs; 24 MatScalar *a_val = a_aij->a; 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &m, &n)); 28 for (i = 0; i < n; i++) reductions[i] = 0.0; 29 if (type == NORM_2) { 30 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 31 for (jb = 0; jb < bs; jb++) { 32 for (ib = 0; ib < bs; ib++) { 33 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 34 a_val++; 35 } 36 } 37 } 38 } else if (type == NORM_1) { 39 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 40 for (jb = 0; jb < bs; jb++) { 41 for (ib = 0; ib < bs; ib++) { 42 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 43 a_val++; 44 } 45 } 46 } 47 } else if (type == NORM_INFINITY) { 48 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 49 for (jb = 0; jb < bs; jb++) { 50 for (ib = 0; ib < bs; ib++) { 51 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 52 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 53 a_val++; 54 } 55 } 56 } 57 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 58 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 59 for (jb = 0; jb < bs; jb++) { 60 for (ib = 0; ib < bs; ib++) { 61 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 62 a_val++; 63 } 64 } 65 } 66 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 67 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 68 for (jb = 0; jb < bs; jb++) { 69 for (ib = 0; ib < bs; ib++) { 70 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 71 a_val++; 72 } 73 } 74 } 75 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 76 if (type == NORM_2) { 77 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 78 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79 for (i = 0; i < n; i++) reductions[i] /= m; 80 } 81 PetscFunctionReturn(0); 82 } 83 84 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) { 85 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 86 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 87 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 88 PetscReal shift = 0.0; 89 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 90 91 PetscFunctionBegin; 92 allowzeropivot = PetscNot(A->erroriffailure); 93 94 if (a->idiagvalid) { 95 if (values) *values = a->idiag; 96 PetscFunctionReturn(0); 97 } 98 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 99 diag_offset = a->diag; 100 if (!a->idiag) { 101 PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 102 PetscCall(PetscLogObjectMemory((PetscObject)A, bs2 * mbs * sizeof(PetscScalar))); 103 } 104 diag = a->idiag; 105 if (values) *values = a->idiag; 106 /* factor and invert each block */ 107 switch (bs) { 108 case 1: 109 for (i = 0; i < mbs; i++) { 110 odiag = v + 1 * diag_offset[i]; 111 diag[0] = odiag[0]; 112 113 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 114 if (allowzeropivot) { 115 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 117 A->factorerror_zeropivot_row = i; 118 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 119 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 120 } 121 122 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 123 diag += 1; 124 } 125 break; 126 case 2: 127 for (i = 0; i < mbs; i++) { 128 odiag = v + 4 * diag_offset[i]; 129 diag[0] = odiag[0]; 130 diag[1] = odiag[1]; 131 diag[2] = odiag[2]; 132 diag[3] = odiag[3]; 133 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 134 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 135 diag += 4; 136 } 137 break; 138 case 3: 139 for (i = 0; i < mbs; i++) { 140 odiag = v + 9 * diag_offset[i]; 141 diag[0] = odiag[0]; 142 diag[1] = odiag[1]; 143 diag[2] = odiag[2]; 144 diag[3] = odiag[3]; 145 diag[4] = odiag[4]; 146 diag[5] = odiag[5]; 147 diag[6] = odiag[6]; 148 diag[7] = odiag[7]; 149 diag[8] = odiag[8]; 150 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 151 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 152 diag += 9; 153 } 154 break; 155 case 4: 156 for (i = 0; i < mbs; i++) { 157 odiag = v + 16 * diag_offset[i]; 158 PetscCall(PetscArraycpy(diag, odiag, 16)); 159 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 160 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 161 diag += 16; 162 } 163 break; 164 case 5: 165 for (i = 0; i < mbs; i++) { 166 odiag = v + 25 * diag_offset[i]; 167 PetscCall(PetscArraycpy(diag, odiag, 25)); 168 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 169 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 170 diag += 25; 171 } 172 break; 173 case 6: 174 for (i = 0; i < mbs; i++) { 175 odiag = v + 36 * diag_offset[i]; 176 PetscCall(PetscArraycpy(diag, odiag, 36)); 177 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 178 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 179 diag += 36; 180 } 181 break; 182 case 7: 183 for (i = 0; i < mbs; i++) { 184 odiag = v + 49 * diag_offset[i]; 185 PetscCall(PetscArraycpy(diag, odiag, 49)); 186 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 187 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 188 diag += 49; 189 } 190 break; 191 default: 192 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 193 for (i = 0; i < mbs; i++) { 194 odiag = v + bs2 * diag_offset[i]; 195 PetscCall(PetscArraycpy(diag, odiag, bs2)); 196 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 197 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 198 diag += bs2; 199 } 200 PetscCall(PetscFree2(v_work, v_pivots)); 201 } 202 a->idiagvalid = PETSC_TRUE; 203 PetscFunctionReturn(0); 204 } 205 206 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 207 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 208 PetscScalar *x, *work, *w, *workt, *t; 209 const MatScalar *v, *aa = a->a, *idiag; 210 const PetscScalar *b, *xb; 211 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 212 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 213 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 214 215 PetscFunctionBegin; 216 its = its * lits; 217 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 218 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 219 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 220 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 221 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 222 223 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 224 225 if (!m) PetscFunctionReturn(0); 226 diag = a->diag; 227 idiag = a->idiag; 228 k = PetscMax(A->rmap->n, A->cmap->n); 229 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 230 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 231 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 232 work = a->mult_work; 233 t = a->sor_workt; 234 w = a->sor_work; 235 236 PetscCall(VecGetArray(xx, &x)); 237 PetscCall(VecGetArrayRead(bb, &b)); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241 switch (bs) { 242 case 1: 243 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 244 t[0] = b[0]; 245 i2 = 1; 246 idiag += 1; 247 for (i = 1; i < m; i++) { 248 v = aa + ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; 252 for (j = 0; j < nz; j++) { 253 xw[0] = x[vi[j]]; 254 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 255 } 256 t[i2] = s[0]; 257 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 258 x[i2] = xw[0]; 259 idiag += 1; 260 i2 += 1; 261 } 262 break; 263 case 2: 264 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 265 t[0] = b[0]; 266 t[1] = b[1]; 267 i2 = 2; 268 idiag += 4; 269 for (i = 1; i < m; i++) { 270 v = aa + 4 * ai[i]; 271 vi = aj + ai[i]; 272 nz = diag[i] - ai[i]; 273 s[0] = b[i2]; 274 s[1] = b[i2 + 1]; 275 for (j = 0; j < nz; j++) { 276 idx = 2 * vi[j]; 277 it = 4 * j; 278 xw[0] = x[idx]; 279 xw[1] = x[1 + idx]; 280 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 281 } 282 t[i2] = s[0]; 283 t[i2 + 1] = s[1]; 284 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 285 x[i2] = xw[0]; 286 x[i2 + 1] = xw[1]; 287 idiag += 4; 288 i2 += 2; 289 } 290 break; 291 case 3: 292 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 293 t[0] = b[0]; 294 t[1] = b[1]; 295 t[2] = b[2]; 296 i2 = 3; 297 idiag += 9; 298 for (i = 1; i < m; i++) { 299 v = aa + 9 * ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; 303 s[1] = b[i2 + 1]; 304 s[2] = b[i2 + 2]; 305 while (nz--) { 306 idx = 3 * (*vi++); 307 xw[0] = x[idx]; 308 xw[1] = x[1 + idx]; 309 xw[2] = x[2 + idx]; 310 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 311 v += 9; 312 } 313 t[i2] = s[0]; 314 t[i2 + 1] = s[1]; 315 t[i2 + 2] = s[2]; 316 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 317 x[i2] = xw[0]; 318 x[i2 + 1] = xw[1]; 319 x[i2 + 2] = xw[2]; 320 idiag += 9; 321 i2 += 3; 322 } 323 break; 324 case 4: 325 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 326 t[0] = b[0]; 327 t[1] = b[1]; 328 t[2] = b[2]; 329 t[3] = b[3]; 330 i2 = 4; 331 idiag += 16; 332 for (i = 1; i < m; i++) { 333 v = aa + 16 * ai[i]; 334 vi = aj + ai[i]; 335 nz = diag[i] - ai[i]; 336 s[0] = b[i2]; 337 s[1] = b[i2 + 1]; 338 s[2] = b[i2 + 2]; 339 s[3] = b[i2 + 3]; 340 while (nz--) { 341 idx = 4 * (*vi++); 342 xw[0] = x[idx]; 343 xw[1] = x[1 + idx]; 344 xw[2] = x[2 + idx]; 345 xw[3] = x[3 + idx]; 346 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 347 v += 16; 348 } 349 t[i2] = s[0]; 350 t[i2 + 1] = s[1]; 351 t[i2 + 2] = s[2]; 352 t[i2 + 3] = s[3]; 353 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 354 x[i2] = xw[0]; 355 x[i2 + 1] = xw[1]; 356 x[i2 + 2] = xw[2]; 357 x[i2 + 3] = xw[3]; 358 idiag += 16; 359 i2 += 4; 360 } 361 break; 362 case 5: 363 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 364 t[0] = b[0]; 365 t[1] = b[1]; 366 t[2] = b[2]; 367 t[3] = b[3]; 368 t[4] = b[4]; 369 i2 = 5; 370 idiag += 25; 371 for (i = 1; i < m; i++) { 372 v = aa + 25 * ai[i]; 373 vi = aj + ai[i]; 374 nz = diag[i] - ai[i]; 375 s[0] = b[i2]; 376 s[1] = b[i2 + 1]; 377 s[2] = b[i2 + 2]; 378 s[3] = b[i2 + 3]; 379 s[4] = b[i2 + 4]; 380 while (nz--) { 381 idx = 5 * (*vi++); 382 xw[0] = x[idx]; 383 xw[1] = x[1 + idx]; 384 xw[2] = x[2 + idx]; 385 xw[3] = x[3 + idx]; 386 xw[4] = x[4 + idx]; 387 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 388 v += 25; 389 } 390 t[i2] = s[0]; 391 t[i2 + 1] = s[1]; 392 t[i2 + 2] = s[2]; 393 t[i2 + 3] = s[3]; 394 t[i2 + 4] = s[4]; 395 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 396 x[i2] = xw[0]; 397 x[i2 + 1] = xw[1]; 398 x[i2 + 2] = xw[2]; 399 x[i2 + 3] = xw[3]; 400 x[i2 + 4] = xw[4]; 401 idiag += 25; 402 i2 += 5; 403 } 404 break; 405 case 6: 406 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 407 t[0] = b[0]; 408 t[1] = b[1]; 409 t[2] = b[2]; 410 t[3] = b[3]; 411 t[4] = b[4]; 412 t[5] = b[5]; 413 i2 = 6; 414 idiag += 36; 415 for (i = 1; i < m; i++) { 416 v = aa + 36 * ai[i]; 417 vi = aj + ai[i]; 418 nz = diag[i] - ai[i]; 419 s[0] = b[i2]; 420 s[1] = b[i2 + 1]; 421 s[2] = b[i2 + 2]; 422 s[3] = b[i2 + 3]; 423 s[4] = b[i2 + 4]; 424 s[5] = b[i2 + 5]; 425 while (nz--) { 426 idx = 6 * (*vi++); 427 xw[0] = x[idx]; 428 xw[1] = x[1 + idx]; 429 xw[2] = x[2 + idx]; 430 xw[3] = x[3 + idx]; 431 xw[4] = x[4 + idx]; 432 xw[5] = x[5 + idx]; 433 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 434 v += 36; 435 } 436 t[i2] = s[0]; 437 t[i2 + 1] = s[1]; 438 t[i2 + 2] = s[2]; 439 t[i2 + 3] = s[3]; 440 t[i2 + 4] = s[4]; 441 t[i2 + 5] = s[5]; 442 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 443 x[i2] = xw[0]; 444 x[i2 + 1] = xw[1]; 445 x[i2 + 2] = xw[2]; 446 x[i2 + 3] = xw[3]; 447 x[i2 + 4] = xw[4]; 448 x[i2 + 5] = xw[5]; 449 idiag += 36; 450 i2 += 6; 451 } 452 break; 453 case 7: 454 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 455 t[0] = b[0]; 456 t[1] = b[1]; 457 t[2] = b[2]; 458 t[3] = b[3]; 459 t[4] = b[4]; 460 t[5] = b[5]; 461 t[6] = b[6]; 462 i2 = 7; 463 idiag += 49; 464 for (i = 1; i < m; i++) { 465 v = aa + 49 * ai[i]; 466 vi = aj + ai[i]; 467 nz = diag[i] - ai[i]; 468 s[0] = b[i2]; 469 s[1] = b[i2 + 1]; 470 s[2] = b[i2 + 2]; 471 s[3] = b[i2 + 3]; 472 s[4] = b[i2 + 4]; 473 s[5] = b[i2 + 5]; 474 s[6] = b[i2 + 6]; 475 while (nz--) { 476 idx = 7 * (*vi++); 477 xw[0] = x[idx]; 478 xw[1] = x[1 + idx]; 479 xw[2] = x[2 + idx]; 480 xw[3] = x[3 + idx]; 481 xw[4] = x[4 + idx]; 482 xw[5] = x[5 + idx]; 483 xw[6] = x[6 + idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 485 v += 49; 486 } 487 t[i2] = s[0]; 488 t[i2 + 1] = s[1]; 489 t[i2 + 2] = s[2]; 490 t[i2 + 3] = s[3]; 491 t[i2 + 4] = s[4]; 492 t[i2 + 5] = s[5]; 493 t[i2 + 6] = s[6]; 494 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 495 x[i2] = xw[0]; 496 x[i2 + 1] = xw[1]; 497 x[i2 + 2] = xw[2]; 498 x[i2 + 3] = xw[3]; 499 x[i2 + 4] = xw[4]; 500 x[i2 + 5] = xw[5]; 501 x[i2 + 6] = xw[6]; 502 idiag += 49; 503 i2 += 7; 504 } 505 break; 506 default: 507 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 508 PetscCall(PetscArraycpy(t, b, bs)); 509 i2 = bs; 510 idiag += bs2; 511 for (i = 1; i < m; i++) { 512 v = aa + bs2 * ai[i]; 513 vi = aj + ai[i]; 514 nz = diag[i] - ai[i]; 515 516 PetscCall(PetscArraycpy(w, b + i2, bs)); 517 /* copy all rows of x that are needed into contiguous space */ 518 workt = work; 519 for (j = 0; j < nz; j++) { 520 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 521 workt += bs; 522 } 523 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 524 PetscCall(PetscArraycpy(t + i2, w, bs)); 525 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 526 527 idiag += bs2; 528 i2 += bs; 529 } 530 break; 531 } 532 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 533 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 534 xb = t; 535 } else xb = b; 536 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 537 idiag = a->idiag + bs2 * (a->mbs - 1); 538 i2 = bs * (m - 1); 539 switch (bs) { 540 case 1: 541 s[0] = xb[i2]; 542 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 543 x[i2] = xw[0]; 544 i2 -= 1; 545 for (i = m - 2; i >= 0; i--) { 546 v = aa + (diag[i] + 1); 547 vi = aj + diag[i] + 1; 548 nz = ai[i + 1] - diag[i] - 1; 549 s[0] = xb[i2]; 550 for (j = 0; j < nz; j++) { 551 xw[0] = x[vi[j]]; 552 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 553 } 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 idiag -= 1; 557 i2 -= 1; 558 } 559 break; 560 case 2: 561 s[0] = xb[i2]; 562 s[1] = xb[i2 + 1]; 563 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 564 x[i2] = xw[0]; 565 x[i2 + 1] = xw[1]; 566 i2 -= 2; 567 idiag -= 4; 568 for (i = m - 2; i >= 0; i--) { 569 v = aa + 4 * (diag[i] + 1); 570 vi = aj + diag[i] + 1; 571 nz = ai[i + 1] - diag[i] - 1; 572 s[0] = xb[i2]; 573 s[1] = xb[i2 + 1]; 574 for (j = 0; j < nz; j++) { 575 idx = 2 * vi[j]; 576 it = 4 * j; 577 xw[0] = x[idx]; 578 xw[1] = x[1 + idx]; 579 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 580 } 581 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 582 x[i2] = xw[0]; 583 x[i2 + 1] = xw[1]; 584 idiag -= 4; 585 i2 -= 2; 586 } 587 break; 588 case 3: 589 s[0] = xb[i2]; 590 s[1] = xb[i2 + 1]; 591 s[2] = xb[i2 + 2]; 592 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 593 x[i2] = xw[0]; 594 x[i2 + 1] = xw[1]; 595 x[i2 + 2] = xw[2]; 596 i2 -= 3; 597 idiag -= 9; 598 for (i = m - 2; i >= 0; i--) { 599 v = aa + 9 * (diag[i] + 1); 600 vi = aj + diag[i] + 1; 601 nz = ai[i + 1] - diag[i] - 1; 602 s[0] = xb[i2]; 603 s[1] = xb[i2 + 1]; 604 s[2] = xb[i2 + 2]; 605 while (nz--) { 606 idx = 3 * (*vi++); 607 xw[0] = x[idx]; 608 xw[1] = x[1 + idx]; 609 xw[2] = x[2 + idx]; 610 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 611 v += 9; 612 } 613 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 614 x[i2] = xw[0]; 615 x[i2 + 1] = xw[1]; 616 x[i2 + 2] = xw[2]; 617 idiag -= 9; 618 i2 -= 3; 619 } 620 break; 621 case 4: 622 s[0] = xb[i2]; 623 s[1] = xb[i2 + 1]; 624 s[2] = xb[i2 + 2]; 625 s[3] = xb[i2 + 3]; 626 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 627 x[i2] = xw[0]; 628 x[i2 + 1] = xw[1]; 629 x[i2 + 2] = xw[2]; 630 x[i2 + 3] = xw[3]; 631 i2 -= 4; 632 idiag -= 16; 633 for (i = m - 2; i >= 0; i--) { 634 v = aa + 16 * (diag[i] + 1); 635 vi = aj + diag[i] + 1; 636 nz = ai[i + 1] - diag[i] - 1; 637 s[0] = xb[i2]; 638 s[1] = xb[i2 + 1]; 639 s[2] = xb[i2 + 2]; 640 s[3] = xb[i2 + 3]; 641 while (nz--) { 642 idx = 4 * (*vi++); 643 xw[0] = x[idx]; 644 xw[1] = x[1 + idx]; 645 xw[2] = x[2 + idx]; 646 xw[3] = x[3 + idx]; 647 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 648 v += 16; 649 } 650 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 651 x[i2] = xw[0]; 652 x[i2 + 1] = xw[1]; 653 x[i2 + 2] = xw[2]; 654 x[i2 + 3] = xw[3]; 655 idiag -= 16; 656 i2 -= 4; 657 } 658 break; 659 case 5: 660 s[0] = xb[i2]; 661 s[1] = xb[i2 + 1]; 662 s[2] = xb[i2 + 2]; 663 s[3] = xb[i2 + 3]; 664 s[4] = xb[i2 + 4]; 665 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 666 x[i2] = xw[0]; 667 x[i2 + 1] = xw[1]; 668 x[i2 + 2] = xw[2]; 669 x[i2 + 3] = xw[3]; 670 x[i2 + 4] = xw[4]; 671 i2 -= 5; 672 idiag -= 25; 673 for (i = m - 2; i >= 0; i--) { 674 v = aa + 25 * (diag[i] + 1); 675 vi = aj + diag[i] + 1; 676 nz = ai[i + 1] - diag[i] - 1; 677 s[0] = xb[i2]; 678 s[1] = xb[i2 + 1]; 679 s[2] = xb[i2 + 2]; 680 s[3] = xb[i2 + 3]; 681 s[4] = xb[i2 + 4]; 682 while (nz--) { 683 idx = 5 * (*vi++); 684 xw[0] = x[idx]; 685 xw[1] = x[1 + idx]; 686 xw[2] = x[2 + idx]; 687 xw[3] = x[3 + idx]; 688 xw[4] = x[4 + idx]; 689 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 690 v += 25; 691 } 692 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 693 x[i2] = xw[0]; 694 x[i2 + 1] = xw[1]; 695 x[i2 + 2] = xw[2]; 696 x[i2 + 3] = xw[3]; 697 x[i2 + 4] = xw[4]; 698 idiag -= 25; 699 i2 -= 5; 700 } 701 break; 702 case 6: 703 s[0] = xb[i2]; 704 s[1] = xb[i2 + 1]; 705 s[2] = xb[i2 + 2]; 706 s[3] = xb[i2 + 3]; 707 s[4] = xb[i2 + 4]; 708 s[5] = xb[i2 + 5]; 709 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 710 x[i2] = xw[0]; 711 x[i2 + 1] = xw[1]; 712 x[i2 + 2] = xw[2]; 713 x[i2 + 3] = xw[3]; 714 x[i2 + 4] = xw[4]; 715 x[i2 + 5] = xw[5]; 716 i2 -= 6; 717 idiag -= 36; 718 for (i = m - 2; i >= 0; i--) { 719 v = aa + 36 * (diag[i] + 1); 720 vi = aj + diag[i] + 1; 721 nz = ai[i + 1] - diag[i] - 1; 722 s[0] = xb[i2]; 723 s[1] = xb[i2 + 1]; 724 s[2] = xb[i2 + 2]; 725 s[3] = xb[i2 + 3]; 726 s[4] = xb[i2 + 4]; 727 s[5] = xb[i2 + 5]; 728 while (nz--) { 729 idx = 6 * (*vi++); 730 xw[0] = x[idx]; 731 xw[1] = x[1 + idx]; 732 xw[2] = x[2 + idx]; 733 xw[3] = x[3 + idx]; 734 xw[4] = x[4 + idx]; 735 xw[5] = x[5 + idx]; 736 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 737 v += 36; 738 } 739 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 740 x[i2] = xw[0]; 741 x[i2 + 1] = xw[1]; 742 x[i2 + 2] = xw[2]; 743 x[i2 + 3] = xw[3]; 744 x[i2 + 4] = xw[4]; 745 x[i2 + 5] = xw[5]; 746 idiag -= 36; 747 i2 -= 6; 748 } 749 break; 750 case 7: 751 s[0] = xb[i2]; 752 s[1] = xb[i2 + 1]; 753 s[2] = xb[i2 + 2]; 754 s[3] = xb[i2 + 3]; 755 s[4] = xb[i2 + 4]; 756 s[5] = xb[i2 + 5]; 757 s[6] = xb[i2 + 6]; 758 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 759 x[i2] = xw[0]; 760 x[i2 + 1] = xw[1]; 761 x[i2 + 2] = xw[2]; 762 x[i2 + 3] = xw[3]; 763 x[i2 + 4] = xw[4]; 764 x[i2 + 5] = xw[5]; 765 x[i2 + 6] = xw[6]; 766 i2 -= 7; 767 idiag -= 49; 768 for (i = m - 2; i >= 0; i--) { 769 v = aa + 49 * (diag[i] + 1); 770 vi = aj + diag[i] + 1; 771 nz = ai[i + 1] - diag[i] - 1; 772 s[0] = xb[i2]; 773 s[1] = xb[i2 + 1]; 774 s[2] = xb[i2 + 2]; 775 s[3] = xb[i2 + 3]; 776 s[4] = xb[i2 + 4]; 777 s[5] = xb[i2 + 5]; 778 s[6] = xb[i2 + 6]; 779 while (nz--) { 780 idx = 7 * (*vi++); 781 xw[0] = x[idx]; 782 xw[1] = x[1 + idx]; 783 xw[2] = x[2 + idx]; 784 xw[3] = x[3 + idx]; 785 xw[4] = x[4 + idx]; 786 xw[5] = x[5 + idx]; 787 xw[6] = x[6 + idx]; 788 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 789 v += 49; 790 } 791 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 792 x[i2] = xw[0]; 793 x[i2 + 1] = xw[1]; 794 x[i2 + 2] = xw[2]; 795 x[i2 + 3] = xw[3]; 796 x[i2 + 4] = xw[4]; 797 x[i2 + 5] = xw[5]; 798 x[i2 + 6] = xw[6]; 799 idiag -= 49; 800 i2 -= 7; 801 } 802 break; 803 default: 804 PetscCall(PetscArraycpy(w, xb + i2, bs)); 805 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 806 i2 -= bs; 807 idiag -= bs2; 808 for (i = m - 2; i >= 0; i--) { 809 v = aa + bs2 * (diag[i] + 1); 810 vi = aj + diag[i] + 1; 811 nz = ai[i + 1] - diag[i] - 1; 812 813 PetscCall(PetscArraycpy(w, xb + i2, bs)); 814 /* copy all rows of x that are needed into contiguous space */ 815 workt = work; 816 for (j = 0; j < nz; j++) { 817 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 818 workt += bs; 819 } 820 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 821 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 822 823 idiag -= bs2; 824 i2 -= bs; 825 } 826 break; 827 } 828 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 829 } 830 its--; 831 } 832 while (its--) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 834 idiag = a->idiag; 835 i2 = 0; 836 switch (bs) { 837 case 1: 838 for (i = 0; i < m; i++) { 839 v = aa + ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i + 1] - ai[i]; 842 s[0] = b[i2]; 843 for (j = 0; j < nz; j++) { 844 xw[0] = x[vi[j]]; 845 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 846 } 847 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 848 x[i2] += xw[0]; 849 idiag += 1; 850 i2 += 1; 851 } 852 break; 853 case 2: 854 for (i = 0; i < m; i++) { 855 v = aa + 4 * ai[i]; 856 vi = aj + ai[i]; 857 nz = ai[i + 1] - ai[i]; 858 s[0] = b[i2]; 859 s[1] = b[i2 + 1]; 860 for (j = 0; j < nz; j++) { 861 idx = 2 * vi[j]; 862 it = 4 * j; 863 xw[0] = x[idx]; 864 xw[1] = x[1 + idx]; 865 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 866 } 867 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 868 x[i2] += xw[0]; 869 x[i2 + 1] += xw[1]; 870 idiag += 4; 871 i2 += 2; 872 } 873 break; 874 case 3: 875 for (i = 0; i < m; i++) { 876 v = aa + 9 * ai[i]; 877 vi = aj + ai[i]; 878 nz = ai[i + 1] - ai[i]; 879 s[0] = b[i2]; 880 s[1] = b[i2 + 1]; 881 s[2] = b[i2 + 2]; 882 while (nz--) { 883 idx = 3 * (*vi++); 884 xw[0] = x[idx]; 885 xw[1] = x[1 + idx]; 886 xw[2] = x[2 + idx]; 887 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 888 v += 9; 889 } 890 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 891 x[i2] += xw[0]; 892 x[i2 + 1] += xw[1]; 893 x[i2 + 2] += xw[2]; 894 idiag += 9; 895 i2 += 3; 896 } 897 break; 898 case 4: 899 for (i = 0; i < m; i++) { 900 v = aa + 16 * ai[i]; 901 vi = aj + ai[i]; 902 nz = ai[i + 1] - ai[i]; 903 s[0] = b[i2]; 904 s[1] = b[i2 + 1]; 905 s[2] = b[i2 + 2]; 906 s[3] = b[i2 + 3]; 907 while (nz--) { 908 idx = 4 * (*vi++); 909 xw[0] = x[idx]; 910 xw[1] = x[1 + idx]; 911 xw[2] = x[2 + idx]; 912 xw[3] = x[3 + idx]; 913 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 914 v += 16; 915 } 916 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 917 x[i2] += xw[0]; 918 x[i2 + 1] += xw[1]; 919 x[i2 + 2] += xw[2]; 920 x[i2 + 3] += xw[3]; 921 idiag += 16; 922 i2 += 4; 923 } 924 break; 925 case 5: 926 for (i = 0; i < m; i++) { 927 v = aa + 25 * ai[i]; 928 vi = aj + ai[i]; 929 nz = ai[i + 1] - ai[i]; 930 s[0] = b[i2]; 931 s[1] = b[i2 + 1]; 932 s[2] = b[i2 + 2]; 933 s[3] = b[i2 + 3]; 934 s[4] = b[i2 + 4]; 935 while (nz--) { 936 idx = 5 * (*vi++); 937 xw[0] = x[idx]; 938 xw[1] = x[1 + idx]; 939 xw[2] = x[2 + idx]; 940 xw[3] = x[3 + idx]; 941 xw[4] = x[4 + idx]; 942 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 943 v += 25; 944 } 945 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 946 x[i2] += xw[0]; 947 x[i2 + 1] += xw[1]; 948 x[i2 + 2] += xw[2]; 949 x[i2 + 3] += xw[3]; 950 x[i2 + 4] += xw[4]; 951 idiag += 25; 952 i2 += 5; 953 } 954 break; 955 case 6: 956 for (i = 0; i < m; i++) { 957 v = aa + 36 * ai[i]; 958 vi = aj + ai[i]; 959 nz = ai[i + 1] - ai[i]; 960 s[0] = b[i2]; 961 s[1] = b[i2 + 1]; 962 s[2] = b[i2 + 2]; 963 s[3] = b[i2 + 3]; 964 s[4] = b[i2 + 4]; 965 s[5] = b[i2 + 5]; 966 while (nz--) { 967 idx = 6 * (*vi++); 968 xw[0] = x[idx]; 969 xw[1] = x[1 + idx]; 970 xw[2] = x[2 + idx]; 971 xw[3] = x[3 + idx]; 972 xw[4] = x[4 + idx]; 973 xw[5] = x[5 + idx]; 974 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 975 v += 36; 976 } 977 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 978 x[i2] += xw[0]; 979 x[i2 + 1] += xw[1]; 980 x[i2 + 2] += xw[2]; 981 x[i2 + 3] += xw[3]; 982 x[i2 + 4] += xw[4]; 983 x[i2 + 5] += xw[5]; 984 idiag += 36; 985 i2 += 6; 986 } 987 break; 988 case 7: 989 for (i = 0; i < m; i++) { 990 v = aa + 49 * ai[i]; 991 vi = aj + ai[i]; 992 nz = ai[i + 1] - ai[i]; 993 s[0] = b[i2]; 994 s[1] = b[i2 + 1]; 995 s[2] = b[i2 + 2]; 996 s[3] = b[i2 + 3]; 997 s[4] = b[i2 + 4]; 998 s[5] = b[i2 + 5]; 999 s[6] = b[i2 + 6]; 1000 while (nz--) { 1001 idx = 7 * (*vi++); 1002 xw[0] = x[idx]; 1003 xw[1] = x[1 + idx]; 1004 xw[2] = x[2 + idx]; 1005 xw[3] = x[3 + idx]; 1006 xw[4] = x[4 + idx]; 1007 xw[5] = x[5 + idx]; 1008 xw[6] = x[6 + idx]; 1009 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1010 v += 49; 1011 } 1012 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1013 x[i2] += xw[0]; 1014 x[i2 + 1] += xw[1]; 1015 x[i2 + 2] += xw[2]; 1016 x[i2 + 3] += xw[3]; 1017 x[i2 + 4] += xw[4]; 1018 x[i2 + 5] += xw[5]; 1019 x[i2 + 6] += xw[6]; 1020 idiag += 49; 1021 i2 += 7; 1022 } 1023 break; 1024 default: 1025 for (i = 0; i < m; i++) { 1026 v = aa + bs2 * ai[i]; 1027 vi = aj + ai[i]; 1028 nz = ai[i + 1] - ai[i]; 1029 1030 PetscCall(PetscArraycpy(w, b + i2, bs)); 1031 /* copy all rows of x that are needed into contiguous space */ 1032 workt = work; 1033 for (j = 0; j < nz; j++) { 1034 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1035 workt += bs; 1036 } 1037 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1038 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1039 1040 idiag += bs2; 1041 i2 += bs; 1042 } 1043 break; 1044 } 1045 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1046 } 1047 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1048 idiag = a->idiag + bs2 * (a->mbs - 1); 1049 i2 = bs * (m - 1); 1050 switch (bs) { 1051 case 1: 1052 for (i = m - 1; i >= 0; i--) { 1053 v = aa + ai[i]; 1054 vi = aj + ai[i]; 1055 nz = ai[i + 1] - ai[i]; 1056 s[0] = b[i2]; 1057 for (j = 0; j < nz; j++) { 1058 xw[0] = x[vi[j]]; 1059 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1060 } 1061 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1062 x[i2] += xw[0]; 1063 idiag -= 1; 1064 i2 -= 1; 1065 } 1066 break; 1067 case 2: 1068 for (i = m - 1; i >= 0; i--) { 1069 v = aa + 4 * ai[i]; 1070 vi = aj + ai[i]; 1071 nz = ai[i + 1] - ai[i]; 1072 s[0] = b[i2]; 1073 s[1] = b[i2 + 1]; 1074 for (j = 0; j < nz; j++) { 1075 idx = 2 * vi[j]; 1076 it = 4 * j; 1077 xw[0] = x[idx]; 1078 xw[1] = x[1 + idx]; 1079 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1080 } 1081 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1082 x[i2] += xw[0]; 1083 x[i2 + 1] += xw[1]; 1084 idiag -= 4; 1085 i2 -= 2; 1086 } 1087 break; 1088 case 3: 1089 for (i = m - 1; i >= 0; i--) { 1090 v = aa + 9 * ai[i]; 1091 vi = aj + ai[i]; 1092 nz = ai[i + 1] - ai[i]; 1093 s[0] = b[i2]; 1094 s[1] = b[i2 + 1]; 1095 s[2] = b[i2 + 2]; 1096 while (nz--) { 1097 idx = 3 * (*vi++); 1098 xw[0] = x[idx]; 1099 xw[1] = x[1 + idx]; 1100 xw[2] = x[2 + idx]; 1101 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1102 v += 9; 1103 } 1104 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1105 x[i2] += xw[0]; 1106 x[i2 + 1] += xw[1]; 1107 x[i2 + 2] += xw[2]; 1108 idiag -= 9; 1109 i2 -= 3; 1110 } 1111 break; 1112 case 4: 1113 for (i = m - 1; i >= 0; i--) { 1114 v = aa + 16 * ai[i]; 1115 vi = aj + ai[i]; 1116 nz = ai[i + 1] - ai[i]; 1117 s[0] = b[i2]; 1118 s[1] = b[i2 + 1]; 1119 s[2] = b[i2 + 2]; 1120 s[3] = b[i2 + 3]; 1121 while (nz--) { 1122 idx = 4 * (*vi++); 1123 xw[0] = x[idx]; 1124 xw[1] = x[1 + idx]; 1125 xw[2] = x[2 + idx]; 1126 xw[3] = x[3 + idx]; 1127 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1128 v += 16; 1129 } 1130 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1131 x[i2] += xw[0]; 1132 x[i2 + 1] += xw[1]; 1133 x[i2 + 2] += xw[2]; 1134 x[i2 + 3] += xw[3]; 1135 idiag -= 16; 1136 i2 -= 4; 1137 } 1138 break; 1139 case 5: 1140 for (i = m - 1; i >= 0; i--) { 1141 v = aa + 25 * ai[i]; 1142 vi = aj + ai[i]; 1143 nz = ai[i + 1] - ai[i]; 1144 s[0] = b[i2]; 1145 s[1] = b[i2 + 1]; 1146 s[2] = b[i2 + 2]; 1147 s[3] = b[i2 + 3]; 1148 s[4] = b[i2 + 4]; 1149 while (nz--) { 1150 idx = 5 * (*vi++); 1151 xw[0] = x[idx]; 1152 xw[1] = x[1 + idx]; 1153 xw[2] = x[2 + idx]; 1154 xw[3] = x[3 + idx]; 1155 xw[4] = x[4 + idx]; 1156 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1157 v += 25; 1158 } 1159 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1160 x[i2] += xw[0]; 1161 x[i2 + 1] += xw[1]; 1162 x[i2 + 2] += xw[2]; 1163 x[i2 + 3] += xw[3]; 1164 x[i2 + 4] += xw[4]; 1165 idiag -= 25; 1166 i2 -= 5; 1167 } 1168 break; 1169 case 6: 1170 for (i = m - 1; i >= 0; i--) { 1171 v = aa + 36 * ai[i]; 1172 vi = aj + ai[i]; 1173 nz = ai[i + 1] - ai[i]; 1174 s[0] = b[i2]; 1175 s[1] = b[i2 + 1]; 1176 s[2] = b[i2 + 2]; 1177 s[3] = b[i2 + 3]; 1178 s[4] = b[i2 + 4]; 1179 s[5] = b[i2 + 5]; 1180 while (nz--) { 1181 idx = 6 * (*vi++); 1182 xw[0] = x[idx]; 1183 xw[1] = x[1 + idx]; 1184 xw[2] = x[2 + idx]; 1185 xw[3] = x[3 + idx]; 1186 xw[4] = x[4 + idx]; 1187 xw[5] = x[5 + idx]; 1188 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1189 v += 36; 1190 } 1191 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1192 x[i2] += xw[0]; 1193 x[i2 + 1] += xw[1]; 1194 x[i2 + 2] += xw[2]; 1195 x[i2 + 3] += xw[3]; 1196 x[i2 + 4] += xw[4]; 1197 x[i2 + 5] += xw[5]; 1198 idiag -= 36; 1199 i2 -= 6; 1200 } 1201 break; 1202 case 7: 1203 for (i = m - 1; i >= 0; i--) { 1204 v = aa + 49 * ai[i]; 1205 vi = aj + ai[i]; 1206 nz = ai[i + 1] - ai[i]; 1207 s[0] = b[i2]; 1208 s[1] = b[i2 + 1]; 1209 s[2] = b[i2 + 2]; 1210 s[3] = b[i2 + 3]; 1211 s[4] = b[i2 + 4]; 1212 s[5] = b[i2 + 5]; 1213 s[6] = b[i2 + 6]; 1214 while (nz--) { 1215 idx = 7 * (*vi++); 1216 xw[0] = x[idx]; 1217 xw[1] = x[1 + idx]; 1218 xw[2] = x[2 + idx]; 1219 xw[3] = x[3 + idx]; 1220 xw[4] = x[4 + idx]; 1221 xw[5] = x[5 + idx]; 1222 xw[6] = x[6 + idx]; 1223 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1224 v += 49; 1225 } 1226 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1227 x[i2] += xw[0]; 1228 x[i2 + 1] += xw[1]; 1229 x[i2 + 2] += xw[2]; 1230 x[i2 + 3] += xw[3]; 1231 x[i2 + 4] += xw[4]; 1232 x[i2 + 5] += xw[5]; 1233 x[i2 + 6] += xw[6]; 1234 idiag -= 49; 1235 i2 -= 7; 1236 } 1237 break; 1238 default: 1239 for (i = m - 1; i >= 0; i--) { 1240 v = aa + bs2 * ai[i]; 1241 vi = aj + ai[i]; 1242 nz = ai[i + 1] - ai[i]; 1243 1244 PetscCall(PetscArraycpy(w, b + i2, bs)); 1245 /* copy all rows of x that are needed into contiguous space */ 1246 workt = work; 1247 for (j = 0; j < nz; j++) { 1248 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1249 workt += bs; 1250 } 1251 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1252 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1253 1254 idiag -= bs2; 1255 i2 -= bs; 1256 } 1257 break; 1258 } 1259 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1260 } 1261 } 1262 PetscCall(VecRestoreArray(xx, &x)); 1263 PetscCall(VecRestoreArrayRead(bb, &b)); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 /* 1268 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1269 */ 1270 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1271 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1272 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1273 #define matsetvaluesblocked4_ matsetvaluesblocked4 1274 #endif 1275 1276 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) { 1277 Mat A = *AA; 1278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1279 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1280 PetscInt *ai = a->i, *ailen = a->ilen; 1281 PetscInt *aj = a->j, stepval, lastcol = -1; 1282 const PetscScalar *value = v; 1283 MatScalar *ap, *aa = a->a, *bap; 1284 1285 PetscFunctionBegin; 1286 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1287 stepval = (n - 1) * 4; 1288 for (k = 0; k < m; k++) { /* loop over added rows */ 1289 row = im[k]; 1290 rp = aj + ai[row]; 1291 ap = aa + 16 * ai[row]; 1292 nrow = ailen[row]; 1293 low = 0; 1294 high = nrow; 1295 for (l = 0; l < n; l++) { /* loop over added columns */ 1296 col = in[l]; 1297 if (col <= lastcol) low = 0; 1298 else high = nrow; 1299 lastcol = col; 1300 value = v + k * (stepval + 4 + l) * 4; 1301 while (high - low > 7) { 1302 t = (low + high) / 2; 1303 if (rp[t] > col) high = t; 1304 else low = t; 1305 } 1306 for (i = low; i < high; i++) { 1307 if (rp[i] > col) break; 1308 if (rp[i] == col) { 1309 bap = ap + 16 * i; 1310 for (ii = 0; ii < 4; ii++, value += stepval) { 1311 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1312 } 1313 goto noinsert2; 1314 } 1315 } 1316 N = nrow++ - 1; 1317 high++; /* added new column index thus must search to one higher than before */ 1318 /* shift up all the later entries in this row */ 1319 for (ii = N; ii >= i; ii--) { 1320 rp[ii + 1] = rp[ii]; 1321 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1322 } 1323 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1324 rp[i] = col; 1325 bap = ap + 16 * i; 1326 for (ii = 0; ii < 4; ii++, value += stepval) { 1327 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1328 } 1329 noinsert2:; 1330 low = i; 1331 } 1332 ailen[row] = nrow; 1333 } 1334 PetscFunctionReturnVoid(); 1335 } 1336 1337 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1338 #define matsetvalues4_ MATSETVALUES4 1339 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1340 #define matsetvalues4_ matsetvalues4 1341 #endif 1342 1343 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) { 1344 Mat A = *AA; 1345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1346 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1347 PetscInt *ai = a->i, *ailen = a->ilen; 1348 PetscInt *aj = a->j, brow, bcol; 1349 PetscInt ridx, cidx, lastcol = -1; 1350 MatScalar *ap, value, *aa = a->a, *bap; 1351 1352 PetscFunctionBegin; 1353 for (k = 0; k < m; k++) { /* loop over added rows */ 1354 row = im[k]; 1355 brow = row / 4; 1356 rp = aj + ai[brow]; 1357 ap = aa + 16 * ai[brow]; 1358 nrow = ailen[brow]; 1359 low = 0; 1360 high = nrow; 1361 for (l = 0; l < n; l++) { /* loop over added columns */ 1362 col = in[l]; 1363 bcol = col / 4; 1364 ridx = row % 4; 1365 cidx = col % 4; 1366 value = v[l + k * n]; 1367 if (col <= lastcol) low = 0; 1368 else high = nrow; 1369 lastcol = col; 1370 while (high - low > 7) { 1371 t = (low + high) / 2; 1372 if (rp[t] > bcol) high = t; 1373 else low = t; 1374 } 1375 for (i = low; i < high; i++) { 1376 if (rp[i] > bcol) break; 1377 if (rp[i] == bcol) { 1378 bap = ap + 16 * i + 4 * cidx + ridx; 1379 *bap += value; 1380 goto noinsert1; 1381 } 1382 } 1383 N = nrow++ - 1; 1384 high++; /* added new column thus must search to one higher than before */ 1385 /* shift up all the later entries in this row */ 1386 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1387 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1388 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1389 rp[i] = bcol; 1390 ap[16 * i + 4 * cidx + ridx] = value; 1391 noinsert1:; 1392 low = i; 1393 } 1394 ailen[brow] = nrow; 1395 } 1396 PetscFunctionReturnVoid(); 1397 } 1398 1399 /* 1400 Checks for missing diagonals 1401 */ 1402 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 1403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1404 PetscInt *diag, *ii = a->i, i; 1405 1406 PetscFunctionBegin; 1407 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1408 *missing = PETSC_FALSE; 1409 if (A->rmap->n > 0 && !ii) { 1410 *missing = PETSC_TRUE; 1411 if (d) *d = 0; 1412 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1413 } else { 1414 PetscInt n; 1415 n = PetscMin(a->mbs, a->nbs); 1416 diag = a->diag; 1417 for (i = 0; i < n; i++) { 1418 if (diag[i] >= ii[i + 1]) { 1419 *missing = PETSC_TRUE; 1420 if (d) *d = i; 1421 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1422 break; 1423 } 1424 } 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) { 1430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1431 PetscInt i, j, m = a->mbs; 1432 1433 PetscFunctionBegin; 1434 if (!a->diag) { 1435 PetscCall(PetscMalloc1(m, &a->diag)); 1436 PetscCall(PetscLogObjectMemory((PetscObject)A, m * sizeof(PetscInt))); 1437 a->free_diag = PETSC_TRUE; 1438 } 1439 for (i = 0; i < m; i++) { 1440 a->diag[i] = a->i[i + 1]; 1441 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1442 if (a->j[j] == i) { 1443 a->diag[i] = j; 1444 break; 1445 } 1446 } 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) { 1452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1453 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1454 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1455 1456 PetscFunctionBegin; 1457 *nn = n; 1458 if (!ia) PetscFunctionReturn(0); 1459 if (symmetric) { 1460 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1461 nz = tia[n]; 1462 } else { 1463 tia = a->i; 1464 tja = a->j; 1465 } 1466 1467 if (!blockcompressed && bs > 1) { 1468 (*nn) *= bs; 1469 /* malloc & create the natural set of indices */ 1470 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1471 if (n) { 1472 (*ia)[0] = oshift; 1473 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1474 } 1475 1476 for (i = 1; i < n; i++) { 1477 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1478 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1479 } 1480 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1481 1482 if (inja) { 1483 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1484 cnt = 0; 1485 for (i = 0; i < n; i++) { 1486 for (j = 0; j < bs; j++) { 1487 for (k = tia[i]; k < tia[i + 1]; k++) { 1488 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1489 } 1490 } 1491 } 1492 } 1493 1494 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1495 PetscCall(PetscFree(tia)); 1496 PetscCall(PetscFree(tja)); 1497 } 1498 } else if (oshift == 1) { 1499 if (symmetric) { 1500 nz = tia[A->rmap->n / bs]; 1501 /* add 1 to i and j indices */ 1502 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1503 *ia = tia; 1504 if (ja) { 1505 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1506 *ja = tja; 1507 } 1508 } else { 1509 nz = a->i[A->rmap->n / bs]; 1510 /* malloc space and add 1 to i and j indices */ 1511 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1512 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1513 if (ja) { 1514 PetscCall(PetscMalloc1(nz, ja)); 1515 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1516 } 1517 } 1518 } else { 1519 *ia = tia; 1520 if (ja) *ja = tja; 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 1526 PetscFunctionBegin; 1527 if (!ia) PetscFunctionReturn(0); 1528 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1529 PetscCall(PetscFree(*ia)); 1530 if (ja) PetscCall(PetscFree(*ja)); 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 1535 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) { 1536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1537 1538 PetscFunctionBegin; 1539 #if defined(PETSC_USE_LOG) 1540 PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz); 1541 #endif 1542 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1543 PetscCall(ISDestroy(&a->row)); 1544 PetscCall(ISDestroy(&a->col)); 1545 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1546 PetscCall(PetscFree(a->idiag)); 1547 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1548 PetscCall(PetscFree(a->solve_work)); 1549 PetscCall(PetscFree(a->mult_work)); 1550 PetscCall(PetscFree(a->sor_workt)); 1551 PetscCall(PetscFree(a->sor_work)); 1552 PetscCall(ISDestroy(&a->icol)); 1553 PetscCall(PetscFree(a->saved_values)); 1554 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1555 1556 PetscCall(MatDestroy(&a->sbaijMat)); 1557 PetscCall(MatDestroy(&a->parent)); 1558 PetscCall(PetscFree(A->data)); 1559 1560 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1561 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1562 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1563 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1564 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1565 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1566 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1567 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1568 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1569 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1570 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1571 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1572 #if defined(PETSC_HAVE_HYPRE) 1573 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1574 #endif 1575 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1576 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) { 1581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1582 1583 PetscFunctionBegin; 1584 switch (op) { 1585 case MAT_ROW_ORIENTED: a->roworiented = flg; break; 1586 case MAT_KEEP_NONZERO_PATTERN: a->keepnonzeropattern = flg; break; 1587 case MAT_NEW_NONZERO_LOCATIONS: a->nonew = (flg ? 0 : 1); break; 1588 case MAT_NEW_NONZERO_LOCATION_ERR: a->nonew = (flg ? -1 : 0); break; 1589 case MAT_NEW_NONZERO_ALLOCATION_ERR: a->nonew = (flg ? -2 : 0); break; 1590 case MAT_UNUSED_NONZERO_LOCATION_ERR: a->nounused = (flg ? -1 : 0); break; 1591 case MAT_FORCE_DIAGONAL_ENTRIES: 1592 case MAT_IGNORE_OFF_PROC_ENTRIES: 1593 case MAT_USE_HASH_TABLE: 1594 case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 1595 case MAT_SPD: 1596 case MAT_SYMMETRIC: 1597 case MAT_STRUCTURALLY_SYMMETRIC: 1598 case MAT_HERMITIAN: 1599 case MAT_SYMMETRY_ETERNAL: 1600 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1601 case MAT_SUBMAT_SINGLEIS: 1602 case MAT_STRUCTURE_ONLY: 1603 case MAT_SPD_ETERNAL: 1604 /* if the diagonal matrix is square it inherits some of the properties above */ 1605 break; 1606 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1612 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) { 1613 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1614 MatScalar *aa_i; 1615 PetscScalar *v_i; 1616 1617 PetscFunctionBegin; 1618 bs = A->rmap->bs; 1619 bs2 = bs * bs; 1620 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1621 1622 bn = row / bs; /* Block number */ 1623 bp = row % bs; /* Block Position */ 1624 M = ai[bn + 1] - ai[bn]; 1625 *nz = bs * M; 1626 1627 if (v) { 1628 *v = NULL; 1629 if (*nz) { 1630 PetscCall(PetscMalloc1(*nz, v)); 1631 for (i = 0; i < M; i++) { /* for each block in the block row */ 1632 v_i = *v + i * bs; 1633 aa_i = aa + bs2 * (ai[bn] + i); 1634 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1635 } 1636 } 1637 } 1638 1639 if (idx) { 1640 *idx = NULL; 1641 if (*nz) { 1642 PetscCall(PetscMalloc1(*nz, idx)); 1643 for (i = 0; i < M; i++) { /* for each block in the block row */ 1644 idx_i = *idx + i * bs; 1645 itmp = bs * aj[ai[bn] + i]; 1646 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1647 } 1648 } 1649 } 1650 PetscFunctionReturn(0); 1651 } 1652 1653 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1655 1656 PetscFunctionBegin; 1657 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1658 PetscFunctionReturn(0); 1659 } 1660 1661 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1662 PetscFunctionBegin; 1663 if (nz) *nz = 0; 1664 if (idx) PetscCall(PetscFree(*idx)); 1665 if (v) PetscCall(PetscFree(*v)); 1666 PetscFunctionReturn(0); 1667 } 1668 1669 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) { 1670 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1671 Mat C; 1672 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1673 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1674 MatScalar *ata, *aa = a->a; 1675 1676 PetscFunctionBegin; 1677 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1678 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1679 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1680 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1681 1682 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1683 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1684 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1685 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1686 1687 at = (Mat_SeqBAIJ *)C->data; 1688 ati = at->i; 1689 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1690 } else { 1691 C = *B; 1692 at = (Mat_SeqBAIJ *)C->data; 1693 ati = at->i; 1694 } 1695 1696 atj = at->j; 1697 ata = at->a; 1698 1699 /* Copy ati into atfill so we have locations of the next free space in atj */ 1700 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1701 1702 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1703 for (i = 0; i < mbs; i++) { 1704 anzj = ai[i + 1] - ai[i]; 1705 for (j = 0; j < anzj; j++) { 1706 atj[atfill[*aj]] = i; 1707 for (kr = 0; kr < bs; kr++) { 1708 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1709 } 1710 atfill[*aj++] += 1; 1711 } 1712 } 1713 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1714 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1715 1716 /* Clean up temporary space and complete requests. */ 1717 PetscCall(PetscFree(atfill)); 1718 1719 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1720 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1721 *B = C; 1722 } else { 1723 PetscCall(MatHeaderMerge(A, &C)); 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) { 1729 Mat Btrans; 1730 1731 PetscFunctionBegin; 1732 *f = PETSC_FALSE; 1733 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1734 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1735 PetscCall(MatDestroy(&Btrans)); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1740 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) { 1741 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1742 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1743 PetscInt *rowlens, *colidxs; 1744 PetscScalar *matvals; 1745 1746 PetscFunctionBegin; 1747 PetscCall(PetscViewerSetUp(viewer)); 1748 1749 M = mat->rmap->N; 1750 N = mat->cmap->N; 1751 m = mat->rmap->n; 1752 bs = mat->rmap->bs; 1753 nz = bs * bs * A->nz; 1754 1755 /* write matrix header */ 1756 header[0] = MAT_FILE_CLASSID; 1757 header[1] = M; 1758 header[2] = N; 1759 header[3] = nz; 1760 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1761 1762 /* store row lengths */ 1763 PetscCall(PetscMalloc1(m, &rowlens)); 1764 for (cnt = 0, i = 0; i < A->mbs; i++) 1765 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1766 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1767 PetscCall(PetscFree(rowlens)); 1768 1769 /* store column indices */ 1770 PetscCall(PetscMalloc1(nz, &colidxs)); 1771 for (cnt = 0, i = 0; i < A->mbs; i++) 1772 for (k = 0; k < bs; k++) 1773 for (j = A->i[i]; j < A->i[i + 1]; j++) 1774 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1775 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1776 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1777 PetscCall(PetscFree(colidxs)); 1778 1779 /* store nonzero values */ 1780 PetscCall(PetscMalloc1(nz, &matvals)); 1781 for (cnt = 0, i = 0; i < A->mbs; i++) 1782 for (k = 0; k < bs; k++) 1783 for (j = A->i[i]; j < A->i[i + 1]; j++) 1784 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1785 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1786 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1787 PetscCall(PetscFree(matvals)); 1788 1789 /* write block size option to the viewer's .info file */ 1790 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) { 1795 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1796 PetscInt i, bs = A->rmap->bs, k; 1797 1798 PetscFunctionBegin; 1799 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1800 for (i = 0; i < a->mbs; i++) { 1801 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1802 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1803 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1804 } 1805 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) { 1810 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1811 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1812 PetscViewerFormat format; 1813 1814 PetscFunctionBegin; 1815 if (A->structure_only) { 1816 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1817 PetscFunctionReturn(0); 1818 } 1819 1820 PetscCall(PetscViewerGetFormat(viewer, &format)); 1821 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1822 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1823 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1824 const char *matname; 1825 Mat aij; 1826 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1827 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1828 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1829 PetscCall(MatView(aij, viewer)); 1830 PetscCall(MatDestroy(&aij)); 1831 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1832 PetscFunctionReturn(0); 1833 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1834 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1835 for (i = 0; i < a->mbs; i++) { 1836 for (j = 0; j < bs; j++) { 1837 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1838 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1839 for (l = 0; l < bs; l++) { 1840 #if defined(PETSC_USE_COMPLEX) 1841 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1842 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1843 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1844 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1845 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1846 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1847 } 1848 #else 1849 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1850 #endif 1851 } 1852 } 1853 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1854 } 1855 } 1856 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1857 } else { 1858 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1859 for (i = 0; i < a->mbs; i++) { 1860 for (j = 0; j < bs; j++) { 1861 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1862 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1863 for (l = 0; l < bs; l++) { 1864 #if defined(PETSC_USE_COMPLEX) 1865 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1866 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1867 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1868 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1869 } else { 1870 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1871 } 1872 #else 1873 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1874 #endif 1875 } 1876 } 1877 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1878 } 1879 } 1880 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1881 } 1882 PetscCall(PetscViewerFlush(viewer)); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #include <petscdraw.h> 1887 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) { 1888 Mat A = (Mat)Aa; 1889 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1890 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1891 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1892 MatScalar *aa; 1893 PetscViewer viewer; 1894 PetscViewerFormat format; 1895 1896 PetscFunctionBegin; 1897 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1898 PetscCall(PetscViewerGetFormat(viewer, &format)); 1899 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1900 1901 /* loop over matrix elements drawing boxes */ 1902 1903 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1904 PetscDrawCollectiveBegin(draw); 1905 /* Blue for negative, Cyan for zero and Red for positive */ 1906 color = PETSC_DRAW_BLUE; 1907 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1908 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1909 y_l = A->rmap->N - row - 1.0; 1910 y_r = y_l + 1.0; 1911 x_l = a->j[j] * bs; 1912 x_r = x_l + 1.0; 1913 aa = a->a + j * bs2; 1914 for (k = 0; k < bs; k++) { 1915 for (l = 0; l < bs; l++) { 1916 if (PetscRealPart(*aa++) >= 0.) continue; 1917 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1918 } 1919 } 1920 } 1921 } 1922 color = PETSC_DRAW_CYAN; 1923 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1924 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1925 y_l = A->rmap->N - row - 1.0; 1926 y_r = y_l + 1.0; 1927 x_l = a->j[j] * bs; 1928 x_r = x_l + 1.0; 1929 aa = a->a + j * bs2; 1930 for (k = 0; k < bs; k++) { 1931 for (l = 0; l < bs; l++) { 1932 if (PetscRealPart(*aa++) != 0.) continue; 1933 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1934 } 1935 } 1936 } 1937 } 1938 color = PETSC_DRAW_RED; 1939 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1940 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1941 y_l = A->rmap->N - row - 1.0; 1942 y_r = y_l + 1.0; 1943 x_l = a->j[j] * bs; 1944 x_r = x_l + 1.0; 1945 aa = a->a + j * bs2; 1946 for (k = 0; k < bs; k++) { 1947 for (l = 0; l < bs; l++) { 1948 if (PetscRealPart(*aa++) <= 0.) continue; 1949 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1950 } 1951 } 1952 } 1953 } 1954 PetscDrawCollectiveEnd(draw); 1955 } else { 1956 /* use contour shading to indicate magnitude of values */ 1957 /* first determine max of all nonzero values */ 1958 PetscReal minv = 0.0, maxv = 0.0; 1959 PetscDraw popup; 1960 1961 for (i = 0; i < a->nz * a->bs2; i++) { 1962 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1963 } 1964 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1965 PetscCall(PetscDrawGetPopup(draw, &popup)); 1966 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1967 1968 PetscDrawCollectiveBegin(draw); 1969 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1970 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1971 y_l = A->rmap->N - row - 1.0; 1972 y_r = y_l + 1.0; 1973 x_l = a->j[j] * bs; 1974 x_r = x_l + 1.0; 1975 aa = a->a + j * bs2; 1976 for (k = 0; k < bs; k++) { 1977 for (l = 0; l < bs; l++) { 1978 MatScalar v = *aa++; 1979 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 1980 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1981 } 1982 } 1983 } 1984 } 1985 PetscDrawCollectiveEnd(draw); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) { 1991 PetscReal xl, yl, xr, yr, w, h; 1992 PetscDraw draw; 1993 PetscBool isnull; 1994 1995 PetscFunctionBegin; 1996 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1997 PetscCall(PetscDrawIsNull(draw, &isnull)); 1998 if (isnull) PetscFunctionReturn(0); 1999 2000 xr = A->cmap->n; 2001 yr = A->rmap->N; 2002 h = yr / 10.0; 2003 w = xr / 10.0; 2004 xr += w; 2005 yr += h; 2006 xl = -w; 2007 yl = -h; 2008 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2009 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2010 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2011 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2012 PetscCall(PetscDrawSave(draw)); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) { 2017 PetscBool iascii, isbinary, isdraw; 2018 2019 PetscFunctionBegin; 2020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2021 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2022 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2023 if (iascii) { 2024 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2025 } else if (isbinary) { 2026 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2027 } else if (isdraw) { 2028 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2029 } else { 2030 Mat B; 2031 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2032 PetscCall(MatView(B, viewer)); 2033 PetscCall(MatDestroy(&B)); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) { 2039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2040 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2041 PetscInt *ai = a->i, *ailen = a->ilen; 2042 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2043 MatScalar *ap, *aa = a->a; 2044 2045 PetscFunctionBegin; 2046 for (k = 0; k < m; k++) { /* loop over rows */ 2047 row = im[k]; 2048 brow = row / bs; 2049 if (row < 0) { 2050 v += n; 2051 continue; 2052 } /* negative row */ 2053 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2054 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2055 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2056 nrow = ailen[brow]; 2057 for (l = 0; l < n; l++) { /* loop over columns */ 2058 if (in[l] < 0) { 2059 v++; 2060 continue; 2061 } /* negative column */ 2062 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2063 col = in[l]; 2064 bcol = col / bs; 2065 cidx = col % bs; 2066 ridx = row % bs; 2067 high = nrow; 2068 low = 0; /* assume unsorted */ 2069 while (high - low > 5) { 2070 t = (low + high) / 2; 2071 if (rp[t] > bcol) high = t; 2072 else low = t; 2073 } 2074 for (i = low; i < high; i++) { 2075 if (rp[i] > bcol) break; 2076 if (rp[i] == bcol) { 2077 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2078 goto finished; 2079 } 2080 } 2081 *v++ = 0.0; 2082 finished:; 2083 } 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) { 2089 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2090 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2091 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2092 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2093 PetscBool roworiented = a->roworiented; 2094 const PetscScalar *value = v; 2095 MatScalar *ap = NULL, *aa = a->a, *bap; 2096 2097 PetscFunctionBegin; 2098 if (roworiented) { 2099 stepval = (n - 1) * bs; 2100 } else { 2101 stepval = (m - 1) * bs; 2102 } 2103 for (k = 0; k < m; k++) { /* loop over added rows */ 2104 row = im[k]; 2105 if (row < 0) continue; 2106 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2107 rp = aj + ai[row]; 2108 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2109 rmax = imax[row]; 2110 nrow = ailen[row]; 2111 low = 0; 2112 high = nrow; 2113 for (l = 0; l < n; l++) { /* loop over added columns */ 2114 if (in[l] < 0) continue; 2115 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2116 col = in[l]; 2117 if (!A->structure_only) { 2118 if (roworiented) { 2119 value = v + (k * (stepval + bs) + l) * bs; 2120 } else { 2121 value = v + (l * (stepval + bs) + k) * bs; 2122 } 2123 } 2124 if (col <= lastcol) low = 0; 2125 else high = nrow; 2126 lastcol = col; 2127 while (high - low > 7) { 2128 t = (low + high) / 2; 2129 if (rp[t] > col) high = t; 2130 else low = t; 2131 } 2132 for (i = low; i < high; i++) { 2133 if (rp[i] > col) break; 2134 if (rp[i] == col) { 2135 if (A->structure_only) goto noinsert2; 2136 bap = ap + bs2 * i; 2137 if (roworiented) { 2138 if (is == ADD_VALUES) { 2139 for (ii = 0; ii < bs; ii++, value += stepval) { 2140 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2141 } 2142 } else { 2143 for (ii = 0; ii < bs; ii++, value += stepval) { 2144 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2145 } 2146 } 2147 } else { 2148 if (is == ADD_VALUES) { 2149 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2150 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2151 bap += bs; 2152 } 2153 } else { 2154 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2155 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2156 bap += bs; 2157 } 2158 } 2159 } 2160 goto noinsert2; 2161 } 2162 } 2163 if (nonew == 1) goto noinsert2; 2164 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2165 if (A->structure_only) { 2166 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2167 } else { 2168 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2169 } 2170 N = nrow++ - 1; 2171 high++; 2172 /* shift up all the later entries in this row */ 2173 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2174 rp[i] = col; 2175 if (!A->structure_only) { 2176 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2177 bap = ap + bs2 * i; 2178 if (roworiented) { 2179 for (ii = 0; ii < bs; ii++, value += stepval) { 2180 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2181 } 2182 } else { 2183 for (ii = 0; ii < bs; ii++, value += stepval) { 2184 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2185 } 2186 } 2187 } 2188 noinsert2:; 2189 low = i; 2190 } 2191 ailen[row] = nrow; 2192 } 2193 PetscFunctionReturn(0); 2194 } 2195 2196 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) { 2197 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2198 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2199 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2200 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2201 MatScalar *aa = a->a, *ap; 2202 PetscReal ratio = 0.6; 2203 2204 PetscFunctionBegin; 2205 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2206 2207 if (m) rmax = ailen[0]; 2208 for (i = 1; i < mbs; i++) { 2209 /* move each row back by the amount of empty slots (fshift) before it*/ 2210 fshift += imax[i - 1] - ailen[i - 1]; 2211 rmax = PetscMax(rmax, ailen[i]); 2212 if (fshift) { 2213 ip = aj + ai[i]; 2214 ap = aa + bs2 * ai[i]; 2215 N = ailen[i]; 2216 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2217 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2218 } 2219 ai[i] = ai[i - 1] + ailen[i - 1]; 2220 } 2221 if (mbs) { 2222 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2223 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2224 } 2225 2226 /* reset ilen and imax for each row */ 2227 a->nonzerorowcnt = 0; 2228 if (A->structure_only) { 2229 PetscCall(PetscFree2(a->imax, a->ilen)); 2230 } else { /* !A->structure_only */ 2231 for (i = 0; i < mbs; i++) { 2232 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2233 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2234 } 2235 } 2236 a->nz = ai[mbs]; 2237 2238 /* diagonals may have moved, so kill the diagonal pointers */ 2239 a->idiagvalid = PETSC_FALSE; 2240 if (fshift && a->diag) { 2241 PetscCall(PetscFree(a->diag)); 2242 PetscCall(PetscLogObjectMemory((PetscObject)A, -(mbs + 1) * sizeof(PetscInt))); 2243 a->diag = NULL; 2244 } 2245 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2246 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2247 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2248 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2249 2250 A->info.mallocs += a->reallocs; 2251 a->reallocs = 0; 2252 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2253 a->rmax = rmax; 2254 2255 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 /* 2260 This function returns an array of flags which indicate the locations of contiguous 2261 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2262 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2263 Assume: sizes should be long enough to hold all the values. 2264 */ 2265 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) { 2266 PetscInt i, j, k, row; 2267 PetscBool flg; 2268 2269 PetscFunctionBegin; 2270 for (i = 0, j = 0; i < n; j++) { 2271 row = idx[i]; 2272 if (row % bs != 0) { /* Not the beginning of a block */ 2273 sizes[j] = 1; 2274 i++; 2275 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2276 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2277 i++; 2278 } else { /* Beginning of the block, so check if the complete block exists */ 2279 flg = PETSC_TRUE; 2280 for (k = 1; k < bs; k++) { 2281 if (row + k != idx[i + k]) { /* break in the block */ 2282 flg = PETSC_FALSE; 2283 break; 2284 } 2285 } 2286 if (flg) { /* No break in the bs */ 2287 sizes[j] = bs; 2288 i += bs; 2289 } else { 2290 sizes[j] = 1; 2291 i++; 2292 } 2293 } 2294 } 2295 *bs_max = j; 2296 PetscFunctionReturn(0); 2297 } 2298 2299 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) { 2300 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2301 PetscInt i, j, k, count, *rows; 2302 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2303 PetscScalar zero = 0.0; 2304 MatScalar *aa; 2305 const PetscScalar *xx; 2306 PetscScalar *bb; 2307 2308 PetscFunctionBegin; 2309 /* fix right hand side if needed */ 2310 if (x && b) { 2311 PetscCall(VecGetArrayRead(x, &xx)); 2312 PetscCall(VecGetArray(b, &bb)); 2313 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2314 PetscCall(VecRestoreArrayRead(x, &xx)); 2315 PetscCall(VecRestoreArray(b, &bb)); 2316 } 2317 2318 /* Make a copy of the IS and sort it */ 2319 /* allocate memory for rows,sizes */ 2320 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2321 2322 /* copy IS values to rows, and sort them */ 2323 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2324 PetscCall(PetscSortInt(is_n, rows)); 2325 2326 if (baij->keepnonzeropattern) { 2327 for (i = 0; i < is_n; i++) sizes[i] = 1; 2328 bs_max = is_n; 2329 } else { 2330 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2331 A->nonzerostate++; 2332 } 2333 2334 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2335 row = rows[j]; 2336 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2337 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2338 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2339 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2340 if (diag != (PetscScalar)0.0) { 2341 if (baij->ilen[row / bs] > 0) { 2342 baij->ilen[row / bs] = 1; 2343 baij->j[baij->i[row / bs]] = row / bs; 2344 2345 PetscCall(PetscArrayzero(aa, count * bs)); 2346 } 2347 /* Now insert all the diagonal values for this bs */ 2348 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2349 } else { /* (diag == 0.0) */ 2350 baij->ilen[row / bs] = 0; 2351 } /* end (diag == 0.0) */ 2352 } else { /* (sizes[i] != bs) */ 2353 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2354 for (k = 0; k < count; k++) { 2355 aa[0] = zero; 2356 aa += bs; 2357 } 2358 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2359 } 2360 } 2361 2362 PetscCall(PetscFree2(rows, sizes)); 2363 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) { 2368 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2369 PetscInt i, j, k, count; 2370 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2371 PetscScalar zero = 0.0; 2372 MatScalar *aa; 2373 const PetscScalar *xx; 2374 PetscScalar *bb; 2375 PetscBool *zeroed, vecs = PETSC_FALSE; 2376 2377 PetscFunctionBegin; 2378 /* fix right hand side if needed */ 2379 if (x && b) { 2380 PetscCall(VecGetArrayRead(x, &xx)); 2381 PetscCall(VecGetArray(b, &bb)); 2382 vecs = PETSC_TRUE; 2383 } 2384 2385 /* zero the columns */ 2386 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2387 for (i = 0; i < is_n; i++) { 2388 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2389 zeroed[is_idx[i]] = PETSC_TRUE; 2390 } 2391 for (i = 0; i < A->rmap->N; i++) { 2392 if (!zeroed[i]) { 2393 row = i / bs; 2394 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2395 for (k = 0; k < bs; k++) { 2396 col = bs * baij->j[j] + k; 2397 if (zeroed[col]) { 2398 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2399 if (vecs) bb[i] -= aa[0] * xx[col]; 2400 aa[0] = 0.0; 2401 } 2402 } 2403 } 2404 } else if (vecs) bb[i] = diag * xx[i]; 2405 } 2406 PetscCall(PetscFree(zeroed)); 2407 if (vecs) { 2408 PetscCall(VecRestoreArrayRead(x, &xx)); 2409 PetscCall(VecRestoreArray(b, &bb)); 2410 } 2411 2412 /* zero the rows */ 2413 for (i = 0; i < is_n; i++) { 2414 row = is_idx[i]; 2415 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2416 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2417 for (k = 0; k < count; k++) { 2418 aa[0] = zero; 2419 aa += bs; 2420 } 2421 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2422 } 2423 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) { 2428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2429 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2430 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2431 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2432 PetscInt ridx, cidx, bs2 = a->bs2; 2433 PetscBool roworiented = a->roworiented; 2434 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2435 2436 PetscFunctionBegin; 2437 for (k = 0; k < m; k++) { /* loop over added rows */ 2438 row = im[k]; 2439 brow = row / bs; 2440 if (row < 0) continue; 2441 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2442 rp = aj + ai[brow]; 2443 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2444 rmax = imax[brow]; 2445 nrow = ailen[brow]; 2446 low = 0; 2447 high = nrow; 2448 for (l = 0; l < n; l++) { /* loop over added columns */ 2449 if (in[l] < 0) continue; 2450 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2451 col = in[l]; 2452 bcol = col / bs; 2453 ridx = row % bs; 2454 cidx = col % bs; 2455 if (!A->structure_only) { 2456 if (roworiented) { 2457 value = v[l + k * n]; 2458 } else { 2459 value = v[k + l * m]; 2460 } 2461 } 2462 if (col <= lastcol) low = 0; 2463 else high = nrow; 2464 lastcol = col; 2465 while (high - low > 7) { 2466 t = (low + high) / 2; 2467 if (rp[t] > bcol) high = t; 2468 else low = t; 2469 } 2470 for (i = low; i < high; i++) { 2471 if (rp[i] > bcol) break; 2472 if (rp[i] == bcol) { 2473 bap = ap + bs2 * i + bs * cidx + ridx; 2474 if (!A->structure_only) { 2475 if (is == ADD_VALUES) *bap += value; 2476 else *bap = value; 2477 } 2478 goto noinsert1; 2479 } 2480 } 2481 if (nonew == 1) goto noinsert1; 2482 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2483 if (A->structure_only) { 2484 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2485 } else { 2486 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2487 } 2488 N = nrow++ - 1; 2489 high++; 2490 /* shift up all the later entries in this row */ 2491 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2492 rp[i] = bcol; 2493 if (!A->structure_only) { 2494 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2495 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2496 ap[bs2 * i + bs * cidx + ridx] = value; 2497 } 2498 a->nz++; 2499 A->nonzerostate++; 2500 noinsert1:; 2501 low = i; 2502 } 2503 ailen[brow] = nrow; 2504 } 2505 PetscFunctionReturn(0); 2506 } 2507 2508 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) { 2509 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2510 Mat outA; 2511 PetscBool row_identity, col_identity; 2512 2513 PetscFunctionBegin; 2514 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2515 PetscCall(ISIdentity(row, &row_identity)); 2516 PetscCall(ISIdentity(col, &col_identity)); 2517 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2518 2519 outA = inA; 2520 inA->factortype = MAT_FACTOR_LU; 2521 PetscCall(PetscFree(inA->solvertype)); 2522 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2523 2524 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2525 2526 PetscCall(PetscObjectReference((PetscObject)row)); 2527 PetscCall(ISDestroy(&a->row)); 2528 a->row = row; 2529 PetscCall(PetscObjectReference((PetscObject)col)); 2530 PetscCall(ISDestroy(&a->col)); 2531 a->col = col; 2532 2533 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2534 PetscCall(ISDestroy(&a->icol)); 2535 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2536 PetscCall(PetscLogObjectParent((PetscObject)inA, (PetscObject)a->icol)); 2537 2538 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2539 if (!a->solve_work) { 2540 PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2541 PetscCall(PetscLogObjectMemory((PetscObject)inA, (inA->rmap->N + inA->rmap->bs) * sizeof(PetscScalar))); 2542 } 2543 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) { 2548 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2549 PetscInt i, nz, mbs; 2550 2551 PetscFunctionBegin; 2552 nz = baij->maxnz; 2553 mbs = baij->mbs; 2554 for (i = 0; i < nz; i++) baij->j[i] = indices[i]; 2555 baij->nz = nz; 2556 for (i = 0; i < mbs; i++) baij->ilen[i] = baij->imax[i]; 2557 PetscFunctionReturn(0); 2558 } 2559 2560 /*@ 2561 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2562 2563 Input Parameters: 2564 + mat - the `MATSEQBAIJ` matrix 2565 - indices - the column indices 2566 2567 Level: advanced 2568 2569 Notes: 2570 This can be called if you have precomputed the nonzero structure of the 2571 matrix and want to provide it to the matrix object to improve the performance 2572 of the `MatSetValues()` operation. 2573 2574 You MUST have set the correct numbers of nonzeros per row in the call to 2575 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2576 2577 MUST be called before any calls to `MatSetValues()` 2578 2579 .seealso: `MATSEQBAIJ`, `MatSetValues()` 2580 @*/ 2581 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) { 2582 PetscFunctionBegin; 2583 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2584 PetscValidIntPointer(indices, 2); 2585 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) { 2590 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2591 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2592 PetscReal atmp; 2593 PetscScalar *x, zero = 0.0; 2594 MatScalar *aa; 2595 PetscInt ncols, brow, krow, kcol; 2596 2597 PetscFunctionBegin; 2598 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2599 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2600 bs = A->rmap->bs; 2601 aa = a->a; 2602 ai = a->i; 2603 aj = a->j; 2604 mbs = a->mbs; 2605 2606 PetscCall(VecSet(v, zero)); 2607 PetscCall(VecGetArray(v, &x)); 2608 PetscCall(VecGetLocalSize(v, &n)); 2609 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2610 for (i = 0; i < mbs; i++) { 2611 ncols = ai[1] - ai[0]; 2612 ai++; 2613 brow = bs * i; 2614 for (j = 0; j < ncols; j++) { 2615 for (kcol = 0; kcol < bs; kcol++) { 2616 for (krow = 0; krow < bs; krow++) { 2617 atmp = PetscAbsScalar(*aa); 2618 aa++; 2619 row = brow + krow; /* row index */ 2620 if (PetscAbsScalar(x[row]) < atmp) { 2621 x[row] = atmp; 2622 if (idx) idx[row] = bs * (*aj) + kcol; 2623 } 2624 } 2625 } 2626 aj++; 2627 } 2628 } 2629 PetscCall(VecRestoreArray(v, &x)); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) { 2634 PetscFunctionBegin; 2635 /* If the two matrices have the same copy implementation, use fast copy. */ 2636 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2638 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2639 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2640 2641 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2642 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2643 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2644 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2645 } else { 2646 PetscCall(MatCopy_Basic(A, B, str)); 2647 } 2648 PetscFunctionReturn(0); 2649 } 2650 2651 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) { 2652 PetscFunctionBegin; 2653 PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) { 2658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2659 2660 PetscFunctionBegin; 2661 *array = a->a; 2662 PetscFunctionReturn(0); 2663 } 2664 2665 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) { 2666 PetscFunctionBegin; 2667 *array = NULL; 2668 PetscFunctionReturn(0); 2669 } 2670 2671 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) { 2672 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2673 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2674 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2675 2676 PetscFunctionBegin; 2677 /* Set the number of nonzeros in the new matrix */ 2678 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) { 2683 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2684 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2685 PetscBLASInt one = 1; 2686 2687 PetscFunctionBegin; 2688 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2689 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2690 if (e) { 2691 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2692 if (e) { 2693 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2694 if (e) str = SAME_NONZERO_PATTERN; 2695 } 2696 } 2697 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2698 } 2699 if (str == SAME_NONZERO_PATTERN) { 2700 PetscScalar alpha = a; 2701 PetscBLASInt bnz; 2702 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2703 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2704 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2705 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2706 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2707 } else { 2708 Mat B; 2709 PetscInt *nnz; 2710 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2711 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2712 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2713 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2714 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2715 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2716 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2717 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2718 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2719 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2720 PetscCall(MatHeaderMerge(Y, &B)); 2721 PetscCall(PetscFree(nnz)); 2722 } 2723 PetscFunctionReturn(0); 2724 } 2725 2726 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) { 2727 #if defined(PETSC_USE_COMPLEX) 2728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2729 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2730 MatScalar *aa = a->a; 2731 2732 PetscFunctionBegin; 2733 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2734 #else 2735 PetscFunctionBegin; 2736 #endif 2737 PetscFunctionReturn(0); 2738 } 2739 2740 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) { 2741 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2742 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2743 MatScalar *aa = a->a; 2744 2745 PetscFunctionBegin; 2746 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) { 2751 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2752 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2753 MatScalar *aa = a->a; 2754 2755 PetscFunctionBegin; 2756 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 /* 2761 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2762 */ 2763 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 2764 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2765 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2766 PetscInt nz = a->i[m], row, *jj, mr, col; 2767 2768 PetscFunctionBegin; 2769 *nn = n; 2770 if (!ia) PetscFunctionReturn(0); 2771 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2772 PetscCall(PetscCalloc1(n, &collengths)); 2773 PetscCall(PetscMalloc1(n + 1, &cia)); 2774 PetscCall(PetscMalloc1(nz, &cja)); 2775 jj = a->j; 2776 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2777 cia[0] = oshift; 2778 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2779 PetscCall(PetscArrayzero(collengths, n)); 2780 jj = a->j; 2781 for (row = 0; row < m; row++) { 2782 mr = a->i[row + 1] - a->i[row]; 2783 for (i = 0; i < mr; i++) { 2784 col = *jj++; 2785 2786 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2787 } 2788 } 2789 PetscCall(PetscFree(collengths)); 2790 *ia = cia; 2791 *ja = cja; 2792 PetscFunctionReturn(0); 2793 } 2794 2795 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 2796 PetscFunctionBegin; 2797 if (!ia) PetscFunctionReturn(0); 2798 PetscCall(PetscFree(*ia)); 2799 PetscCall(PetscFree(*ja)); 2800 PetscFunctionReturn(0); 2801 } 2802 2803 /* 2804 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2805 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2806 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2807 */ 2808 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) { 2809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2810 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2811 PetscInt nz = a->i[m], row, *jj, mr, col; 2812 PetscInt *cspidx; 2813 2814 PetscFunctionBegin; 2815 *nn = n; 2816 if (!ia) PetscFunctionReturn(0); 2817 2818 PetscCall(PetscCalloc1(n, &collengths)); 2819 PetscCall(PetscMalloc1(n + 1, &cia)); 2820 PetscCall(PetscMalloc1(nz, &cja)); 2821 PetscCall(PetscMalloc1(nz, &cspidx)); 2822 jj = a->j; 2823 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2824 cia[0] = oshift; 2825 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2826 PetscCall(PetscArrayzero(collengths, n)); 2827 jj = a->j; 2828 for (row = 0; row < m; row++) { 2829 mr = a->i[row + 1] - a->i[row]; 2830 for (i = 0; i < mr; i++) { 2831 col = *jj++; 2832 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2833 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2834 } 2835 } 2836 PetscCall(PetscFree(collengths)); 2837 *ia = cia; 2838 *ja = cja; 2839 *spidx = cspidx; 2840 PetscFunctionReturn(0); 2841 } 2842 2843 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) { 2844 PetscFunctionBegin; 2845 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2846 PetscCall(PetscFree(*spidx)); 2847 PetscFunctionReturn(0); 2848 } 2849 2850 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) { 2851 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2852 2853 PetscFunctionBegin; 2854 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2855 PetscCall(MatShift_Basic(Y, a)); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 /* -------------------------------------------------------------------*/ 2860 static struct _MatOps MatOps_Values = { 2861 MatSetValues_SeqBAIJ, 2862 MatGetRow_SeqBAIJ, 2863 MatRestoreRow_SeqBAIJ, 2864 MatMult_SeqBAIJ_N, 2865 /* 4*/ MatMultAdd_SeqBAIJ_N, 2866 MatMultTranspose_SeqBAIJ, 2867 MatMultTransposeAdd_SeqBAIJ, 2868 NULL, 2869 NULL, 2870 NULL, 2871 /* 10*/ NULL, 2872 MatLUFactor_SeqBAIJ, 2873 NULL, 2874 NULL, 2875 MatTranspose_SeqBAIJ, 2876 /* 15*/ MatGetInfo_SeqBAIJ, 2877 MatEqual_SeqBAIJ, 2878 MatGetDiagonal_SeqBAIJ, 2879 MatDiagonalScale_SeqBAIJ, 2880 MatNorm_SeqBAIJ, 2881 /* 20*/ NULL, 2882 MatAssemblyEnd_SeqBAIJ, 2883 MatSetOption_SeqBAIJ, 2884 MatZeroEntries_SeqBAIJ, 2885 /* 24*/ MatZeroRows_SeqBAIJ, 2886 NULL, 2887 NULL, 2888 NULL, 2889 NULL, 2890 /* 29*/ MatSetUp_SeqBAIJ, 2891 NULL, 2892 NULL, 2893 NULL, 2894 NULL, 2895 /* 34*/ MatDuplicate_SeqBAIJ, 2896 NULL, 2897 NULL, 2898 MatILUFactor_SeqBAIJ, 2899 NULL, 2900 /* 39*/ MatAXPY_SeqBAIJ, 2901 MatCreateSubMatrices_SeqBAIJ, 2902 MatIncreaseOverlap_SeqBAIJ, 2903 MatGetValues_SeqBAIJ, 2904 MatCopy_SeqBAIJ, 2905 /* 44*/ NULL, 2906 MatScale_SeqBAIJ, 2907 MatShift_SeqBAIJ, 2908 NULL, 2909 MatZeroRowsColumns_SeqBAIJ, 2910 /* 49*/ NULL, 2911 MatGetRowIJ_SeqBAIJ, 2912 MatRestoreRowIJ_SeqBAIJ, 2913 MatGetColumnIJ_SeqBAIJ, 2914 MatRestoreColumnIJ_SeqBAIJ, 2915 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2916 NULL, 2917 NULL, 2918 NULL, 2919 MatSetValuesBlocked_SeqBAIJ, 2920 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2921 MatDestroy_SeqBAIJ, 2922 MatView_SeqBAIJ, 2923 NULL, 2924 NULL, 2925 /* 64*/ NULL, 2926 NULL, 2927 NULL, 2928 NULL, 2929 NULL, 2930 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2931 NULL, 2932 MatConvert_Basic, 2933 NULL, 2934 NULL, 2935 /* 74*/ NULL, 2936 MatFDColoringApply_BAIJ, 2937 NULL, 2938 NULL, 2939 NULL, 2940 /* 79*/ NULL, 2941 NULL, 2942 NULL, 2943 NULL, 2944 MatLoad_SeqBAIJ, 2945 /* 84*/ NULL, 2946 NULL, 2947 NULL, 2948 NULL, 2949 NULL, 2950 /* 89*/ NULL, 2951 NULL, 2952 NULL, 2953 NULL, 2954 NULL, 2955 /* 94*/ NULL, 2956 NULL, 2957 NULL, 2958 NULL, 2959 NULL, 2960 /* 99*/ NULL, 2961 NULL, 2962 NULL, 2963 MatConjugate_SeqBAIJ, 2964 NULL, 2965 /*104*/ NULL, 2966 MatRealPart_SeqBAIJ, 2967 MatImaginaryPart_SeqBAIJ, 2968 NULL, 2969 NULL, 2970 /*109*/ NULL, 2971 NULL, 2972 NULL, 2973 NULL, 2974 MatMissingDiagonal_SeqBAIJ, 2975 /*114*/ NULL, 2976 NULL, 2977 NULL, 2978 NULL, 2979 NULL, 2980 /*119*/ NULL, 2981 NULL, 2982 MatMultHermitianTranspose_SeqBAIJ, 2983 MatMultHermitianTransposeAdd_SeqBAIJ, 2984 NULL, 2985 /*124*/ NULL, 2986 MatGetColumnReductions_SeqBAIJ, 2987 MatInvertBlockDiagonal_SeqBAIJ, 2988 NULL, 2989 NULL, 2990 /*129*/ NULL, 2991 NULL, 2992 NULL, 2993 NULL, 2994 NULL, 2995 /*134*/ NULL, 2996 NULL, 2997 NULL, 2998 NULL, 2999 NULL, 3000 /*139*/ MatSetBlockSizes_Default, 3001 NULL, 3002 NULL, 3003 MatFDColoringSetUp_SeqXAIJ, 3004 NULL, 3005 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3006 MatDestroySubMatrices_SeqBAIJ, 3007 NULL, 3008 NULL, 3009 NULL, 3010 NULL, 3011 /*150*/ NULL, 3012 }; 3013 3014 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) { 3015 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3016 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3017 3018 PetscFunctionBegin; 3019 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3020 3021 /* allocate space for values if not already there */ 3022 if (!aij->saved_values) { 3023 PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3024 PetscCall(PetscLogObjectMemory((PetscObject)mat, (nz + 1) * sizeof(PetscScalar))); 3025 } 3026 3027 /* copy values over */ 3028 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3029 PetscFunctionReturn(0); 3030 } 3031 3032 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) { 3033 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3034 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3035 3036 PetscFunctionBegin; 3037 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3038 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3039 3040 /* copy values over */ 3041 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3042 PetscFunctionReturn(0); 3043 } 3044 3045 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3046 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3047 3048 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) { 3049 Mat_SeqBAIJ *b; 3050 PetscInt i, mbs, nbs, bs2; 3051 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3052 3053 PetscFunctionBegin; 3054 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3055 if (nz == MAT_SKIP_ALLOCATION) { 3056 skipallocation = PETSC_TRUE; 3057 nz = 0; 3058 } 3059 3060 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3061 PetscCall(PetscLayoutSetUp(B->rmap)); 3062 PetscCall(PetscLayoutSetUp(B->cmap)); 3063 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3064 3065 B->preallocated = PETSC_TRUE; 3066 3067 mbs = B->rmap->n / bs; 3068 nbs = B->cmap->n / bs; 3069 bs2 = bs * bs; 3070 3071 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3072 3073 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3074 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3075 if (nnz) { 3076 for (i = 0; i < mbs; i++) { 3077 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3078 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3079 } 3080 } 3081 3082 b = (Mat_SeqBAIJ *)B->data; 3083 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3084 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3085 PetscOptionsEnd(); 3086 3087 if (!flg) { 3088 switch (bs) { 3089 case 1: 3090 B->ops->mult = MatMult_SeqBAIJ_1; 3091 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3092 break; 3093 case 2: 3094 B->ops->mult = MatMult_SeqBAIJ_2; 3095 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3096 break; 3097 case 3: 3098 B->ops->mult = MatMult_SeqBAIJ_3; 3099 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3100 break; 3101 case 4: 3102 B->ops->mult = MatMult_SeqBAIJ_4; 3103 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3104 break; 3105 case 5: 3106 B->ops->mult = MatMult_SeqBAIJ_5; 3107 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3108 break; 3109 case 6: 3110 B->ops->mult = MatMult_SeqBAIJ_6; 3111 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3112 break; 3113 case 7: 3114 B->ops->mult = MatMult_SeqBAIJ_7; 3115 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3116 break; 3117 case 9: { 3118 PetscInt version = 1; 3119 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3120 switch (version) { 3121 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3122 case 1: 3123 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3124 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3125 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3126 break; 3127 #endif 3128 default: 3129 B->ops->mult = MatMult_SeqBAIJ_N; 3130 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3131 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3132 break; 3133 } 3134 break; 3135 } 3136 case 11: 3137 B->ops->mult = MatMult_SeqBAIJ_11; 3138 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3139 break; 3140 case 12: { 3141 PetscInt version = 1; 3142 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3143 switch (version) { 3144 case 1: 3145 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3146 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3147 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3148 break; 3149 case 2: 3150 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3151 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3152 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3153 break; 3154 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3155 case 3: 3156 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3157 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3158 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3159 break; 3160 #endif 3161 default: 3162 B->ops->mult = MatMult_SeqBAIJ_N; 3163 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3164 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3165 break; 3166 } 3167 break; 3168 } 3169 case 15: { 3170 PetscInt version = 1; 3171 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3172 switch (version) { 3173 case 1: 3174 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3175 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3176 break; 3177 case 2: 3178 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3179 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3180 break; 3181 case 3: 3182 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3183 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3184 break; 3185 case 4: 3186 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3187 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3188 break; 3189 default: 3190 B->ops->mult = MatMult_SeqBAIJ_N; 3191 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3192 break; 3193 } 3194 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3195 break; 3196 } 3197 default: 3198 B->ops->mult = MatMult_SeqBAIJ_N; 3199 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3200 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3201 break; 3202 } 3203 } 3204 B->ops->sor = MatSOR_SeqBAIJ; 3205 b->mbs = mbs; 3206 b->nbs = nbs; 3207 if (!skipallocation) { 3208 if (!b->imax) { 3209 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3210 PetscCall(PetscLogObjectMemory((PetscObject)B, 2 * mbs * sizeof(PetscInt))); 3211 3212 b->free_imax_ilen = PETSC_TRUE; 3213 } 3214 /* b->ilen will count nonzeros in each block row so far. */ 3215 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3216 if (!nnz) { 3217 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3218 else if (nz < 0) nz = 1; 3219 nz = PetscMin(nz, nbs); 3220 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3221 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3222 } else { 3223 PetscInt64 nz64 = 0; 3224 for (i = 0; i < mbs; i++) { 3225 b->imax[i] = nnz[i]; 3226 nz64 += nnz[i]; 3227 } 3228 PetscCall(PetscIntCast(nz64, &nz)); 3229 } 3230 3231 /* allocate the matrix space */ 3232 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3233 if (B->structure_only) { 3234 PetscCall(PetscMalloc1(nz, &b->j)); 3235 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3236 PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * sizeof(PetscInt))); 3237 } else { 3238 PetscInt nzbs2 = 0; 3239 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3240 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3241 PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt)))); 3242 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3243 } 3244 PetscCall(PetscArrayzero(b->j, nz)); 3245 3246 if (B->structure_only) { 3247 b->singlemalloc = PETSC_FALSE; 3248 b->free_a = PETSC_FALSE; 3249 } else { 3250 b->singlemalloc = PETSC_TRUE; 3251 b->free_a = PETSC_TRUE; 3252 } 3253 b->free_ij = PETSC_TRUE; 3254 3255 b->i[0] = 0; 3256 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3257 3258 } else { 3259 b->free_a = PETSC_FALSE; 3260 b->free_ij = PETSC_FALSE; 3261 } 3262 3263 b->bs2 = bs2; 3264 b->mbs = mbs; 3265 b->nz = 0; 3266 b->maxnz = nz; 3267 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3268 B->was_assembled = PETSC_FALSE; 3269 B->assembled = PETSC_FALSE; 3270 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3271 PetscFunctionReturn(0); 3272 } 3273 3274 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 3275 PetscInt i, m, nz, nz_max = 0, *nnz; 3276 PetscScalar *values = NULL; 3277 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3278 3279 PetscFunctionBegin; 3280 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3281 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3282 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3283 PetscCall(PetscLayoutSetUp(B->rmap)); 3284 PetscCall(PetscLayoutSetUp(B->cmap)); 3285 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3286 m = B->rmap->n / bs; 3287 3288 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3289 PetscCall(PetscMalloc1(m + 1, &nnz)); 3290 for (i = 0; i < m; i++) { 3291 nz = ii[i + 1] - ii[i]; 3292 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3293 nz_max = PetscMax(nz_max, nz); 3294 nnz[i] = nz; 3295 } 3296 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3297 PetscCall(PetscFree(nnz)); 3298 3299 values = (PetscScalar *)V; 3300 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3301 for (i = 0; i < m; i++) { 3302 PetscInt ncols = ii[i + 1] - ii[i]; 3303 const PetscInt *icols = jj + ii[i]; 3304 if (bs == 1 || !roworiented) { 3305 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3306 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3307 } else { 3308 PetscInt j; 3309 for (j = 0; j < ncols; j++) { 3310 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3311 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3312 } 3313 } 3314 } 3315 if (!V) PetscCall(PetscFree(values)); 3316 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3317 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3318 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3319 PetscFunctionReturn(0); 3320 } 3321 3322 /*@C 3323 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3324 3325 Not Collective 3326 3327 Input Parameter: 3328 . mat - a `MATSEQBAIJ` matrix 3329 3330 Output Parameter: 3331 . array - pointer to the data 3332 3333 Level: intermediate 3334 3335 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3336 @*/ 3337 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) { 3338 PetscFunctionBegin; 3339 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3340 PetscFunctionReturn(0); 3341 } 3342 3343 /*@C 3344 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3345 3346 Not Collective 3347 3348 Input Parameters: 3349 + mat - a `MATSEQBAIJ` matrix 3350 - array - pointer to the data 3351 3352 Level: intermediate 3353 3354 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3355 @*/ 3356 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) { 3357 PetscFunctionBegin; 3358 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 /*MC 3363 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3364 block sparse compressed row format. 3365 3366 Options Database Keys: 3367 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3368 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3369 3370 Level: beginner 3371 3372 Notes: 3373 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3374 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3375 3376 Run with -info to see what version of the matrix-vector product is being used 3377 3378 .seealso: `MatCreateSeqBAIJ()` 3379 M*/ 3380 3381 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3382 3383 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) { 3384 PetscMPIInt size; 3385 Mat_SeqBAIJ *b; 3386 3387 PetscFunctionBegin; 3388 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3389 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3390 3391 PetscCall(PetscNewLog(B, &b)); 3392 B->data = (void *)b; 3393 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3394 3395 b->row = NULL; 3396 b->col = NULL; 3397 b->icol = NULL; 3398 b->reallocs = 0; 3399 b->saved_values = NULL; 3400 3401 b->roworiented = PETSC_TRUE; 3402 b->nonew = 0; 3403 b->diag = NULL; 3404 B->spptr = NULL; 3405 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3406 b->keepnonzeropattern = PETSC_FALSE; 3407 3408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3409 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3410 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3411 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3412 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3417 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3418 #if defined(PETSC_HAVE_HYPRE) 3419 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3420 #endif 3421 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3422 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3423 PetscFunctionReturn(0); 3424 } 3425 3426 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) { 3427 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3428 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3429 3430 PetscFunctionBegin; 3431 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3432 3433 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3434 c->imax = a->imax; 3435 c->ilen = a->ilen; 3436 c->free_imax_ilen = PETSC_FALSE; 3437 } else { 3438 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3439 PetscCall(PetscLogObjectMemory((PetscObject)C, 2 * mbs * sizeof(PetscInt))); 3440 for (i = 0; i < mbs; i++) { 3441 c->imax[i] = a->imax[i]; 3442 c->ilen[i] = a->ilen[i]; 3443 } 3444 c->free_imax_ilen = PETSC_TRUE; 3445 } 3446 3447 /* allocate the matrix space */ 3448 if (mallocmatspace) { 3449 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3450 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3451 PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * bs2 * sizeof(PetscScalar))); 3452 3453 c->i = a->i; 3454 c->j = a->j; 3455 c->singlemalloc = PETSC_FALSE; 3456 c->free_a = PETSC_TRUE; 3457 c->free_ij = PETSC_FALSE; 3458 c->parent = A; 3459 C->preallocated = PETSC_TRUE; 3460 C->assembled = PETSC_TRUE; 3461 3462 PetscCall(PetscObjectReference((PetscObject)A)); 3463 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3464 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3465 } else { 3466 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3467 PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt)) + (mbs + 1) * sizeof(PetscInt))); 3468 3469 c->singlemalloc = PETSC_TRUE; 3470 c->free_a = PETSC_TRUE; 3471 c->free_ij = PETSC_TRUE; 3472 3473 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3474 if (mbs > 0) { 3475 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3476 if (cpvalues == MAT_COPY_VALUES) { 3477 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3478 } else { 3479 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3480 } 3481 } 3482 C->preallocated = PETSC_TRUE; 3483 C->assembled = PETSC_TRUE; 3484 } 3485 } 3486 3487 c->roworiented = a->roworiented; 3488 c->nonew = a->nonew; 3489 3490 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3491 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3492 3493 c->bs2 = a->bs2; 3494 c->mbs = a->mbs; 3495 c->nbs = a->nbs; 3496 3497 if (a->diag) { 3498 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3499 c->diag = a->diag; 3500 c->free_diag = PETSC_FALSE; 3501 } else { 3502 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3503 PetscCall(PetscLogObjectMemory((PetscObject)C, (mbs + 1) * sizeof(PetscInt))); 3504 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3505 c->free_diag = PETSC_TRUE; 3506 } 3507 } else c->diag = NULL; 3508 3509 c->nz = a->nz; 3510 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3511 c->solve_work = NULL; 3512 c->mult_work = NULL; 3513 c->sor_workt = NULL; 3514 c->sor_work = NULL; 3515 3516 c->compressedrow.use = a->compressedrow.use; 3517 c->compressedrow.nrows = a->compressedrow.nrows; 3518 if (a->compressedrow.use) { 3519 i = a->compressedrow.nrows; 3520 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3521 PetscCall(PetscLogObjectMemory((PetscObject)C, (2 * i + 1) * sizeof(PetscInt))); 3522 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3523 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3524 } else { 3525 c->compressedrow.use = PETSC_FALSE; 3526 c->compressedrow.i = NULL; 3527 c->compressedrow.rindex = NULL; 3528 } 3529 C->nonzerostate = A->nonzerostate; 3530 3531 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) { 3536 PetscFunctionBegin; 3537 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3538 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3539 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3540 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3541 PetscFunctionReturn(0); 3542 } 3543 3544 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3545 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) { 3546 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3547 PetscInt *rowidxs, *colidxs; 3548 PetscScalar *matvals; 3549 3550 PetscFunctionBegin; 3551 PetscCall(PetscViewerSetUp(viewer)); 3552 3553 /* read matrix header */ 3554 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3555 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3556 M = header[1]; 3557 N = header[2]; 3558 nz = header[3]; 3559 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3560 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3561 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3562 3563 /* set block sizes from the viewer's .info file */ 3564 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3565 /* set local and global sizes if not set already */ 3566 if (mat->rmap->n < 0) mat->rmap->n = M; 3567 if (mat->cmap->n < 0) mat->cmap->n = N; 3568 if (mat->rmap->N < 0) mat->rmap->N = M; 3569 if (mat->cmap->N < 0) mat->cmap->N = N; 3570 PetscCall(PetscLayoutSetUp(mat->rmap)); 3571 PetscCall(PetscLayoutSetUp(mat->cmap)); 3572 3573 /* check if the matrix sizes are correct */ 3574 PetscCall(MatGetSize(mat, &rows, &cols)); 3575 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3576 PetscCall(MatGetBlockSize(mat, &bs)); 3577 PetscCall(MatGetLocalSize(mat, &m, &n)); 3578 mbs = m / bs; 3579 nbs = n / bs; 3580 3581 /* read in row lengths, column indices and nonzero values */ 3582 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3583 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3584 rowidxs[0] = 0; 3585 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3586 sum = rowidxs[m]; 3587 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3588 3589 /* read in column indices and nonzero values */ 3590 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3591 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3592 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3593 3594 { /* preallocate matrix storage */ 3595 PetscBT bt; /* helper bit set to count nonzeros */ 3596 PetscInt *nnz; 3597 PetscBool sbaij; 3598 3599 PetscCall(PetscBTCreate(nbs, &bt)); 3600 PetscCall(PetscCalloc1(mbs, &nnz)); 3601 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3602 for (i = 0; i < mbs; i++) { 3603 PetscCall(PetscBTMemzero(nbs, bt)); 3604 for (k = 0; k < bs; k++) { 3605 PetscInt row = bs * i + k; 3606 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3607 PetscInt col = colidxs[j]; 3608 if (!sbaij || col >= row) 3609 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3610 } 3611 } 3612 } 3613 PetscCall(PetscBTDestroy(&bt)); 3614 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3615 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3616 PetscCall(PetscFree(nnz)); 3617 } 3618 3619 /* store matrix values */ 3620 for (i = 0; i < m; i++) { 3621 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3622 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3623 } 3624 3625 PetscCall(PetscFree(rowidxs)); 3626 PetscCall(PetscFree2(colidxs, matvals)); 3627 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3628 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3629 PetscFunctionReturn(0); 3630 } 3631 3632 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) { 3633 PetscBool isbinary; 3634 3635 PetscFunctionBegin; 3636 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3637 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3638 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3639 PetscFunctionReturn(0); 3640 } 3641 3642 /*@C 3643 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3644 compressed row) format. For good matrix assembly performance the 3645 user should preallocate the matrix storage by setting the parameter nz 3646 (or the array nnz). By setting these parameters accurately, performance 3647 during matrix assembly can be increased by more than a factor of 50. 3648 3649 Collective 3650 3651 Input Parameters: 3652 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3653 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3654 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3655 . m - number of rows 3656 . n - number of columns 3657 . nz - number of nonzero blocks per block row (same for all rows) 3658 - nnz - array containing the number of nonzero blocks in the various block rows 3659 (possibly different for each block row) or NULL 3660 3661 Output Parameter: 3662 . A - the matrix 3663 3664 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3665 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3666 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3667 3668 Options Database Keys: 3669 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3670 - -mat_block_size - size of the blocks to use 3671 3672 Level: intermediate 3673 3674 Notes: 3675 The number of rows and columns must be divisible by blocksize. 3676 3677 If the nnz parameter is given then the nz parameter is ignored 3678 3679 A nonzero block is any block that as 1 or more nonzeros in it 3680 3681 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3682 storage. That is, the stored row and column indices can begin at 3683 either one (as in Fortran) or zero. See the users' manual for details. 3684 3685 Specify the preallocated storage with either nz or nnz (not both). 3686 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3687 allocation. See Users-Manual: ch_mat for details. 3688 matrices. 3689 3690 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3691 @*/ 3692 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 3693 PetscFunctionBegin; 3694 PetscCall(MatCreate(comm, A)); 3695 PetscCall(MatSetSizes(*A, m, n, m, n)); 3696 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3697 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3698 PetscFunctionReturn(0); 3699 } 3700 3701 /*@C 3702 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3703 per row in the matrix. For good matrix assembly performance the 3704 user should preallocate the matrix storage by setting the parameter nz 3705 (or the array nnz). By setting these parameters accurately, performance 3706 during matrix assembly can be increased by more than a factor of 50. 3707 3708 Collective 3709 3710 Input Parameters: 3711 + B - the matrix 3712 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3713 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3714 . nz - number of block nonzeros per block row (same for all rows) 3715 - nnz - array containing the number of block nonzeros in the various block rows 3716 (possibly different for each block row) or NULL 3717 3718 Options Database Keys: 3719 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3720 - -mat_block_size - size of the blocks to use 3721 3722 Level: intermediate 3723 3724 Notes: 3725 If the nnz parameter is given then the nz parameter is ignored 3726 3727 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3728 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3729 You can also run with the option -info and look for messages with the string 3730 malloc in them to see if additional memory allocation was needed. 3731 3732 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3733 storage. That is, the stored row and column indices can begin at 3734 either one (as in Fortran) or zero. See the users' manual for details. 3735 3736 Specify the preallocated storage with either nz or nnz (not both). 3737 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3738 allocation. See Users-Manual: ch_mat for details. 3739 3740 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3741 @*/ 3742 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) { 3743 PetscFunctionBegin; 3744 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3745 PetscValidType(B, 1); 3746 PetscValidLogicalCollectiveInt(B, bs, 2); 3747 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3748 PetscFunctionReturn(0); 3749 } 3750 3751 /*@C 3752 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3753 3754 Collective 3755 3756 Input Parameters: 3757 + B - the matrix 3758 . i - the indices into j for the start of each local row (starts with zero) 3759 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3760 - v - optional values in the matrix 3761 3762 Level: advanced 3763 3764 Notes: 3765 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3766 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3767 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3768 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3769 block column and the second index is over columns within a block. 3770 3771 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3772 3773 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3774 @*/ 3775 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 3776 PetscFunctionBegin; 3777 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3778 PetscValidType(B, 1); 3779 PetscValidLogicalCollectiveInt(B, bs, 2); 3780 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3781 PetscFunctionReturn(0); 3782 } 3783 3784 /*@ 3785 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3786 3787 Collective 3788 3789 Input Parameters: 3790 + comm - must be an MPI communicator of size 1 3791 . bs - size of block 3792 . m - number of rows 3793 . n - number of columns 3794 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3795 . j - column indices 3796 - a - matrix values 3797 3798 Output Parameter: 3799 . mat - the matrix 3800 3801 Level: advanced 3802 3803 Notes: 3804 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3805 once the matrix is destroyed 3806 3807 You cannot set new nonzero locations into this matrix, that will generate an error. 3808 3809 The i and j indices are 0 based 3810 3811 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3812 3813 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3814 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3815 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3816 with column-major ordering within blocks. 3817 3818 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3819 @*/ 3820 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) { 3821 PetscInt ii; 3822 Mat_SeqBAIJ *baij; 3823 3824 PetscFunctionBegin; 3825 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3826 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3827 3828 PetscCall(MatCreate(comm, mat)); 3829 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3830 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3831 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3832 baij = (Mat_SeqBAIJ *)(*mat)->data; 3833 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3834 PetscCall(PetscLogObjectMemory((PetscObject)*mat, 2 * m * sizeof(PetscInt))); 3835 3836 baij->i = i; 3837 baij->j = j; 3838 baij->a = a; 3839 3840 baij->singlemalloc = PETSC_FALSE; 3841 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3842 baij->free_a = PETSC_FALSE; 3843 baij->free_ij = PETSC_FALSE; 3844 3845 for (ii = 0; ii < m; ii++) { 3846 baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii]; 3847 PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]); 3848 } 3849 if (PetscDefined(USE_DEBUG)) { 3850 for (ii = 0; ii < baij->i[m]; ii++) { 3851 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3852 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3853 } 3854 } 3855 3856 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3857 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3858 PetscFunctionReturn(0); 3859 } 3860 3861 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 3862 PetscFunctionBegin; 3863 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3864 PetscFunctionReturn(0); 3865 } 3866