1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 21 { 22 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 23 PetscInt m, n, i; 24 PetscInt ib, jb, bs = A->rmap->bs; 25 MatScalar *a_val = a_aij->a; 26 27 PetscFunctionBegin; 28 PetscCall(MatGetSize(A, &m, &n)); 29 for (i = 0; i < n; i++) reductions[i] = 0.0; 30 if (type == NORM_2) { 31 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 32 for (jb = 0; jb < bs; jb++) { 33 for (ib = 0; ib < bs; ib++) { 34 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 35 a_val++; 36 } 37 } 38 } 39 } else if (type == NORM_1) { 40 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 41 for (jb = 0; jb < bs; jb++) { 42 for (ib = 0; ib < bs; ib++) { 43 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 44 a_val++; 45 } 46 } 47 } 48 } else if (type == NORM_INFINITY) { 49 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 50 for (jb = 0; jb < bs; jb++) { 51 for (ib = 0; ib < bs; ib++) { 52 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 53 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 54 a_val++; 55 } 56 } 57 } 58 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 59 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 60 for (jb = 0; jb < bs; jb++) { 61 for (ib = 0; ib < bs; ib++) { 62 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 63 a_val++; 64 } 65 } 66 } 67 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 68 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 69 for (jb = 0; jb < bs; jb++) { 70 for (ib = 0; ib < bs; ib++) { 71 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 72 a_val++; 73 } 74 } 75 } 76 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 77 if (type == NORM_2) { 78 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 79 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (i = 0; i < n; i++) reductions[i] /= m; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 88 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 89 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 90 PetscReal shift = 0.0; 91 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 92 93 PetscFunctionBegin; 94 allowzeropivot = PetscNot(A->erroriffailure); 95 96 if (a->idiagvalid) { 97 if (values) *values = a->idiag; 98 PetscFunctionReturn(0); 99 } 100 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 101 diag_offset = a->diag; 102 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 103 diag = a->idiag; 104 if (values) *values = a->idiag; 105 /* factor and invert each block */ 106 switch (bs) { 107 case 1: 108 for (i = 0; i < mbs; i++) { 109 odiag = v + 1 * diag_offset[i]; 110 diag[0] = odiag[0]; 111 112 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 113 if (allowzeropivot) { 114 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 115 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 116 A->factorerror_zeropivot_row = i; 117 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 118 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 119 } 120 121 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 122 diag += 1; 123 } 124 break; 125 case 2: 126 for (i = 0; i < mbs; i++) { 127 odiag = v + 4 * diag_offset[i]; 128 diag[0] = odiag[0]; 129 diag[1] = odiag[1]; 130 diag[2] = odiag[2]; 131 diag[3] = odiag[3]; 132 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 133 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 134 diag += 4; 135 } 136 break; 137 case 3: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 9 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 diag[4] = odiag[4]; 145 diag[5] = odiag[5]; 146 diag[6] = odiag[6]; 147 diag[7] = odiag[7]; 148 diag[8] = odiag[8]; 149 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 150 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 151 diag += 9; 152 } 153 break; 154 case 4: 155 for (i = 0; i < mbs; i++) { 156 odiag = v + 16 * diag_offset[i]; 157 PetscCall(PetscArraycpy(diag, odiag, 16)); 158 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 159 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 160 diag += 16; 161 } 162 break; 163 case 5: 164 for (i = 0; i < mbs; i++) { 165 odiag = v + 25 * diag_offset[i]; 166 PetscCall(PetscArraycpy(diag, odiag, 25)); 167 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 168 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 169 diag += 25; 170 } 171 break; 172 case 6: 173 for (i = 0; i < mbs; i++) { 174 odiag = v + 36 * diag_offset[i]; 175 PetscCall(PetscArraycpy(diag, odiag, 36)); 176 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 177 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 178 diag += 36; 179 } 180 break; 181 case 7: 182 for (i = 0; i < mbs; i++) { 183 odiag = v + 49 * diag_offset[i]; 184 PetscCall(PetscArraycpy(diag, odiag, 49)); 185 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 186 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 187 diag += 49; 188 } 189 break; 190 default: 191 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 192 for (i = 0; i < mbs; i++) { 193 odiag = v + bs2 * diag_offset[i]; 194 PetscCall(PetscArraycpy(diag, odiag, bs2)); 195 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 196 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 197 diag += bs2; 198 } 199 PetscCall(PetscFree2(v_work, v_pivots)); 200 } 201 a->idiagvalid = PETSC_TRUE; 202 PetscFunctionReturn(0); 203 } 204 205 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 206 { 207 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 208 PetscScalar *x, *work, *w, *workt, *t; 209 const MatScalar *v, *aa = a->a, *idiag; 210 const PetscScalar *b, *xb; 211 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 212 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 213 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 214 215 PetscFunctionBegin; 216 its = its * lits; 217 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 218 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 219 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 220 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 221 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 222 223 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 224 225 if (!m) PetscFunctionReturn(0); 226 diag = a->diag; 227 idiag = a->idiag; 228 k = PetscMax(A->rmap->n, A->cmap->n); 229 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 230 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 231 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 232 work = a->mult_work; 233 t = a->sor_workt; 234 w = a->sor_work; 235 236 PetscCall(VecGetArray(xx, &x)); 237 PetscCall(VecGetArrayRead(bb, &b)); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241 switch (bs) { 242 case 1: 243 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 244 t[0] = b[0]; 245 i2 = 1; 246 idiag += 1; 247 for (i = 1; i < m; i++) { 248 v = aa + ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; 252 for (j = 0; j < nz; j++) { 253 xw[0] = x[vi[j]]; 254 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 255 } 256 t[i2] = s[0]; 257 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 258 x[i2] = xw[0]; 259 idiag += 1; 260 i2 += 1; 261 } 262 break; 263 case 2: 264 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 265 t[0] = b[0]; 266 t[1] = b[1]; 267 i2 = 2; 268 idiag += 4; 269 for (i = 1; i < m; i++) { 270 v = aa + 4 * ai[i]; 271 vi = aj + ai[i]; 272 nz = diag[i] - ai[i]; 273 s[0] = b[i2]; 274 s[1] = b[i2 + 1]; 275 for (j = 0; j < nz; j++) { 276 idx = 2 * vi[j]; 277 it = 4 * j; 278 xw[0] = x[idx]; 279 xw[1] = x[1 + idx]; 280 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 281 } 282 t[i2] = s[0]; 283 t[i2 + 1] = s[1]; 284 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 285 x[i2] = xw[0]; 286 x[i2 + 1] = xw[1]; 287 idiag += 4; 288 i2 += 2; 289 } 290 break; 291 case 3: 292 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 293 t[0] = b[0]; 294 t[1] = b[1]; 295 t[2] = b[2]; 296 i2 = 3; 297 idiag += 9; 298 for (i = 1; i < m; i++) { 299 v = aa + 9 * ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; 303 s[1] = b[i2 + 1]; 304 s[2] = b[i2 + 2]; 305 while (nz--) { 306 idx = 3 * (*vi++); 307 xw[0] = x[idx]; 308 xw[1] = x[1 + idx]; 309 xw[2] = x[2 + idx]; 310 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 311 v += 9; 312 } 313 t[i2] = s[0]; 314 t[i2 + 1] = s[1]; 315 t[i2 + 2] = s[2]; 316 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 317 x[i2] = xw[0]; 318 x[i2 + 1] = xw[1]; 319 x[i2 + 2] = xw[2]; 320 idiag += 9; 321 i2 += 3; 322 } 323 break; 324 case 4: 325 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 326 t[0] = b[0]; 327 t[1] = b[1]; 328 t[2] = b[2]; 329 t[3] = b[3]; 330 i2 = 4; 331 idiag += 16; 332 for (i = 1; i < m; i++) { 333 v = aa + 16 * ai[i]; 334 vi = aj + ai[i]; 335 nz = diag[i] - ai[i]; 336 s[0] = b[i2]; 337 s[1] = b[i2 + 1]; 338 s[2] = b[i2 + 2]; 339 s[3] = b[i2 + 3]; 340 while (nz--) { 341 idx = 4 * (*vi++); 342 xw[0] = x[idx]; 343 xw[1] = x[1 + idx]; 344 xw[2] = x[2 + idx]; 345 xw[3] = x[3 + idx]; 346 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 347 v += 16; 348 } 349 t[i2] = s[0]; 350 t[i2 + 1] = s[1]; 351 t[i2 + 2] = s[2]; 352 t[i2 + 3] = s[3]; 353 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 354 x[i2] = xw[0]; 355 x[i2 + 1] = xw[1]; 356 x[i2 + 2] = xw[2]; 357 x[i2 + 3] = xw[3]; 358 idiag += 16; 359 i2 += 4; 360 } 361 break; 362 case 5: 363 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 364 t[0] = b[0]; 365 t[1] = b[1]; 366 t[2] = b[2]; 367 t[3] = b[3]; 368 t[4] = b[4]; 369 i2 = 5; 370 idiag += 25; 371 for (i = 1; i < m; i++) { 372 v = aa + 25 * ai[i]; 373 vi = aj + ai[i]; 374 nz = diag[i] - ai[i]; 375 s[0] = b[i2]; 376 s[1] = b[i2 + 1]; 377 s[2] = b[i2 + 2]; 378 s[3] = b[i2 + 3]; 379 s[4] = b[i2 + 4]; 380 while (nz--) { 381 idx = 5 * (*vi++); 382 xw[0] = x[idx]; 383 xw[1] = x[1 + idx]; 384 xw[2] = x[2 + idx]; 385 xw[3] = x[3 + idx]; 386 xw[4] = x[4 + idx]; 387 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 388 v += 25; 389 } 390 t[i2] = s[0]; 391 t[i2 + 1] = s[1]; 392 t[i2 + 2] = s[2]; 393 t[i2 + 3] = s[3]; 394 t[i2 + 4] = s[4]; 395 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 396 x[i2] = xw[0]; 397 x[i2 + 1] = xw[1]; 398 x[i2 + 2] = xw[2]; 399 x[i2 + 3] = xw[3]; 400 x[i2 + 4] = xw[4]; 401 idiag += 25; 402 i2 += 5; 403 } 404 break; 405 case 6: 406 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 407 t[0] = b[0]; 408 t[1] = b[1]; 409 t[2] = b[2]; 410 t[3] = b[3]; 411 t[4] = b[4]; 412 t[5] = b[5]; 413 i2 = 6; 414 idiag += 36; 415 for (i = 1; i < m; i++) { 416 v = aa + 36 * ai[i]; 417 vi = aj + ai[i]; 418 nz = diag[i] - ai[i]; 419 s[0] = b[i2]; 420 s[1] = b[i2 + 1]; 421 s[2] = b[i2 + 2]; 422 s[3] = b[i2 + 3]; 423 s[4] = b[i2 + 4]; 424 s[5] = b[i2 + 5]; 425 while (nz--) { 426 idx = 6 * (*vi++); 427 xw[0] = x[idx]; 428 xw[1] = x[1 + idx]; 429 xw[2] = x[2 + idx]; 430 xw[3] = x[3 + idx]; 431 xw[4] = x[4 + idx]; 432 xw[5] = x[5 + idx]; 433 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 434 v += 36; 435 } 436 t[i2] = s[0]; 437 t[i2 + 1] = s[1]; 438 t[i2 + 2] = s[2]; 439 t[i2 + 3] = s[3]; 440 t[i2 + 4] = s[4]; 441 t[i2 + 5] = s[5]; 442 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 443 x[i2] = xw[0]; 444 x[i2 + 1] = xw[1]; 445 x[i2 + 2] = xw[2]; 446 x[i2 + 3] = xw[3]; 447 x[i2 + 4] = xw[4]; 448 x[i2 + 5] = xw[5]; 449 idiag += 36; 450 i2 += 6; 451 } 452 break; 453 case 7: 454 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 455 t[0] = b[0]; 456 t[1] = b[1]; 457 t[2] = b[2]; 458 t[3] = b[3]; 459 t[4] = b[4]; 460 t[5] = b[5]; 461 t[6] = b[6]; 462 i2 = 7; 463 idiag += 49; 464 for (i = 1; i < m; i++) { 465 v = aa + 49 * ai[i]; 466 vi = aj + ai[i]; 467 nz = diag[i] - ai[i]; 468 s[0] = b[i2]; 469 s[1] = b[i2 + 1]; 470 s[2] = b[i2 + 2]; 471 s[3] = b[i2 + 3]; 472 s[4] = b[i2 + 4]; 473 s[5] = b[i2 + 5]; 474 s[6] = b[i2 + 6]; 475 while (nz--) { 476 idx = 7 * (*vi++); 477 xw[0] = x[idx]; 478 xw[1] = x[1 + idx]; 479 xw[2] = x[2 + idx]; 480 xw[3] = x[3 + idx]; 481 xw[4] = x[4 + idx]; 482 xw[5] = x[5 + idx]; 483 xw[6] = x[6 + idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 485 v += 49; 486 } 487 t[i2] = s[0]; 488 t[i2 + 1] = s[1]; 489 t[i2 + 2] = s[2]; 490 t[i2 + 3] = s[3]; 491 t[i2 + 4] = s[4]; 492 t[i2 + 5] = s[5]; 493 t[i2 + 6] = s[6]; 494 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 495 x[i2] = xw[0]; 496 x[i2 + 1] = xw[1]; 497 x[i2 + 2] = xw[2]; 498 x[i2 + 3] = xw[3]; 499 x[i2 + 4] = xw[4]; 500 x[i2 + 5] = xw[5]; 501 x[i2 + 6] = xw[6]; 502 idiag += 49; 503 i2 += 7; 504 } 505 break; 506 default: 507 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 508 PetscCall(PetscArraycpy(t, b, bs)); 509 i2 = bs; 510 idiag += bs2; 511 for (i = 1; i < m; i++) { 512 v = aa + bs2 * ai[i]; 513 vi = aj + ai[i]; 514 nz = diag[i] - ai[i]; 515 516 PetscCall(PetscArraycpy(w, b + i2, bs)); 517 /* copy all rows of x that are needed into contiguous space */ 518 workt = work; 519 for (j = 0; j < nz; j++) { 520 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 521 workt += bs; 522 } 523 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 524 PetscCall(PetscArraycpy(t + i2, w, bs)); 525 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 526 527 idiag += bs2; 528 i2 += bs; 529 } 530 break; 531 } 532 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 533 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 534 xb = t; 535 } else xb = b; 536 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 537 idiag = a->idiag + bs2 * (a->mbs - 1); 538 i2 = bs * (m - 1); 539 switch (bs) { 540 case 1: 541 s[0] = xb[i2]; 542 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 543 x[i2] = xw[0]; 544 i2 -= 1; 545 for (i = m - 2; i >= 0; i--) { 546 v = aa + (diag[i] + 1); 547 vi = aj + diag[i] + 1; 548 nz = ai[i + 1] - diag[i] - 1; 549 s[0] = xb[i2]; 550 for (j = 0; j < nz; j++) { 551 xw[0] = x[vi[j]]; 552 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 553 } 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 idiag -= 1; 557 i2 -= 1; 558 } 559 break; 560 case 2: 561 s[0] = xb[i2]; 562 s[1] = xb[i2 + 1]; 563 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 564 x[i2] = xw[0]; 565 x[i2 + 1] = xw[1]; 566 i2 -= 2; 567 idiag -= 4; 568 for (i = m - 2; i >= 0; i--) { 569 v = aa + 4 * (diag[i] + 1); 570 vi = aj + diag[i] + 1; 571 nz = ai[i + 1] - diag[i] - 1; 572 s[0] = xb[i2]; 573 s[1] = xb[i2 + 1]; 574 for (j = 0; j < nz; j++) { 575 idx = 2 * vi[j]; 576 it = 4 * j; 577 xw[0] = x[idx]; 578 xw[1] = x[1 + idx]; 579 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 580 } 581 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 582 x[i2] = xw[0]; 583 x[i2 + 1] = xw[1]; 584 idiag -= 4; 585 i2 -= 2; 586 } 587 break; 588 case 3: 589 s[0] = xb[i2]; 590 s[1] = xb[i2 + 1]; 591 s[2] = xb[i2 + 2]; 592 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 593 x[i2] = xw[0]; 594 x[i2 + 1] = xw[1]; 595 x[i2 + 2] = xw[2]; 596 i2 -= 3; 597 idiag -= 9; 598 for (i = m - 2; i >= 0; i--) { 599 v = aa + 9 * (diag[i] + 1); 600 vi = aj + diag[i] + 1; 601 nz = ai[i + 1] - diag[i] - 1; 602 s[0] = xb[i2]; 603 s[1] = xb[i2 + 1]; 604 s[2] = xb[i2 + 2]; 605 while (nz--) { 606 idx = 3 * (*vi++); 607 xw[0] = x[idx]; 608 xw[1] = x[1 + idx]; 609 xw[2] = x[2 + idx]; 610 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 611 v += 9; 612 } 613 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 614 x[i2] = xw[0]; 615 x[i2 + 1] = xw[1]; 616 x[i2 + 2] = xw[2]; 617 idiag -= 9; 618 i2 -= 3; 619 } 620 break; 621 case 4: 622 s[0] = xb[i2]; 623 s[1] = xb[i2 + 1]; 624 s[2] = xb[i2 + 2]; 625 s[3] = xb[i2 + 3]; 626 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 627 x[i2] = xw[0]; 628 x[i2 + 1] = xw[1]; 629 x[i2 + 2] = xw[2]; 630 x[i2 + 3] = xw[3]; 631 i2 -= 4; 632 idiag -= 16; 633 for (i = m - 2; i >= 0; i--) { 634 v = aa + 16 * (diag[i] + 1); 635 vi = aj + diag[i] + 1; 636 nz = ai[i + 1] - diag[i] - 1; 637 s[0] = xb[i2]; 638 s[1] = xb[i2 + 1]; 639 s[2] = xb[i2 + 2]; 640 s[3] = xb[i2 + 3]; 641 while (nz--) { 642 idx = 4 * (*vi++); 643 xw[0] = x[idx]; 644 xw[1] = x[1 + idx]; 645 xw[2] = x[2 + idx]; 646 xw[3] = x[3 + idx]; 647 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 648 v += 16; 649 } 650 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 651 x[i2] = xw[0]; 652 x[i2 + 1] = xw[1]; 653 x[i2 + 2] = xw[2]; 654 x[i2 + 3] = xw[3]; 655 idiag -= 16; 656 i2 -= 4; 657 } 658 break; 659 case 5: 660 s[0] = xb[i2]; 661 s[1] = xb[i2 + 1]; 662 s[2] = xb[i2 + 2]; 663 s[3] = xb[i2 + 3]; 664 s[4] = xb[i2 + 4]; 665 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 666 x[i2] = xw[0]; 667 x[i2 + 1] = xw[1]; 668 x[i2 + 2] = xw[2]; 669 x[i2 + 3] = xw[3]; 670 x[i2 + 4] = xw[4]; 671 i2 -= 5; 672 idiag -= 25; 673 for (i = m - 2; i >= 0; i--) { 674 v = aa + 25 * (diag[i] + 1); 675 vi = aj + diag[i] + 1; 676 nz = ai[i + 1] - diag[i] - 1; 677 s[0] = xb[i2]; 678 s[1] = xb[i2 + 1]; 679 s[2] = xb[i2 + 2]; 680 s[3] = xb[i2 + 3]; 681 s[4] = xb[i2 + 4]; 682 while (nz--) { 683 idx = 5 * (*vi++); 684 xw[0] = x[idx]; 685 xw[1] = x[1 + idx]; 686 xw[2] = x[2 + idx]; 687 xw[3] = x[3 + idx]; 688 xw[4] = x[4 + idx]; 689 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 690 v += 25; 691 } 692 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 693 x[i2] = xw[0]; 694 x[i2 + 1] = xw[1]; 695 x[i2 + 2] = xw[2]; 696 x[i2 + 3] = xw[3]; 697 x[i2 + 4] = xw[4]; 698 idiag -= 25; 699 i2 -= 5; 700 } 701 break; 702 case 6: 703 s[0] = xb[i2]; 704 s[1] = xb[i2 + 1]; 705 s[2] = xb[i2 + 2]; 706 s[3] = xb[i2 + 3]; 707 s[4] = xb[i2 + 4]; 708 s[5] = xb[i2 + 5]; 709 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 710 x[i2] = xw[0]; 711 x[i2 + 1] = xw[1]; 712 x[i2 + 2] = xw[2]; 713 x[i2 + 3] = xw[3]; 714 x[i2 + 4] = xw[4]; 715 x[i2 + 5] = xw[5]; 716 i2 -= 6; 717 idiag -= 36; 718 for (i = m - 2; i >= 0; i--) { 719 v = aa + 36 * (diag[i] + 1); 720 vi = aj + diag[i] + 1; 721 nz = ai[i + 1] - diag[i] - 1; 722 s[0] = xb[i2]; 723 s[1] = xb[i2 + 1]; 724 s[2] = xb[i2 + 2]; 725 s[3] = xb[i2 + 3]; 726 s[4] = xb[i2 + 4]; 727 s[5] = xb[i2 + 5]; 728 while (nz--) { 729 idx = 6 * (*vi++); 730 xw[0] = x[idx]; 731 xw[1] = x[1 + idx]; 732 xw[2] = x[2 + idx]; 733 xw[3] = x[3 + idx]; 734 xw[4] = x[4 + idx]; 735 xw[5] = x[5 + idx]; 736 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 737 v += 36; 738 } 739 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 740 x[i2] = xw[0]; 741 x[i2 + 1] = xw[1]; 742 x[i2 + 2] = xw[2]; 743 x[i2 + 3] = xw[3]; 744 x[i2 + 4] = xw[4]; 745 x[i2 + 5] = xw[5]; 746 idiag -= 36; 747 i2 -= 6; 748 } 749 break; 750 case 7: 751 s[0] = xb[i2]; 752 s[1] = xb[i2 + 1]; 753 s[2] = xb[i2 + 2]; 754 s[3] = xb[i2 + 3]; 755 s[4] = xb[i2 + 4]; 756 s[5] = xb[i2 + 5]; 757 s[6] = xb[i2 + 6]; 758 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 759 x[i2] = xw[0]; 760 x[i2 + 1] = xw[1]; 761 x[i2 + 2] = xw[2]; 762 x[i2 + 3] = xw[3]; 763 x[i2 + 4] = xw[4]; 764 x[i2 + 5] = xw[5]; 765 x[i2 + 6] = xw[6]; 766 i2 -= 7; 767 idiag -= 49; 768 for (i = m - 2; i >= 0; i--) { 769 v = aa + 49 * (diag[i] + 1); 770 vi = aj + diag[i] + 1; 771 nz = ai[i + 1] - diag[i] - 1; 772 s[0] = xb[i2]; 773 s[1] = xb[i2 + 1]; 774 s[2] = xb[i2 + 2]; 775 s[3] = xb[i2 + 3]; 776 s[4] = xb[i2 + 4]; 777 s[5] = xb[i2 + 5]; 778 s[6] = xb[i2 + 6]; 779 while (nz--) { 780 idx = 7 * (*vi++); 781 xw[0] = x[idx]; 782 xw[1] = x[1 + idx]; 783 xw[2] = x[2 + idx]; 784 xw[3] = x[3 + idx]; 785 xw[4] = x[4 + idx]; 786 xw[5] = x[5 + idx]; 787 xw[6] = x[6 + idx]; 788 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 789 v += 49; 790 } 791 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 792 x[i2] = xw[0]; 793 x[i2 + 1] = xw[1]; 794 x[i2 + 2] = xw[2]; 795 x[i2 + 3] = xw[3]; 796 x[i2 + 4] = xw[4]; 797 x[i2 + 5] = xw[5]; 798 x[i2 + 6] = xw[6]; 799 idiag -= 49; 800 i2 -= 7; 801 } 802 break; 803 default: 804 PetscCall(PetscArraycpy(w, xb + i2, bs)); 805 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 806 i2 -= bs; 807 idiag -= bs2; 808 for (i = m - 2; i >= 0; i--) { 809 v = aa + bs2 * (diag[i] + 1); 810 vi = aj + diag[i] + 1; 811 nz = ai[i + 1] - diag[i] - 1; 812 813 PetscCall(PetscArraycpy(w, xb + i2, bs)); 814 /* copy all rows of x that are needed into contiguous space */ 815 workt = work; 816 for (j = 0; j < nz; j++) { 817 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 818 workt += bs; 819 } 820 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 821 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 822 823 idiag -= bs2; 824 i2 -= bs; 825 } 826 break; 827 } 828 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 829 } 830 its--; 831 } 832 while (its--) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 834 idiag = a->idiag; 835 i2 = 0; 836 switch (bs) { 837 case 1: 838 for (i = 0; i < m; i++) { 839 v = aa + ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i + 1] - ai[i]; 842 s[0] = b[i2]; 843 for (j = 0; j < nz; j++) { 844 xw[0] = x[vi[j]]; 845 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 846 } 847 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 848 x[i2] += xw[0]; 849 idiag += 1; 850 i2 += 1; 851 } 852 break; 853 case 2: 854 for (i = 0; i < m; i++) { 855 v = aa + 4 * ai[i]; 856 vi = aj + ai[i]; 857 nz = ai[i + 1] - ai[i]; 858 s[0] = b[i2]; 859 s[1] = b[i2 + 1]; 860 for (j = 0; j < nz; j++) { 861 idx = 2 * vi[j]; 862 it = 4 * j; 863 xw[0] = x[idx]; 864 xw[1] = x[1 + idx]; 865 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 866 } 867 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 868 x[i2] += xw[0]; 869 x[i2 + 1] += xw[1]; 870 idiag += 4; 871 i2 += 2; 872 } 873 break; 874 case 3: 875 for (i = 0; i < m; i++) { 876 v = aa + 9 * ai[i]; 877 vi = aj + ai[i]; 878 nz = ai[i + 1] - ai[i]; 879 s[0] = b[i2]; 880 s[1] = b[i2 + 1]; 881 s[2] = b[i2 + 2]; 882 while (nz--) { 883 idx = 3 * (*vi++); 884 xw[0] = x[idx]; 885 xw[1] = x[1 + idx]; 886 xw[2] = x[2 + idx]; 887 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 888 v += 9; 889 } 890 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 891 x[i2] += xw[0]; 892 x[i2 + 1] += xw[1]; 893 x[i2 + 2] += xw[2]; 894 idiag += 9; 895 i2 += 3; 896 } 897 break; 898 case 4: 899 for (i = 0; i < m; i++) { 900 v = aa + 16 * ai[i]; 901 vi = aj + ai[i]; 902 nz = ai[i + 1] - ai[i]; 903 s[0] = b[i2]; 904 s[1] = b[i2 + 1]; 905 s[2] = b[i2 + 2]; 906 s[3] = b[i2 + 3]; 907 while (nz--) { 908 idx = 4 * (*vi++); 909 xw[0] = x[idx]; 910 xw[1] = x[1 + idx]; 911 xw[2] = x[2 + idx]; 912 xw[3] = x[3 + idx]; 913 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 914 v += 16; 915 } 916 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 917 x[i2] += xw[0]; 918 x[i2 + 1] += xw[1]; 919 x[i2 + 2] += xw[2]; 920 x[i2 + 3] += xw[3]; 921 idiag += 16; 922 i2 += 4; 923 } 924 break; 925 case 5: 926 for (i = 0; i < m; i++) { 927 v = aa + 25 * ai[i]; 928 vi = aj + ai[i]; 929 nz = ai[i + 1] - ai[i]; 930 s[0] = b[i2]; 931 s[1] = b[i2 + 1]; 932 s[2] = b[i2 + 2]; 933 s[3] = b[i2 + 3]; 934 s[4] = b[i2 + 4]; 935 while (nz--) { 936 idx = 5 * (*vi++); 937 xw[0] = x[idx]; 938 xw[1] = x[1 + idx]; 939 xw[2] = x[2 + idx]; 940 xw[3] = x[3 + idx]; 941 xw[4] = x[4 + idx]; 942 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 943 v += 25; 944 } 945 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 946 x[i2] += xw[0]; 947 x[i2 + 1] += xw[1]; 948 x[i2 + 2] += xw[2]; 949 x[i2 + 3] += xw[3]; 950 x[i2 + 4] += xw[4]; 951 idiag += 25; 952 i2 += 5; 953 } 954 break; 955 case 6: 956 for (i = 0; i < m; i++) { 957 v = aa + 36 * ai[i]; 958 vi = aj + ai[i]; 959 nz = ai[i + 1] - ai[i]; 960 s[0] = b[i2]; 961 s[1] = b[i2 + 1]; 962 s[2] = b[i2 + 2]; 963 s[3] = b[i2 + 3]; 964 s[4] = b[i2 + 4]; 965 s[5] = b[i2 + 5]; 966 while (nz--) { 967 idx = 6 * (*vi++); 968 xw[0] = x[idx]; 969 xw[1] = x[1 + idx]; 970 xw[2] = x[2 + idx]; 971 xw[3] = x[3 + idx]; 972 xw[4] = x[4 + idx]; 973 xw[5] = x[5 + idx]; 974 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 975 v += 36; 976 } 977 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 978 x[i2] += xw[0]; 979 x[i2 + 1] += xw[1]; 980 x[i2 + 2] += xw[2]; 981 x[i2 + 3] += xw[3]; 982 x[i2 + 4] += xw[4]; 983 x[i2 + 5] += xw[5]; 984 idiag += 36; 985 i2 += 6; 986 } 987 break; 988 case 7: 989 for (i = 0; i < m; i++) { 990 v = aa + 49 * ai[i]; 991 vi = aj + ai[i]; 992 nz = ai[i + 1] - ai[i]; 993 s[0] = b[i2]; 994 s[1] = b[i2 + 1]; 995 s[2] = b[i2 + 2]; 996 s[3] = b[i2 + 3]; 997 s[4] = b[i2 + 4]; 998 s[5] = b[i2 + 5]; 999 s[6] = b[i2 + 6]; 1000 while (nz--) { 1001 idx = 7 * (*vi++); 1002 xw[0] = x[idx]; 1003 xw[1] = x[1 + idx]; 1004 xw[2] = x[2 + idx]; 1005 xw[3] = x[3 + idx]; 1006 xw[4] = x[4 + idx]; 1007 xw[5] = x[5 + idx]; 1008 xw[6] = x[6 + idx]; 1009 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1010 v += 49; 1011 } 1012 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1013 x[i2] += xw[0]; 1014 x[i2 + 1] += xw[1]; 1015 x[i2 + 2] += xw[2]; 1016 x[i2 + 3] += xw[3]; 1017 x[i2 + 4] += xw[4]; 1018 x[i2 + 5] += xw[5]; 1019 x[i2 + 6] += xw[6]; 1020 idiag += 49; 1021 i2 += 7; 1022 } 1023 break; 1024 default: 1025 for (i = 0; i < m; i++) { 1026 v = aa + bs2 * ai[i]; 1027 vi = aj + ai[i]; 1028 nz = ai[i + 1] - ai[i]; 1029 1030 PetscCall(PetscArraycpy(w, b + i2, bs)); 1031 /* copy all rows of x that are needed into contiguous space */ 1032 workt = work; 1033 for (j = 0; j < nz; j++) { 1034 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1035 workt += bs; 1036 } 1037 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1038 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1039 1040 idiag += bs2; 1041 i2 += bs; 1042 } 1043 break; 1044 } 1045 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1046 } 1047 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1048 idiag = a->idiag + bs2 * (a->mbs - 1); 1049 i2 = bs * (m - 1); 1050 switch (bs) { 1051 case 1: 1052 for (i = m - 1; i >= 0; i--) { 1053 v = aa + ai[i]; 1054 vi = aj + ai[i]; 1055 nz = ai[i + 1] - ai[i]; 1056 s[0] = b[i2]; 1057 for (j = 0; j < nz; j++) { 1058 xw[0] = x[vi[j]]; 1059 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1060 } 1061 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1062 x[i2] += xw[0]; 1063 idiag -= 1; 1064 i2 -= 1; 1065 } 1066 break; 1067 case 2: 1068 for (i = m - 1; i >= 0; i--) { 1069 v = aa + 4 * ai[i]; 1070 vi = aj + ai[i]; 1071 nz = ai[i + 1] - ai[i]; 1072 s[0] = b[i2]; 1073 s[1] = b[i2 + 1]; 1074 for (j = 0; j < nz; j++) { 1075 idx = 2 * vi[j]; 1076 it = 4 * j; 1077 xw[0] = x[idx]; 1078 xw[1] = x[1 + idx]; 1079 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1080 } 1081 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1082 x[i2] += xw[0]; 1083 x[i2 + 1] += xw[1]; 1084 idiag -= 4; 1085 i2 -= 2; 1086 } 1087 break; 1088 case 3: 1089 for (i = m - 1; i >= 0; i--) { 1090 v = aa + 9 * ai[i]; 1091 vi = aj + ai[i]; 1092 nz = ai[i + 1] - ai[i]; 1093 s[0] = b[i2]; 1094 s[1] = b[i2 + 1]; 1095 s[2] = b[i2 + 2]; 1096 while (nz--) { 1097 idx = 3 * (*vi++); 1098 xw[0] = x[idx]; 1099 xw[1] = x[1 + idx]; 1100 xw[2] = x[2 + idx]; 1101 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1102 v += 9; 1103 } 1104 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1105 x[i2] += xw[0]; 1106 x[i2 + 1] += xw[1]; 1107 x[i2 + 2] += xw[2]; 1108 idiag -= 9; 1109 i2 -= 3; 1110 } 1111 break; 1112 case 4: 1113 for (i = m - 1; i >= 0; i--) { 1114 v = aa + 16 * ai[i]; 1115 vi = aj + ai[i]; 1116 nz = ai[i + 1] - ai[i]; 1117 s[0] = b[i2]; 1118 s[1] = b[i2 + 1]; 1119 s[2] = b[i2 + 2]; 1120 s[3] = b[i2 + 3]; 1121 while (nz--) { 1122 idx = 4 * (*vi++); 1123 xw[0] = x[idx]; 1124 xw[1] = x[1 + idx]; 1125 xw[2] = x[2 + idx]; 1126 xw[3] = x[3 + idx]; 1127 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1128 v += 16; 1129 } 1130 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1131 x[i2] += xw[0]; 1132 x[i2 + 1] += xw[1]; 1133 x[i2 + 2] += xw[2]; 1134 x[i2 + 3] += xw[3]; 1135 idiag -= 16; 1136 i2 -= 4; 1137 } 1138 break; 1139 case 5: 1140 for (i = m - 1; i >= 0; i--) { 1141 v = aa + 25 * ai[i]; 1142 vi = aj + ai[i]; 1143 nz = ai[i + 1] - ai[i]; 1144 s[0] = b[i2]; 1145 s[1] = b[i2 + 1]; 1146 s[2] = b[i2 + 2]; 1147 s[3] = b[i2 + 3]; 1148 s[4] = b[i2 + 4]; 1149 while (nz--) { 1150 idx = 5 * (*vi++); 1151 xw[0] = x[idx]; 1152 xw[1] = x[1 + idx]; 1153 xw[2] = x[2 + idx]; 1154 xw[3] = x[3 + idx]; 1155 xw[4] = x[4 + idx]; 1156 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1157 v += 25; 1158 } 1159 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1160 x[i2] += xw[0]; 1161 x[i2 + 1] += xw[1]; 1162 x[i2 + 2] += xw[2]; 1163 x[i2 + 3] += xw[3]; 1164 x[i2 + 4] += xw[4]; 1165 idiag -= 25; 1166 i2 -= 5; 1167 } 1168 break; 1169 case 6: 1170 for (i = m - 1; i >= 0; i--) { 1171 v = aa + 36 * ai[i]; 1172 vi = aj + ai[i]; 1173 nz = ai[i + 1] - ai[i]; 1174 s[0] = b[i2]; 1175 s[1] = b[i2 + 1]; 1176 s[2] = b[i2 + 2]; 1177 s[3] = b[i2 + 3]; 1178 s[4] = b[i2 + 4]; 1179 s[5] = b[i2 + 5]; 1180 while (nz--) { 1181 idx = 6 * (*vi++); 1182 xw[0] = x[idx]; 1183 xw[1] = x[1 + idx]; 1184 xw[2] = x[2 + idx]; 1185 xw[3] = x[3 + idx]; 1186 xw[4] = x[4 + idx]; 1187 xw[5] = x[5 + idx]; 1188 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1189 v += 36; 1190 } 1191 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1192 x[i2] += xw[0]; 1193 x[i2 + 1] += xw[1]; 1194 x[i2 + 2] += xw[2]; 1195 x[i2 + 3] += xw[3]; 1196 x[i2 + 4] += xw[4]; 1197 x[i2 + 5] += xw[5]; 1198 idiag -= 36; 1199 i2 -= 6; 1200 } 1201 break; 1202 case 7: 1203 for (i = m - 1; i >= 0; i--) { 1204 v = aa + 49 * ai[i]; 1205 vi = aj + ai[i]; 1206 nz = ai[i + 1] - ai[i]; 1207 s[0] = b[i2]; 1208 s[1] = b[i2 + 1]; 1209 s[2] = b[i2 + 2]; 1210 s[3] = b[i2 + 3]; 1211 s[4] = b[i2 + 4]; 1212 s[5] = b[i2 + 5]; 1213 s[6] = b[i2 + 6]; 1214 while (nz--) { 1215 idx = 7 * (*vi++); 1216 xw[0] = x[idx]; 1217 xw[1] = x[1 + idx]; 1218 xw[2] = x[2 + idx]; 1219 xw[3] = x[3 + idx]; 1220 xw[4] = x[4 + idx]; 1221 xw[5] = x[5 + idx]; 1222 xw[6] = x[6 + idx]; 1223 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1224 v += 49; 1225 } 1226 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1227 x[i2] += xw[0]; 1228 x[i2 + 1] += xw[1]; 1229 x[i2 + 2] += xw[2]; 1230 x[i2 + 3] += xw[3]; 1231 x[i2 + 4] += xw[4]; 1232 x[i2 + 5] += xw[5]; 1233 x[i2 + 6] += xw[6]; 1234 idiag -= 49; 1235 i2 -= 7; 1236 } 1237 break; 1238 default: 1239 for (i = m - 1; i >= 0; i--) { 1240 v = aa + bs2 * ai[i]; 1241 vi = aj + ai[i]; 1242 nz = ai[i + 1] - ai[i]; 1243 1244 PetscCall(PetscArraycpy(w, b + i2, bs)); 1245 /* copy all rows of x that are needed into contiguous space */ 1246 workt = work; 1247 for (j = 0; j < nz; j++) { 1248 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1249 workt += bs; 1250 } 1251 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1252 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1253 1254 idiag -= bs2; 1255 i2 -= bs; 1256 } 1257 break; 1258 } 1259 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1260 } 1261 } 1262 PetscCall(VecRestoreArray(xx, &x)); 1263 PetscCall(VecRestoreArrayRead(bb, &b)); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 /* 1268 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1269 */ 1270 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1271 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1272 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1273 #define matsetvaluesblocked4_ matsetvaluesblocked4 1274 #endif 1275 1276 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1277 { 1278 Mat A = *AA; 1279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1280 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1281 PetscInt *ai = a->i, *ailen = a->ilen; 1282 PetscInt *aj = a->j, stepval, lastcol = -1; 1283 const PetscScalar *value = v; 1284 MatScalar *ap, *aa = a->a, *bap; 1285 1286 PetscFunctionBegin; 1287 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1288 stepval = (n - 1) * 4; 1289 for (k = 0; k < m; k++) { /* loop over added rows */ 1290 row = im[k]; 1291 rp = aj + ai[row]; 1292 ap = aa + 16 * ai[row]; 1293 nrow = ailen[row]; 1294 low = 0; 1295 high = nrow; 1296 for (l = 0; l < n; l++) { /* loop over added columns */ 1297 col = in[l]; 1298 if (col <= lastcol) low = 0; 1299 else high = nrow; 1300 lastcol = col; 1301 value = v + k * (stepval + 4 + l) * 4; 1302 while (high - low > 7) { 1303 t = (low + high) / 2; 1304 if (rp[t] > col) high = t; 1305 else low = t; 1306 } 1307 for (i = low; i < high; i++) { 1308 if (rp[i] > col) break; 1309 if (rp[i] == col) { 1310 bap = ap + 16 * i; 1311 for (ii = 0; ii < 4; ii++, value += stepval) { 1312 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1313 } 1314 goto noinsert2; 1315 } 1316 } 1317 N = nrow++ - 1; 1318 high++; /* added new column index thus must search to one higher than before */ 1319 /* shift up all the later entries in this row */ 1320 for (ii = N; ii >= i; ii--) { 1321 rp[ii + 1] = rp[ii]; 1322 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1323 } 1324 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1325 rp[i] = col; 1326 bap = ap + 16 * i; 1327 for (ii = 0; ii < 4; ii++, value += stepval) { 1328 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1329 } 1330 noinsert2:; 1331 low = i; 1332 } 1333 ailen[row] = nrow; 1334 } 1335 PetscFunctionReturnVoid(); 1336 } 1337 1338 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1339 #define matsetvalues4_ MATSETVALUES4 1340 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1341 #define matsetvalues4_ matsetvalues4 1342 #endif 1343 1344 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1345 { 1346 Mat A = *AA; 1347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1348 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1349 PetscInt *ai = a->i, *ailen = a->ilen; 1350 PetscInt *aj = a->j, brow, bcol; 1351 PetscInt ridx, cidx, lastcol = -1; 1352 MatScalar *ap, value, *aa = a->a, *bap; 1353 1354 PetscFunctionBegin; 1355 for (k = 0; k < m; k++) { /* loop over added rows */ 1356 row = im[k]; 1357 brow = row / 4; 1358 rp = aj + ai[brow]; 1359 ap = aa + 16 * ai[brow]; 1360 nrow = ailen[brow]; 1361 low = 0; 1362 high = nrow; 1363 for (l = 0; l < n; l++) { /* loop over added columns */ 1364 col = in[l]; 1365 bcol = col / 4; 1366 ridx = row % 4; 1367 cidx = col % 4; 1368 value = v[l + k * n]; 1369 if (col <= lastcol) low = 0; 1370 else high = nrow; 1371 lastcol = col; 1372 while (high - low > 7) { 1373 t = (low + high) / 2; 1374 if (rp[t] > bcol) high = t; 1375 else low = t; 1376 } 1377 for (i = low; i < high; i++) { 1378 if (rp[i] > bcol) break; 1379 if (rp[i] == bcol) { 1380 bap = ap + 16 * i + 4 * cidx + ridx; 1381 *bap += value; 1382 goto noinsert1; 1383 } 1384 } 1385 N = nrow++ - 1; 1386 high++; /* added new column thus must search to one higher than before */ 1387 /* shift up all the later entries in this row */ 1388 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1389 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1390 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1391 rp[i] = bcol; 1392 ap[16 * i + 4 * cidx + ridx] = value; 1393 noinsert1:; 1394 low = i; 1395 } 1396 ailen[brow] = nrow; 1397 } 1398 PetscFunctionReturnVoid(); 1399 } 1400 1401 /* 1402 Checks for missing diagonals 1403 */ 1404 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1405 { 1406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1407 PetscInt *diag, *ii = a->i, i; 1408 1409 PetscFunctionBegin; 1410 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1411 *missing = PETSC_FALSE; 1412 if (A->rmap->n > 0 && !ii) { 1413 *missing = PETSC_TRUE; 1414 if (d) *d = 0; 1415 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1416 } else { 1417 PetscInt n; 1418 n = PetscMin(a->mbs, a->nbs); 1419 diag = a->diag; 1420 for (i = 0; i < n; i++) { 1421 if (diag[i] >= ii[i + 1]) { 1422 *missing = PETSC_TRUE; 1423 if (d) *d = i; 1424 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1425 break; 1426 } 1427 } 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1433 { 1434 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1435 PetscInt i, j, m = a->mbs; 1436 1437 PetscFunctionBegin; 1438 if (!a->diag) { 1439 PetscCall(PetscMalloc1(m, &a->diag)); 1440 a->free_diag = PETSC_TRUE; 1441 } 1442 for (i = 0; i < m; i++) { 1443 a->diag[i] = a->i[i + 1]; 1444 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1445 if (a->j[j] == i) { 1446 a->diag[i] = j; 1447 break; 1448 } 1449 } 1450 } 1451 PetscFunctionReturn(0); 1452 } 1453 1454 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1455 { 1456 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1457 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1458 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1459 1460 PetscFunctionBegin; 1461 *nn = n; 1462 if (!ia) PetscFunctionReturn(0); 1463 if (symmetric) { 1464 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1465 nz = tia[n]; 1466 } else { 1467 tia = a->i; 1468 tja = a->j; 1469 } 1470 1471 if (!blockcompressed && bs > 1) { 1472 (*nn) *= bs; 1473 /* malloc & create the natural set of indices */ 1474 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1475 if (n) { 1476 (*ia)[0] = oshift; 1477 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1478 } 1479 1480 for (i = 1; i < n; i++) { 1481 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1482 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1483 } 1484 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1485 1486 if (inja) { 1487 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1488 cnt = 0; 1489 for (i = 0; i < n; i++) { 1490 for (j = 0; j < bs; j++) { 1491 for (k = tia[i]; k < tia[i + 1]; k++) { 1492 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1493 } 1494 } 1495 } 1496 } 1497 1498 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1499 PetscCall(PetscFree(tia)); 1500 PetscCall(PetscFree(tja)); 1501 } 1502 } else if (oshift == 1) { 1503 if (symmetric) { 1504 nz = tia[A->rmap->n / bs]; 1505 /* add 1 to i and j indices */ 1506 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1507 *ia = tia; 1508 if (ja) { 1509 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1510 *ja = tja; 1511 } 1512 } else { 1513 nz = a->i[A->rmap->n / bs]; 1514 /* malloc space and add 1 to i and j indices */ 1515 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1516 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1517 if (ja) { 1518 PetscCall(PetscMalloc1(nz, ja)); 1519 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1520 } 1521 } 1522 } else { 1523 *ia = tia; 1524 if (ja) *ja = tja; 1525 } 1526 PetscFunctionReturn(0); 1527 } 1528 1529 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1530 { 1531 PetscFunctionBegin; 1532 if (!ia) PetscFunctionReturn(0); 1533 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1534 PetscCall(PetscFree(*ia)); 1535 if (ja) PetscCall(PetscFree(*ja)); 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1541 { 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1543 1544 PetscFunctionBegin; 1545 #if defined(PETSC_USE_LOG) 1546 PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz); 1547 #endif 1548 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1549 PetscCall(ISDestroy(&a->row)); 1550 PetscCall(ISDestroy(&a->col)); 1551 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1552 PetscCall(PetscFree(a->idiag)); 1553 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1554 PetscCall(PetscFree(a->solve_work)); 1555 PetscCall(PetscFree(a->mult_work)); 1556 PetscCall(PetscFree(a->sor_workt)); 1557 PetscCall(PetscFree(a->sor_work)); 1558 PetscCall(ISDestroy(&a->icol)); 1559 PetscCall(PetscFree(a->saved_values)); 1560 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1561 1562 PetscCall(MatDestroy(&a->sbaijMat)); 1563 PetscCall(MatDestroy(&a->parent)); 1564 PetscCall(PetscFree(A->data)); 1565 1566 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1567 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1568 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1569 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1570 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1571 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1572 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1573 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1574 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1575 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1576 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1577 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1578 #if defined(PETSC_HAVE_HYPRE) 1579 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1580 #endif 1581 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1582 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1587 { 1588 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1589 1590 PetscFunctionBegin; 1591 switch (op) { 1592 case MAT_ROW_ORIENTED: 1593 a->roworiented = flg; 1594 break; 1595 case MAT_KEEP_NONZERO_PATTERN: 1596 a->keepnonzeropattern = flg; 1597 break; 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 a->nonew = (flg ? 0 : 1); 1600 break; 1601 case MAT_NEW_NONZERO_LOCATION_ERR: 1602 a->nonew = (flg ? -1 : 0); 1603 break; 1604 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1605 a->nonew = (flg ? -2 : 0); 1606 break; 1607 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1608 a->nounused = (flg ? -1 : 0); 1609 break; 1610 case MAT_FORCE_DIAGONAL_ENTRIES: 1611 case MAT_IGNORE_OFF_PROC_ENTRIES: 1612 case MAT_USE_HASH_TABLE: 1613 case MAT_SORTED_FULL: 1614 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1615 break; 1616 case MAT_SPD: 1617 case MAT_SYMMETRIC: 1618 case MAT_STRUCTURALLY_SYMMETRIC: 1619 case MAT_HERMITIAN: 1620 case MAT_SYMMETRY_ETERNAL: 1621 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1622 case MAT_SUBMAT_SINGLEIS: 1623 case MAT_STRUCTURE_ONLY: 1624 case MAT_SPD_ETERNAL: 1625 /* if the diagonal matrix is square it inherits some of the properties above */ 1626 break; 1627 default: 1628 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1634 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1635 { 1636 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1637 MatScalar *aa_i; 1638 PetscScalar *v_i; 1639 1640 PetscFunctionBegin; 1641 bs = A->rmap->bs; 1642 bs2 = bs * bs; 1643 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1644 1645 bn = row / bs; /* Block number */ 1646 bp = row % bs; /* Block Position */ 1647 M = ai[bn + 1] - ai[bn]; 1648 *nz = bs * M; 1649 1650 if (v) { 1651 *v = NULL; 1652 if (*nz) { 1653 PetscCall(PetscMalloc1(*nz, v)); 1654 for (i = 0; i < M; i++) { /* for each block in the block row */ 1655 v_i = *v + i * bs; 1656 aa_i = aa + bs2 * (ai[bn] + i); 1657 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1658 } 1659 } 1660 } 1661 1662 if (idx) { 1663 *idx = NULL; 1664 if (*nz) { 1665 PetscCall(PetscMalloc1(*nz, idx)); 1666 for (i = 0; i < M; i++) { /* for each block in the block row */ 1667 idx_i = *idx + i * bs; 1668 itmp = bs * aj[ai[bn] + i]; 1669 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1670 } 1671 } 1672 } 1673 PetscFunctionReturn(0); 1674 } 1675 1676 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1677 { 1678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1679 1680 PetscFunctionBegin; 1681 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1686 { 1687 PetscFunctionBegin; 1688 if (nz) *nz = 0; 1689 if (idx) PetscCall(PetscFree(*idx)); 1690 if (v) PetscCall(PetscFree(*v)); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1695 { 1696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1697 Mat C; 1698 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1699 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1700 MatScalar *ata, *aa = a->a; 1701 1702 PetscFunctionBegin; 1703 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1704 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1705 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1706 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1707 1708 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1709 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1710 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1711 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1712 1713 at = (Mat_SeqBAIJ *)C->data; 1714 ati = at->i; 1715 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1716 } else { 1717 C = *B; 1718 at = (Mat_SeqBAIJ *)C->data; 1719 ati = at->i; 1720 } 1721 1722 atj = at->j; 1723 ata = at->a; 1724 1725 /* Copy ati into atfill so we have locations of the next free space in atj */ 1726 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1727 1728 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1729 for (i = 0; i < mbs; i++) { 1730 anzj = ai[i + 1] - ai[i]; 1731 for (j = 0; j < anzj; j++) { 1732 atj[atfill[*aj]] = i; 1733 for (kr = 0; kr < bs; kr++) { 1734 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1735 } 1736 atfill[*aj++] += 1; 1737 } 1738 } 1739 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1740 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1741 1742 /* Clean up temporary space and complete requests. */ 1743 PetscCall(PetscFree(atfill)); 1744 1745 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1746 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1747 *B = C; 1748 } else { 1749 PetscCall(MatHeaderMerge(A, &C)); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1755 { 1756 Mat Btrans; 1757 1758 PetscFunctionBegin; 1759 *f = PETSC_FALSE; 1760 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1761 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1762 PetscCall(MatDestroy(&Btrans)); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1767 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1768 { 1769 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1770 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1771 PetscInt *rowlens, *colidxs; 1772 PetscScalar *matvals; 1773 1774 PetscFunctionBegin; 1775 PetscCall(PetscViewerSetUp(viewer)); 1776 1777 M = mat->rmap->N; 1778 N = mat->cmap->N; 1779 m = mat->rmap->n; 1780 bs = mat->rmap->bs; 1781 nz = bs * bs * A->nz; 1782 1783 /* write matrix header */ 1784 header[0] = MAT_FILE_CLASSID; 1785 header[1] = M; 1786 header[2] = N; 1787 header[3] = nz; 1788 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1789 1790 /* store row lengths */ 1791 PetscCall(PetscMalloc1(m, &rowlens)); 1792 for (cnt = 0, i = 0; i < A->mbs; i++) 1793 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1794 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1795 PetscCall(PetscFree(rowlens)); 1796 1797 /* store column indices */ 1798 PetscCall(PetscMalloc1(nz, &colidxs)); 1799 for (cnt = 0, i = 0; i < A->mbs; i++) 1800 for (k = 0; k < bs; k++) 1801 for (j = A->i[i]; j < A->i[i + 1]; j++) 1802 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1803 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1804 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1805 PetscCall(PetscFree(colidxs)); 1806 1807 /* store nonzero values */ 1808 PetscCall(PetscMalloc1(nz, &matvals)); 1809 for (cnt = 0, i = 0; i < A->mbs; i++) 1810 for (k = 0; k < bs; k++) 1811 for (j = A->i[i]; j < A->i[i + 1]; j++) 1812 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1813 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1814 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1815 PetscCall(PetscFree(matvals)); 1816 1817 /* write block size option to the viewer's .info file */ 1818 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1823 { 1824 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1825 PetscInt i, bs = A->rmap->bs, k; 1826 1827 PetscFunctionBegin; 1828 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1829 for (i = 0; i < a->mbs; i++) { 1830 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1831 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1832 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1833 } 1834 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1839 { 1840 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1841 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1842 PetscViewerFormat format; 1843 1844 PetscFunctionBegin; 1845 if (A->structure_only) { 1846 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 PetscCall(PetscViewerGetFormat(viewer, &format)); 1851 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1852 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1853 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1854 const char *matname; 1855 Mat aij; 1856 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1857 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1858 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1859 PetscCall(MatView(aij, viewer)); 1860 PetscCall(MatDestroy(&aij)); 1861 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1862 PetscFunctionReturn(0); 1863 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1864 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1865 for (i = 0; i < a->mbs; i++) { 1866 for (j = 0; j < bs; j++) { 1867 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1868 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1869 for (l = 0; l < bs; l++) { 1870 #if defined(PETSC_USE_COMPLEX) 1871 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1872 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1873 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1874 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1875 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1876 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1877 } 1878 #else 1879 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1880 #endif 1881 } 1882 } 1883 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1884 } 1885 } 1886 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1887 } else { 1888 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1889 for (i = 0; i < a->mbs; i++) { 1890 for (j = 0; j < bs; j++) { 1891 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1892 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1893 for (l = 0; l < bs; l++) { 1894 #if defined(PETSC_USE_COMPLEX) 1895 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1896 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1897 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1898 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1899 } else { 1900 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1901 } 1902 #else 1903 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1904 #endif 1905 } 1906 } 1907 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1908 } 1909 } 1910 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1911 } 1912 PetscCall(PetscViewerFlush(viewer)); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #include <petscdraw.h> 1917 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1918 { 1919 Mat A = (Mat)Aa; 1920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1921 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1922 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1923 MatScalar *aa; 1924 PetscViewer viewer; 1925 PetscViewerFormat format; 1926 1927 PetscFunctionBegin; 1928 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1929 PetscCall(PetscViewerGetFormat(viewer, &format)); 1930 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1931 1932 /* loop over matrix elements drawing boxes */ 1933 1934 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1935 PetscDrawCollectiveBegin(draw); 1936 /* Blue for negative, Cyan for zero and Red for positive */ 1937 color = PETSC_DRAW_BLUE; 1938 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1939 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1940 y_l = A->rmap->N - row - 1.0; 1941 y_r = y_l + 1.0; 1942 x_l = a->j[j] * bs; 1943 x_r = x_l + 1.0; 1944 aa = a->a + j * bs2; 1945 for (k = 0; k < bs; k++) { 1946 for (l = 0; l < bs; l++) { 1947 if (PetscRealPart(*aa++) >= 0.) continue; 1948 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1949 } 1950 } 1951 } 1952 } 1953 color = PETSC_DRAW_CYAN; 1954 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1955 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1956 y_l = A->rmap->N - row - 1.0; 1957 y_r = y_l + 1.0; 1958 x_l = a->j[j] * bs; 1959 x_r = x_l + 1.0; 1960 aa = a->a + j * bs2; 1961 for (k = 0; k < bs; k++) { 1962 for (l = 0; l < bs; l++) { 1963 if (PetscRealPart(*aa++) != 0.) continue; 1964 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1965 } 1966 } 1967 } 1968 } 1969 color = PETSC_DRAW_RED; 1970 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1971 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1972 y_l = A->rmap->N - row - 1.0; 1973 y_r = y_l + 1.0; 1974 x_l = a->j[j] * bs; 1975 x_r = x_l + 1.0; 1976 aa = a->a + j * bs2; 1977 for (k = 0; k < bs; k++) { 1978 for (l = 0; l < bs; l++) { 1979 if (PetscRealPart(*aa++) <= 0.) continue; 1980 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1981 } 1982 } 1983 } 1984 } 1985 PetscDrawCollectiveEnd(draw); 1986 } else { 1987 /* use contour shading to indicate magnitude of values */ 1988 /* first determine max of all nonzero values */ 1989 PetscReal minv = 0.0, maxv = 0.0; 1990 PetscDraw popup; 1991 1992 for (i = 0; i < a->nz * a->bs2; i++) { 1993 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1994 } 1995 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1996 PetscCall(PetscDrawGetPopup(draw, &popup)); 1997 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1998 1999 PetscDrawCollectiveBegin(draw); 2000 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2001 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2002 y_l = A->rmap->N - row - 1.0; 2003 y_r = y_l + 1.0; 2004 x_l = a->j[j] * bs; 2005 x_r = x_l + 1.0; 2006 aa = a->a + j * bs2; 2007 for (k = 0; k < bs; k++) { 2008 for (l = 0; l < bs; l++) { 2009 MatScalar v = *aa++; 2010 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2011 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2012 } 2013 } 2014 } 2015 } 2016 PetscDrawCollectiveEnd(draw); 2017 } 2018 PetscFunctionReturn(0); 2019 } 2020 2021 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2022 { 2023 PetscReal xl, yl, xr, yr, w, h; 2024 PetscDraw draw; 2025 PetscBool isnull; 2026 2027 PetscFunctionBegin; 2028 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2029 PetscCall(PetscDrawIsNull(draw, &isnull)); 2030 if (isnull) PetscFunctionReturn(0); 2031 2032 xr = A->cmap->n; 2033 yr = A->rmap->N; 2034 h = yr / 10.0; 2035 w = xr / 10.0; 2036 xr += w; 2037 yr += h; 2038 xl = -w; 2039 yl = -h; 2040 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2041 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2042 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2043 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2044 PetscCall(PetscDrawSave(draw)); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2049 { 2050 PetscBool iascii, isbinary, isdraw; 2051 2052 PetscFunctionBegin; 2053 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2054 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2055 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2056 if (iascii) { 2057 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2058 } else if (isbinary) { 2059 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2060 } else if (isdraw) { 2061 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2062 } else { 2063 Mat B; 2064 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2065 PetscCall(MatView(B, viewer)); 2066 PetscCall(MatDestroy(&B)); 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2072 { 2073 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2074 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2075 PetscInt *ai = a->i, *ailen = a->ilen; 2076 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2077 MatScalar *ap, *aa = a->a; 2078 2079 PetscFunctionBegin; 2080 for (k = 0; k < m; k++) { /* loop over rows */ 2081 row = im[k]; 2082 brow = row / bs; 2083 if (row < 0) { 2084 v += n; 2085 continue; 2086 } /* negative row */ 2087 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2088 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2089 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2090 nrow = ailen[brow]; 2091 for (l = 0; l < n; l++) { /* loop over columns */ 2092 if (in[l] < 0) { 2093 v++; 2094 continue; 2095 } /* negative column */ 2096 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2097 col = in[l]; 2098 bcol = col / bs; 2099 cidx = col % bs; 2100 ridx = row % bs; 2101 high = nrow; 2102 low = 0; /* assume unsorted */ 2103 while (high - low > 5) { 2104 t = (low + high) / 2; 2105 if (rp[t] > bcol) high = t; 2106 else low = t; 2107 } 2108 for (i = low; i < high; i++) { 2109 if (rp[i] > bcol) break; 2110 if (rp[i] == bcol) { 2111 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2112 goto finished; 2113 } 2114 } 2115 *v++ = 0.0; 2116 finished:; 2117 } 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2123 { 2124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2125 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2126 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2127 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2128 PetscBool roworiented = a->roworiented; 2129 const PetscScalar *value = v; 2130 MatScalar *ap = NULL, *aa = a->a, *bap; 2131 2132 PetscFunctionBegin; 2133 if (roworiented) { 2134 stepval = (n - 1) * bs; 2135 } else { 2136 stepval = (m - 1) * bs; 2137 } 2138 for (k = 0; k < m; k++) { /* loop over added rows */ 2139 row = im[k]; 2140 if (row < 0) continue; 2141 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2142 rp = aj + ai[row]; 2143 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2144 rmax = imax[row]; 2145 nrow = ailen[row]; 2146 low = 0; 2147 high = nrow; 2148 for (l = 0; l < n; l++) { /* loop over added columns */ 2149 if (in[l] < 0) continue; 2150 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2151 col = in[l]; 2152 if (!A->structure_only) { 2153 if (roworiented) { 2154 value = v + (k * (stepval + bs) + l) * bs; 2155 } else { 2156 value = v + (l * (stepval + bs) + k) * bs; 2157 } 2158 } 2159 if (col <= lastcol) low = 0; 2160 else high = nrow; 2161 lastcol = col; 2162 while (high - low > 7) { 2163 t = (low + high) / 2; 2164 if (rp[t] > col) high = t; 2165 else low = t; 2166 } 2167 for (i = low; i < high; i++) { 2168 if (rp[i] > col) break; 2169 if (rp[i] == col) { 2170 if (A->structure_only) goto noinsert2; 2171 bap = ap + bs2 * i; 2172 if (roworiented) { 2173 if (is == ADD_VALUES) { 2174 for (ii = 0; ii < bs; ii++, value += stepval) { 2175 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2176 } 2177 } else { 2178 for (ii = 0; ii < bs; ii++, value += stepval) { 2179 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2180 } 2181 } 2182 } else { 2183 if (is == ADD_VALUES) { 2184 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2185 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2186 bap += bs; 2187 } 2188 } else { 2189 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2190 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2191 bap += bs; 2192 } 2193 } 2194 } 2195 goto noinsert2; 2196 } 2197 } 2198 if (nonew == 1) goto noinsert2; 2199 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2200 if (A->structure_only) { 2201 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2202 } else { 2203 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2204 } 2205 N = nrow++ - 1; 2206 high++; 2207 /* shift up all the later entries in this row */ 2208 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2209 rp[i] = col; 2210 if (!A->structure_only) { 2211 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2212 bap = ap + bs2 * i; 2213 if (roworiented) { 2214 for (ii = 0; ii < bs; ii++, value += stepval) { 2215 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2216 } 2217 } else { 2218 for (ii = 0; ii < bs; ii++, value += stepval) { 2219 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2220 } 2221 } 2222 } 2223 noinsert2:; 2224 low = i; 2225 } 2226 ailen[row] = nrow; 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2232 { 2233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2234 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2235 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2236 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2237 MatScalar *aa = a->a, *ap; 2238 PetscReal ratio = 0.6; 2239 2240 PetscFunctionBegin; 2241 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2242 2243 if (m) rmax = ailen[0]; 2244 for (i = 1; i < mbs; i++) { 2245 /* move each row back by the amount of empty slots (fshift) before it*/ 2246 fshift += imax[i - 1] - ailen[i - 1]; 2247 rmax = PetscMax(rmax, ailen[i]); 2248 if (fshift) { 2249 ip = aj + ai[i]; 2250 ap = aa + bs2 * ai[i]; 2251 N = ailen[i]; 2252 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2253 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2254 } 2255 ai[i] = ai[i - 1] + ailen[i - 1]; 2256 } 2257 if (mbs) { 2258 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2259 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2260 } 2261 2262 /* reset ilen and imax for each row */ 2263 a->nonzerorowcnt = 0; 2264 if (A->structure_only) { 2265 PetscCall(PetscFree2(a->imax, a->ilen)); 2266 } else { /* !A->structure_only */ 2267 for (i = 0; i < mbs; i++) { 2268 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2269 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2270 } 2271 } 2272 a->nz = ai[mbs]; 2273 2274 /* diagonals may have moved, so kill the diagonal pointers */ 2275 a->idiagvalid = PETSC_FALSE; 2276 if (fshift && a->diag) { 2277 PetscCall(PetscFree(a->diag)); 2278 a->diag = NULL; 2279 } 2280 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2281 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2282 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2283 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2284 2285 A->info.mallocs += a->reallocs; 2286 a->reallocs = 0; 2287 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2288 a->rmax = rmax; 2289 2290 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 /* 2295 This function returns an array of flags which indicate the locations of contiguous 2296 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2297 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2298 Assume: sizes should be long enough to hold all the values. 2299 */ 2300 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2301 { 2302 PetscInt i, j, k, row; 2303 PetscBool flg; 2304 2305 PetscFunctionBegin; 2306 for (i = 0, j = 0; i < n; j++) { 2307 row = idx[i]; 2308 if (row % bs != 0) { /* Not the beginning of a block */ 2309 sizes[j] = 1; 2310 i++; 2311 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2312 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2313 i++; 2314 } else { /* Beginning of the block, so check if the complete block exists */ 2315 flg = PETSC_TRUE; 2316 for (k = 1; k < bs; k++) { 2317 if (row + k != idx[i + k]) { /* break in the block */ 2318 flg = PETSC_FALSE; 2319 break; 2320 } 2321 } 2322 if (flg) { /* No break in the bs */ 2323 sizes[j] = bs; 2324 i += bs; 2325 } else { 2326 sizes[j] = 1; 2327 i++; 2328 } 2329 } 2330 } 2331 *bs_max = j; 2332 PetscFunctionReturn(0); 2333 } 2334 2335 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2336 { 2337 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2338 PetscInt i, j, k, count, *rows; 2339 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2340 PetscScalar zero = 0.0; 2341 MatScalar *aa; 2342 const PetscScalar *xx; 2343 PetscScalar *bb; 2344 2345 PetscFunctionBegin; 2346 /* fix right hand side if needed */ 2347 if (x && b) { 2348 PetscCall(VecGetArrayRead(x, &xx)); 2349 PetscCall(VecGetArray(b, &bb)); 2350 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2351 PetscCall(VecRestoreArrayRead(x, &xx)); 2352 PetscCall(VecRestoreArray(b, &bb)); 2353 } 2354 2355 /* Make a copy of the IS and sort it */ 2356 /* allocate memory for rows,sizes */ 2357 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2358 2359 /* copy IS values to rows, and sort them */ 2360 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2361 PetscCall(PetscSortInt(is_n, rows)); 2362 2363 if (baij->keepnonzeropattern) { 2364 for (i = 0; i < is_n; i++) sizes[i] = 1; 2365 bs_max = is_n; 2366 } else { 2367 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2368 A->nonzerostate++; 2369 } 2370 2371 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2372 row = rows[j]; 2373 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2374 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2375 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2376 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2377 if (diag != (PetscScalar)0.0) { 2378 if (baij->ilen[row / bs] > 0) { 2379 baij->ilen[row / bs] = 1; 2380 baij->j[baij->i[row / bs]] = row / bs; 2381 2382 PetscCall(PetscArrayzero(aa, count * bs)); 2383 } 2384 /* Now insert all the diagonal values for this bs */ 2385 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2386 } else { /* (diag == 0.0) */ 2387 baij->ilen[row / bs] = 0; 2388 } /* end (diag == 0.0) */ 2389 } else { /* (sizes[i] != bs) */ 2390 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2391 for (k = 0; k < count; k++) { 2392 aa[0] = zero; 2393 aa += bs; 2394 } 2395 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2396 } 2397 } 2398 2399 PetscCall(PetscFree2(rows, sizes)); 2400 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2405 { 2406 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2407 PetscInt i, j, k, count; 2408 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2409 PetscScalar zero = 0.0; 2410 MatScalar *aa; 2411 const PetscScalar *xx; 2412 PetscScalar *bb; 2413 PetscBool *zeroed, vecs = PETSC_FALSE; 2414 2415 PetscFunctionBegin; 2416 /* fix right hand side if needed */ 2417 if (x && b) { 2418 PetscCall(VecGetArrayRead(x, &xx)); 2419 PetscCall(VecGetArray(b, &bb)); 2420 vecs = PETSC_TRUE; 2421 } 2422 2423 /* zero the columns */ 2424 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2425 for (i = 0; i < is_n; i++) { 2426 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2427 zeroed[is_idx[i]] = PETSC_TRUE; 2428 } 2429 for (i = 0; i < A->rmap->N; i++) { 2430 if (!zeroed[i]) { 2431 row = i / bs; 2432 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2433 for (k = 0; k < bs; k++) { 2434 col = bs * baij->j[j] + k; 2435 if (zeroed[col]) { 2436 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2437 if (vecs) bb[i] -= aa[0] * xx[col]; 2438 aa[0] = 0.0; 2439 } 2440 } 2441 } 2442 } else if (vecs) bb[i] = diag * xx[i]; 2443 } 2444 PetscCall(PetscFree(zeroed)); 2445 if (vecs) { 2446 PetscCall(VecRestoreArrayRead(x, &xx)); 2447 PetscCall(VecRestoreArray(b, &bb)); 2448 } 2449 2450 /* zero the rows */ 2451 for (i = 0; i < is_n; i++) { 2452 row = is_idx[i]; 2453 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2454 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2455 for (k = 0; k < count; k++) { 2456 aa[0] = zero; 2457 aa += bs; 2458 } 2459 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2460 } 2461 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2466 { 2467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2468 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2469 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2470 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2471 PetscInt ridx, cidx, bs2 = a->bs2; 2472 PetscBool roworiented = a->roworiented; 2473 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2474 2475 PetscFunctionBegin; 2476 for (k = 0; k < m; k++) { /* loop over added rows */ 2477 row = im[k]; 2478 brow = row / bs; 2479 if (row < 0) continue; 2480 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2481 rp = aj + ai[brow]; 2482 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2483 rmax = imax[brow]; 2484 nrow = ailen[brow]; 2485 low = 0; 2486 high = nrow; 2487 for (l = 0; l < n; l++) { /* loop over added columns */ 2488 if (in[l] < 0) continue; 2489 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2490 col = in[l]; 2491 bcol = col / bs; 2492 ridx = row % bs; 2493 cidx = col % bs; 2494 if (!A->structure_only) { 2495 if (roworiented) { 2496 value = v[l + k * n]; 2497 } else { 2498 value = v[k + l * m]; 2499 } 2500 } 2501 if (col <= lastcol) low = 0; 2502 else high = nrow; 2503 lastcol = col; 2504 while (high - low > 7) { 2505 t = (low + high) / 2; 2506 if (rp[t] > bcol) high = t; 2507 else low = t; 2508 } 2509 for (i = low; i < high; i++) { 2510 if (rp[i] > bcol) break; 2511 if (rp[i] == bcol) { 2512 bap = ap + bs2 * i + bs * cidx + ridx; 2513 if (!A->structure_only) { 2514 if (is == ADD_VALUES) *bap += value; 2515 else *bap = value; 2516 } 2517 goto noinsert1; 2518 } 2519 } 2520 if (nonew == 1) goto noinsert1; 2521 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2522 if (A->structure_only) { 2523 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2524 } else { 2525 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2526 } 2527 N = nrow++ - 1; 2528 high++; 2529 /* shift up all the later entries in this row */ 2530 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2531 rp[i] = bcol; 2532 if (!A->structure_only) { 2533 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2534 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2535 ap[bs2 * i + bs * cidx + ridx] = value; 2536 } 2537 a->nz++; 2538 A->nonzerostate++; 2539 noinsert1:; 2540 low = i; 2541 } 2542 ailen[brow] = nrow; 2543 } 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2548 { 2549 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2550 Mat outA; 2551 PetscBool row_identity, col_identity; 2552 2553 PetscFunctionBegin; 2554 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2555 PetscCall(ISIdentity(row, &row_identity)); 2556 PetscCall(ISIdentity(col, &col_identity)); 2557 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2558 2559 outA = inA; 2560 inA->factortype = MAT_FACTOR_LU; 2561 PetscCall(PetscFree(inA->solvertype)); 2562 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2563 2564 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2565 2566 PetscCall(PetscObjectReference((PetscObject)row)); 2567 PetscCall(ISDestroy(&a->row)); 2568 a->row = row; 2569 PetscCall(PetscObjectReference((PetscObject)col)); 2570 PetscCall(ISDestroy(&a->col)); 2571 a->col = col; 2572 2573 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2574 PetscCall(ISDestroy(&a->icol)); 2575 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2576 2577 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2578 if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); } 2579 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2580 PetscFunctionReturn(0); 2581 } 2582 2583 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) 2584 { 2585 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2586 PetscInt i, nz, mbs; 2587 2588 PetscFunctionBegin; 2589 nz = baij->maxnz; 2590 mbs = baij->mbs; 2591 for (i = 0; i < nz; i++) baij->j[i] = indices[i]; 2592 baij->nz = nz; 2593 for (i = 0; i < mbs; i++) baij->ilen[i] = baij->imax[i]; 2594 PetscFunctionReturn(0); 2595 } 2596 2597 /*@ 2598 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2599 2600 Input Parameters: 2601 + mat - the `MATSEQBAIJ` matrix 2602 - indices - the column indices 2603 2604 Level: advanced 2605 2606 Notes: 2607 This can be called if you have precomputed the nonzero structure of the 2608 matrix and want to provide it to the matrix object to improve the performance 2609 of the `MatSetValues()` operation. 2610 2611 You MUST have set the correct numbers of nonzeros per row in the call to 2612 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2613 2614 MUST be called before any calls to `MatSetValues()` 2615 2616 .seealso: `MATSEQBAIJ`, `MatSetValues()` 2617 @*/ 2618 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2619 { 2620 PetscFunctionBegin; 2621 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2622 PetscValidIntPointer(indices, 2); 2623 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2628 { 2629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2630 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2631 PetscReal atmp; 2632 PetscScalar *x, zero = 0.0; 2633 MatScalar *aa; 2634 PetscInt ncols, brow, krow, kcol; 2635 2636 PetscFunctionBegin; 2637 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2638 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2639 bs = A->rmap->bs; 2640 aa = a->a; 2641 ai = a->i; 2642 aj = a->j; 2643 mbs = a->mbs; 2644 2645 PetscCall(VecSet(v, zero)); 2646 PetscCall(VecGetArray(v, &x)); 2647 PetscCall(VecGetLocalSize(v, &n)); 2648 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2649 for (i = 0; i < mbs; i++) { 2650 ncols = ai[1] - ai[0]; 2651 ai++; 2652 brow = bs * i; 2653 for (j = 0; j < ncols; j++) { 2654 for (kcol = 0; kcol < bs; kcol++) { 2655 for (krow = 0; krow < bs; krow++) { 2656 atmp = PetscAbsScalar(*aa); 2657 aa++; 2658 row = brow + krow; /* row index */ 2659 if (PetscAbsScalar(x[row]) < atmp) { 2660 x[row] = atmp; 2661 if (idx) idx[row] = bs * (*aj) + kcol; 2662 } 2663 } 2664 } 2665 aj++; 2666 } 2667 } 2668 PetscCall(VecRestoreArray(v, &x)); 2669 PetscFunctionReturn(0); 2670 } 2671 2672 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2673 { 2674 PetscFunctionBegin; 2675 /* If the two matrices have the same copy implementation, use fast copy. */ 2676 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2678 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2679 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2680 2681 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2682 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2683 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2684 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2685 } else { 2686 PetscCall(MatCopy_Basic(A, B, str)); 2687 } 2688 PetscFunctionReturn(0); 2689 } 2690 2691 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2692 { 2693 PetscFunctionBegin; 2694 PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL)); 2695 PetscFunctionReturn(0); 2696 } 2697 2698 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2699 { 2700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 *array = a->a; 2704 PetscFunctionReturn(0); 2705 } 2706 2707 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2708 { 2709 PetscFunctionBegin; 2710 *array = NULL; 2711 PetscFunctionReturn(0); 2712 } 2713 2714 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2715 { 2716 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2717 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2718 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2719 2720 PetscFunctionBegin; 2721 /* Set the number of nonzeros in the new matrix */ 2722 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2727 { 2728 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2729 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2730 PetscBLASInt one = 1; 2731 2732 PetscFunctionBegin; 2733 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2734 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2735 if (e) { 2736 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2737 if (e) { 2738 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2739 if (e) str = SAME_NONZERO_PATTERN; 2740 } 2741 } 2742 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2743 } 2744 if (str == SAME_NONZERO_PATTERN) { 2745 PetscScalar alpha = a; 2746 PetscBLASInt bnz; 2747 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2748 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2749 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2750 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2751 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2752 } else { 2753 Mat B; 2754 PetscInt *nnz; 2755 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2756 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2757 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2758 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2759 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2760 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2761 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2762 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2763 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2764 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2765 PetscCall(MatHeaderMerge(Y, &B)); 2766 PetscCall(PetscFree(nnz)); 2767 } 2768 PetscFunctionReturn(0); 2769 } 2770 2771 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2772 { 2773 #if defined(PETSC_USE_COMPLEX) 2774 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2775 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2776 MatScalar *aa = a->a; 2777 2778 PetscFunctionBegin; 2779 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2780 #else 2781 PetscFunctionBegin; 2782 #endif 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2787 { 2788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2789 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2790 MatScalar *aa = a->a; 2791 2792 PetscFunctionBegin; 2793 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2798 { 2799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2800 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2801 MatScalar *aa = a->a; 2802 2803 PetscFunctionBegin; 2804 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2805 PetscFunctionReturn(0); 2806 } 2807 2808 /* 2809 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2810 */ 2811 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2812 { 2813 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2814 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2815 PetscInt nz = a->i[m], row, *jj, mr, col; 2816 2817 PetscFunctionBegin; 2818 *nn = n; 2819 if (!ia) PetscFunctionReturn(0); 2820 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2821 PetscCall(PetscCalloc1(n, &collengths)); 2822 PetscCall(PetscMalloc1(n + 1, &cia)); 2823 PetscCall(PetscMalloc1(nz, &cja)); 2824 jj = a->j; 2825 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2826 cia[0] = oshift; 2827 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2828 PetscCall(PetscArrayzero(collengths, n)); 2829 jj = a->j; 2830 for (row = 0; row < m; row++) { 2831 mr = a->i[row + 1] - a->i[row]; 2832 for (i = 0; i < mr; i++) { 2833 col = *jj++; 2834 2835 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2836 } 2837 } 2838 PetscCall(PetscFree(collengths)); 2839 *ia = cia; 2840 *ja = cja; 2841 PetscFunctionReturn(0); 2842 } 2843 2844 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2845 { 2846 PetscFunctionBegin; 2847 if (!ia) PetscFunctionReturn(0); 2848 PetscCall(PetscFree(*ia)); 2849 PetscCall(PetscFree(*ja)); 2850 PetscFunctionReturn(0); 2851 } 2852 2853 /* 2854 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2855 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2856 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2857 */ 2858 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2859 { 2860 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2861 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2862 PetscInt nz = a->i[m], row, *jj, mr, col; 2863 PetscInt *cspidx; 2864 2865 PetscFunctionBegin; 2866 *nn = n; 2867 if (!ia) PetscFunctionReturn(0); 2868 2869 PetscCall(PetscCalloc1(n, &collengths)); 2870 PetscCall(PetscMalloc1(n + 1, &cia)); 2871 PetscCall(PetscMalloc1(nz, &cja)); 2872 PetscCall(PetscMalloc1(nz, &cspidx)); 2873 jj = a->j; 2874 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2875 cia[0] = oshift; 2876 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2877 PetscCall(PetscArrayzero(collengths, n)); 2878 jj = a->j; 2879 for (row = 0; row < m; row++) { 2880 mr = a->i[row + 1] - a->i[row]; 2881 for (i = 0; i < mr; i++) { 2882 col = *jj++; 2883 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2884 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2885 } 2886 } 2887 PetscCall(PetscFree(collengths)); 2888 *ia = cia; 2889 *ja = cja; 2890 *spidx = cspidx; 2891 PetscFunctionReturn(0); 2892 } 2893 2894 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2895 { 2896 PetscFunctionBegin; 2897 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2898 PetscCall(PetscFree(*spidx)); 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2903 { 2904 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2905 2906 PetscFunctionBegin; 2907 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2908 PetscCall(MatShift_Basic(Y, a)); 2909 PetscFunctionReturn(0); 2910 } 2911 2912 /* -------------------------------------------------------------------*/ 2913 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2914 MatGetRow_SeqBAIJ, 2915 MatRestoreRow_SeqBAIJ, 2916 MatMult_SeqBAIJ_N, 2917 /* 4*/ MatMultAdd_SeqBAIJ_N, 2918 MatMultTranspose_SeqBAIJ, 2919 MatMultTransposeAdd_SeqBAIJ, 2920 NULL, 2921 NULL, 2922 NULL, 2923 /* 10*/ NULL, 2924 MatLUFactor_SeqBAIJ, 2925 NULL, 2926 NULL, 2927 MatTranspose_SeqBAIJ, 2928 /* 15*/ MatGetInfo_SeqBAIJ, 2929 MatEqual_SeqBAIJ, 2930 MatGetDiagonal_SeqBAIJ, 2931 MatDiagonalScale_SeqBAIJ, 2932 MatNorm_SeqBAIJ, 2933 /* 20*/ NULL, 2934 MatAssemblyEnd_SeqBAIJ, 2935 MatSetOption_SeqBAIJ, 2936 MatZeroEntries_SeqBAIJ, 2937 /* 24*/ MatZeroRows_SeqBAIJ, 2938 NULL, 2939 NULL, 2940 NULL, 2941 NULL, 2942 /* 29*/ MatSetUp_SeqBAIJ, 2943 NULL, 2944 NULL, 2945 NULL, 2946 NULL, 2947 /* 34*/ MatDuplicate_SeqBAIJ, 2948 NULL, 2949 NULL, 2950 MatILUFactor_SeqBAIJ, 2951 NULL, 2952 /* 39*/ MatAXPY_SeqBAIJ, 2953 MatCreateSubMatrices_SeqBAIJ, 2954 MatIncreaseOverlap_SeqBAIJ, 2955 MatGetValues_SeqBAIJ, 2956 MatCopy_SeqBAIJ, 2957 /* 44*/ NULL, 2958 MatScale_SeqBAIJ, 2959 MatShift_SeqBAIJ, 2960 NULL, 2961 MatZeroRowsColumns_SeqBAIJ, 2962 /* 49*/ NULL, 2963 MatGetRowIJ_SeqBAIJ, 2964 MatRestoreRowIJ_SeqBAIJ, 2965 MatGetColumnIJ_SeqBAIJ, 2966 MatRestoreColumnIJ_SeqBAIJ, 2967 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2968 NULL, 2969 NULL, 2970 NULL, 2971 MatSetValuesBlocked_SeqBAIJ, 2972 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2973 MatDestroy_SeqBAIJ, 2974 MatView_SeqBAIJ, 2975 NULL, 2976 NULL, 2977 /* 64*/ NULL, 2978 NULL, 2979 NULL, 2980 NULL, 2981 NULL, 2982 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2983 NULL, 2984 MatConvert_Basic, 2985 NULL, 2986 NULL, 2987 /* 74*/ NULL, 2988 MatFDColoringApply_BAIJ, 2989 NULL, 2990 NULL, 2991 NULL, 2992 /* 79*/ NULL, 2993 NULL, 2994 NULL, 2995 NULL, 2996 MatLoad_SeqBAIJ, 2997 /* 84*/ NULL, 2998 NULL, 2999 NULL, 3000 NULL, 3001 NULL, 3002 /* 89*/ NULL, 3003 NULL, 3004 NULL, 3005 NULL, 3006 NULL, 3007 /* 94*/ NULL, 3008 NULL, 3009 NULL, 3010 NULL, 3011 NULL, 3012 /* 99*/ NULL, 3013 NULL, 3014 NULL, 3015 MatConjugate_SeqBAIJ, 3016 NULL, 3017 /*104*/ NULL, 3018 MatRealPart_SeqBAIJ, 3019 MatImaginaryPart_SeqBAIJ, 3020 NULL, 3021 NULL, 3022 /*109*/ NULL, 3023 NULL, 3024 NULL, 3025 NULL, 3026 MatMissingDiagonal_SeqBAIJ, 3027 /*114*/ NULL, 3028 NULL, 3029 NULL, 3030 NULL, 3031 NULL, 3032 /*119*/ NULL, 3033 NULL, 3034 MatMultHermitianTranspose_SeqBAIJ, 3035 MatMultHermitianTransposeAdd_SeqBAIJ, 3036 NULL, 3037 /*124*/ NULL, 3038 MatGetColumnReductions_SeqBAIJ, 3039 MatInvertBlockDiagonal_SeqBAIJ, 3040 NULL, 3041 NULL, 3042 /*129*/ NULL, 3043 NULL, 3044 NULL, 3045 NULL, 3046 NULL, 3047 /*134*/ NULL, 3048 NULL, 3049 NULL, 3050 NULL, 3051 NULL, 3052 /*139*/ MatSetBlockSizes_Default, 3053 NULL, 3054 NULL, 3055 MatFDColoringSetUp_SeqXAIJ, 3056 NULL, 3057 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3058 MatDestroySubMatrices_SeqBAIJ, 3059 NULL, 3060 NULL, 3061 NULL, 3062 NULL, 3063 /*150*/ NULL, 3064 NULL}; 3065 3066 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3067 { 3068 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3069 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3070 3071 PetscFunctionBegin; 3072 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3073 3074 /* allocate space for values if not already there */ 3075 if (!aij->saved_values) { PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); } 3076 3077 /* copy values over */ 3078 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3079 PetscFunctionReturn(0); 3080 } 3081 3082 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3083 { 3084 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3085 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3086 3087 PetscFunctionBegin; 3088 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3089 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3090 3091 /* copy values over */ 3092 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3093 PetscFunctionReturn(0); 3094 } 3095 3096 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3097 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3098 3099 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3100 { 3101 Mat_SeqBAIJ *b; 3102 PetscInt i, mbs, nbs, bs2; 3103 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3104 3105 PetscFunctionBegin; 3106 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3107 if (nz == MAT_SKIP_ALLOCATION) { 3108 skipallocation = PETSC_TRUE; 3109 nz = 0; 3110 } 3111 3112 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3113 PetscCall(PetscLayoutSetUp(B->rmap)); 3114 PetscCall(PetscLayoutSetUp(B->cmap)); 3115 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3116 3117 B->preallocated = PETSC_TRUE; 3118 3119 mbs = B->rmap->n / bs; 3120 nbs = B->cmap->n / bs; 3121 bs2 = bs * bs; 3122 3123 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3124 3125 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3126 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3127 if (nnz) { 3128 for (i = 0; i < mbs; i++) { 3129 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3130 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3131 } 3132 } 3133 3134 b = (Mat_SeqBAIJ *)B->data; 3135 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3136 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3137 PetscOptionsEnd(); 3138 3139 if (!flg) { 3140 switch (bs) { 3141 case 1: 3142 B->ops->mult = MatMult_SeqBAIJ_1; 3143 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3144 break; 3145 case 2: 3146 B->ops->mult = MatMult_SeqBAIJ_2; 3147 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3148 break; 3149 case 3: 3150 B->ops->mult = MatMult_SeqBAIJ_3; 3151 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3152 break; 3153 case 4: 3154 B->ops->mult = MatMult_SeqBAIJ_4; 3155 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3156 break; 3157 case 5: 3158 B->ops->mult = MatMult_SeqBAIJ_5; 3159 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3160 break; 3161 case 6: 3162 B->ops->mult = MatMult_SeqBAIJ_6; 3163 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3164 break; 3165 case 7: 3166 B->ops->mult = MatMult_SeqBAIJ_7; 3167 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3168 break; 3169 case 9: { 3170 PetscInt version = 1; 3171 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3172 switch (version) { 3173 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3174 case 1: 3175 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3176 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3177 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3178 break; 3179 #endif 3180 default: 3181 B->ops->mult = MatMult_SeqBAIJ_N; 3182 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3183 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3184 break; 3185 } 3186 break; 3187 } 3188 case 11: 3189 B->ops->mult = MatMult_SeqBAIJ_11; 3190 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3191 break; 3192 case 12: { 3193 PetscInt version = 1; 3194 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3195 switch (version) { 3196 case 1: 3197 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3198 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3199 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3200 break; 3201 case 2: 3202 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3203 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3204 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3205 break; 3206 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3207 case 3: 3208 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3209 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3210 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3211 break; 3212 #endif 3213 default: 3214 B->ops->mult = MatMult_SeqBAIJ_N; 3215 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3216 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3217 break; 3218 } 3219 break; 3220 } 3221 case 15: { 3222 PetscInt version = 1; 3223 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3224 switch (version) { 3225 case 1: 3226 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3227 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3228 break; 3229 case 2: 3230 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3231 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3232 break; 3233 case 3: 3234 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3235 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3236 break; 3237 case 4: 3238 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3239 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3240 break; 3241 default: 3242 B->ops->mult = MatMult_SeqBAIJ_N; 3243 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3244 break; 3245 } 3246 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3247 break; 3248 } 3249 default: 3250 B->ops->mult = MatMult_SeqBAIJ_N; 3251 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3252 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3253 break; 3254 } 3255 } 3256 B->ops->sor = MatSOR_SeqBAIJ; 3257 b->mbs = mbs; 3258 b->nbs = nbs; 3259 if (!skipallocation) { 3260 if (!b->imax) { 3261 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3262 3263 b->free_imax_ilen = PETSC_TRUE; 3264 } 3265 /* b->ilen will count nonzeros in each block row so far. */ 3266 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3267 if (!nnz) { 3268 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3269 else if (nz < 0) nz = 1; 3270 nz = PetscMin(nz, nbs); 3271 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3272 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3273 } else { 3274 PetscInt64 nz64 = 0; 3275 for (i = 0; i < mbs; i++) { 3276 b->imax[i] = nnz[i]; 3277 nz64 += nnz[i]; 3278 } 3279 PetscCall(PetscIntCast(nz64, &nz)); 3280 } 3281 3282 /* allocate the matrix space */ 3283 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3284 if (B->structure_only) { 3285 PetscCall(PetscMalloc1(nz, &b->j)); 3286 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3287 } else { 3288 PetscInt nzbs2 = 0; 3289 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3290 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3291 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3292 } 3293 PetscCall(PetscArrayzero(b->j, nz)); 3294 3295 if (B->structure_only) { 3296 b->singlemalloc = PETSC_FALSE; 3297 b->free_a = PETSC_FALSE; 3298 } else { 3299 b->singlemalloc = PETSC_TRUE; 3300 b->free_a = PETSC_TRUE; 3301 } 3302 b->free_ij = PETSC_TRUE; 3303 3304 b->i[0] = 0; 3305 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3306 3307 } else { 3308 b->free_a = PETSC_FALSE; 3309 b->free_ij = PETSC_FALSE; 3310 } 3311 3312 b->bs2 = bs2; 3313 b->mbs = mbs; 3314 b->nz = 0; 3315 b->maxnz = nz; 3316 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3317 B->was_assembled = PETSC_FALSE; 3318 B->assembled = PETSC_FALSE; 3319 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3320 PetscFunctionReturn(0); 3321 } 3322 3323 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3324 { 3325 PetscInt i, m, nz, nz_max = 0, *nnz; 3326 PetscScalar *values = NULL; 3327 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3328 3329 PetscFunctionBegin; 3330 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3331 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3332 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3333 PetscCall(PetscLayoutSetUp(B->rmap)); 3334 PetscCall(PetscLayoutSetUp(B->cmap)); 3335 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3336 m = B->rmap->n / bs; 3337 3338 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3339 PetscCall(PetscMalloc1(m + 1, &nnz)); 3340 for (i = 0; i < m; i++) { 3341 nz = ii[i + 1] - ii[i]; 3342 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3343 nz_max = PetscMax(nz_max, nz); 3344 nnz[i] = nz; 3345 } 3346 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3347 PetscCall(PetscFree(nnz)); 3348 3349 values = (PetscScalar *)V; 3350 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3351 for (i = 0; i < m; i++) { 3352 PetscInt ncols = ii[i + 1] - ii[i]; 3353 const PetscInt *icols = jj + ii[i]; 3354 if (bs == 1 || !roworiented) { 3355 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3356 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3357 } else { 3358 PetscInt j; 3359 for (j = 0; j < ncols; j++) { 3360 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3361 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3362 } 3363 } 3364 } 3365 if (!V) PetscCall(PetscFree(values)); 3366 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3367 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3368 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3369 PetscFunctionReturn(0); 3370 } 3371 3372 /*@C 3373 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3374 3375 Not Collective 3376 3377 Input Parameter: 3378 . mat - a `MATSEQBAIJ` matrix 3379 3380 Output Parameter: 3381 . array - pointer to the data 3382 3383 Level: intermediate 3384 3385 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3386 @*/ 3387 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3388 { 3389 PetscFunctionBegin; 3390 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3391 PetscFunctionReturn(0); 3392 } 3393 3394 /*@C 3395 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3396 3397 Not Collective 3398 3399 Input Parameters: 3400 + mat - a `MATSEQBAIJ` matrix 3401 - array - pointer to the data 3402 3403 Level: intermediate 3404 3405 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3406 @*/ 3407 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3408 { 3409 PetscFunctionBegin; 3410 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3411 PetscFunctionReturn(0); 3412 } 3413 3414 /*MC 3415 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3416 block sparse compressed row format. 3417 3418 Options Database Keys: 3419 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3420 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3421 3422 Level: beginner 3423 3424 Notes: 3425 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3426 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3427 3428 Run with -info to see what version of the matrix-vector product is being used 3429 3430 .seealso: `MatCreateSeqBAIJ()` 3431 M*/ 3432 3433 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3434 3435 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3436 { 3437 PetscMPIInt size; 3438 Mat_SeqBAIJ *b; 3439 3440 PetscFunctionBegin; 3441 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3442 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3443 3444 PetscCall(PetscNew(&b)); 3445 B->data = (void *)b; 3446 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3447 3448 b->row = NULL; 3449 b->col = NULL; 3450 b->icol = NULL; 3451 b->reallocs = 0; 3452 b->saved_values = NULL; 3453 3454 b->roworiented = PETSC_TRUE; 3455 b->nonew = 0; 3456 b->diag = NULL; 3457 B->spptr = NULL; 3458 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3459 b->keepnonzeropattern = PETSC_FALSE; 3460 3461 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3462 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3463 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3464 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3465 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3466 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3467 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3468 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3469 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3470 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3471 #if defined(PETSC_HAVE_HYPRE) 3472 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3473 #endif 3474 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3475 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3476 PetscFunctionReturn(0); 3477 } 3478 3479 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3480 { 3481 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3482 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3483 3484 PetscFunctionBegin; 3485 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3486 3487 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3488 c->imax = a->imax; 3489 c->ilen = a->ilen; 3490 c->free_imax_ilen = PETSC_FALSE; 3491 } else { 3492 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3493 for (i = 0; i < mbs; i++) { 3494 c->imax[i] = a->imax[i]; 3495 c->ilen[i] = a->ilen[i]; 3496 } 3497 c->free_imax_ilen = PETSC_TRUE; 3498 } 3499 3500 /* allocate the matrix space */ 3501 if (mallocmatspace) { 3502 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3503 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3504 3505 c->i = a->i; 3506 c->j = a->j; 3507 c->singlemalloc = PETSC_FALSE; 3508 c->free_a = PETSC_TRUE; 3509 c->free_ij = PETSC_FALSE; 3510 c->parent = A; 3511 C->preallocated = PETSC_TRUE; 3512 C->assembled = PETSC_TRUE; 3513 3514 PetscCall(PetscObjectReference((PetscObject)A)); 3515 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3516 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3517 } else { 3518 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3519 3520 c->singlemalloc = PETSC_TRUE; 3521 c->free_a = PETSC_TRUE; 3522 c->free_ij = PETSC_TRUE; 3523 3524 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3525 if (mbs > 0) { 3526 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3527 if (cpvalues == MAT_COPY_VALUES) { 3528 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3529 } else { 3530 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3531 } 3532 } 3533 C->preallocated = PETSC_TRUE; 3534 C->assembled = PETSC_TRUE; 3535 } 3536 } 3537 3538 c->roworiented = a->roworiented; 3539 c->nonew = a->nonew; 3540 3541 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3542 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3543 3544 c->bs2 = a->bs2; 3545 c->mbs = a->mbs; 3546 c->nbs = a->nbs; 3547 3548 if (a->diag) { 3549 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3550 c->diag = a->diag; 3551 c->free_diag = PETSC_FALSE; 3552 } else { 3553 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3554 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3555 c->free_diag = PETSC_TRUE; 3556 } 3557 } else c->diag = NULL; 3558 3559 c->nz = a->nz; 3560 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3561 c->solve_work = NULL; 3562 c->mult_work = NULL; 3563 c->sor_workt = NULL; 3564 c->sor_work = NULL; 3565 3566 c->compressedrow.use = a->compressedrow.use; 3567 c->compressedrow.nrows = a->compressedrow.nrows; 3568 if (a->compressedrow.use) { 3569 i = a->compressedrow.nrows; 3570 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3571 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3572 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3573 } else { 3574 c->compressedrow.use = PETSC_FALSE; 3575 c->compressedrow.i = NULL; 3576 c->compressedrow.rindex = NULL; 3577 } 3578 C->nonzerostate = A->nonzerostate; 3579 3580 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3581 PetscFunctionReturn(0); 3582 } 3583 3584 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3585 { 3586 PetscFunctionBegin; 3587 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3588 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3589 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3590 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3591 PetscFunctionReturn(0); 3592 } 3593 3594 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3595 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3596 { 3597 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3598 PetscInt *rowidxs, *colidxs; 3599 PetscScalar *matvals; 3600 3601 PetscFunctionBegin; 3602 PetscCall(PetscViewerSetUp(viewer)); 3603 3604 /* read matrix header */ 3605 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3606 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3607 M = header[1]; 3608 N = header[2]; 3609 nz = header[3]; 3610 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3611 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3612 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3613 3614 /* set block sizes from the viewer's .info file */ 3615 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3616 /* set local and global sizes if not set already */ 3617 if (mat->rmap->n < 0) mat->rmap->n = M; 3618 if (mat->cmap->n < 0) mat->cmap->n = N; 3619 if (mat->rmap->N < 0) mat->rmap->N = M; 3620 if (mat->cmap->N < 0) mat->cmap->N = N; 3621 PetscCall(PetscLayoutSetUp(mat->rmap)); 3622 PetscCall(PetscLayoutSetUp(mat->cmap)); 3623 3624 /* check if the matrix sizes are correct */ 3625 PetscCall(MatGetSize(mat, &rows, &cols)); 3626 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3627 PetscCall(MatGetBlockSize(mat, &bs)); 3628 PetscCall(MatGetLocalSize(mat, &m, &n)); 3629 mbs = m / bs; 3630 nbs = n / bs; 3631 3632 /* read in row lengths, column indices and nonzero values */ 3633 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3634 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3635 rowidxs[0] = 0; 3636 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3637 sum = rowidxs[m]; 3638 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3639 3640 /* read in column indices and nonzero values */ 3641 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3642 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3643 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3644 3645 { /* preallocate matrix storage */ 3646 PetscBT bt; /* helper bit set to count nonzeros */ 3647 PetscInt *nnz; 3648 PetscBool sbaij; 3649 3650 PetscCall(PetscBTCreate(nbs, &bt)); 3651 PetscCall(PetscCalloc1(mbs, &nnz)); 3652 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3653 for (i = 0; i < mbs; i++) { 3654 PetscCall(PetscBTMemzero(nbs, bt)); 3655 for (k = 0; k < bs; k++) { 3656 PetscInt row = bs * i + k; 3657 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3658 PetscInt col = colidxs[j]; 3659 if (!sbaij || col >= row) 3660 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3661 } 3662 } 3663 } 3664 PetscCall(PetscBTDestroy(&bt)); 3665 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3666 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3667 PetscCall(PetscFree(nnz)); 3668 } 3669 3670 /* store matrix values */ 3671 for (i = 0; i < m; i++) { 3672 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3673 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3674 } 3675 3676 PetscCall(PetscFree(rowidxs)); 3677 PetscCall(PetscFree2(colidxs, matvals)); 3678 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3679 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3680 PetscFunctionReturn(0); 3681 } 3682 3683 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3684 { 3685 PetscBool isbinary; 3686 3687 PetscFunctionBegin; 3688 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3689 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3690 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3691 PetscFunctionReturn(0); 3692 } 3693 3694 /*@C 3695 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3696 compressed row) format. For good matrix assembly performance the 3697 user should preallocate the matrix storage by setting the parameter nz 3698 (or the array nnz). By setting these parameters accurately, performance 3699 during matrix assembly can be increased by more than a factor of 50. 3700 3701 Collective 3702 3703 Input Parameters: 3704 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3705 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3706 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3707 . m - number of rows 3708 . n - number of columns 3709 . nz - number of nonzero blocks per block row (same for all rows) 3710 - nnz - array containing the number of nonzero blocks in the various block rows 3711 (possibly different for each block row) or NULL 3712 3713 Output Parameter: 3714 . A - the matrix 3715 3716 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3717 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3718 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3719 3720 Options Database Keys: 3721 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3722 - -mat_block_size - size of the blocks to use 3723 3724 Level: intermediate 3725 3726 Notes: 3727 The number of rows and columns must be divisible by blocksize. 3728 3729 If the nnz parameter is given then the nz parameter is ignored 3730 3731 A nonzero block is any block that as 1 or more nonzeros in it 3732 3733 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3734 storage. That is, the stored row and column indices can begin at 3735 either one (as in Fortran) or zero. See the users' manual for details. 3736 3737 Specify the preallocated storage with either nz or nnz (not both). 3738 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3739 allocation. See [Sparse Matrices](sec_matsparse) for details. 3740 matrices. 3741 3742 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3743 @*/ 3744 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3745 { 3746 PetscFunctionBegin; 3747 PetscCall(MatCreate(comm, A)); 3748 PetscCall(MatSetSizes(*A, m, n, m, n)); 3749 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3750 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3751 PetscFunctionReturn(0); 3752 } 3753 3754 /*@C 3755 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3756 per row in the matrix. For good matrix assembly performance the 3757 user should preallocate the matrix storage by setting the parameter nz 3758 (or the array nnz). By setting these parameters accurately, performance 3759 during matrix assembly can be increased by more than a factor of 50. 3760 3761 Collective 3762 3763 Input Parameters: 3764 + B - the matrix 3765 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3766 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3767 . nz - number of block nonzeros per block row (same for all rows) 3768 - nnz - array containing the number of block nonzeros in the various block rows 3769 (possibly different for each block row) or NULL 3770 3771 Options Database Keys: 3772 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3773 - -mat_block_size - size of the blocks to use 3774 3775 Level: intermediate 3776 3777 Notes: 3778 If the nnz parameter is given then the nz parameter is ignored 3779 3780 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3781 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3782 You can also run with the option -info and look for messages with the string 3783 malloc in them to see if additional memory allocation was needed. 3784 3785 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3786 storage. That is, the stored row and column indices can begin at 3787 either one (as in Fortran) or zero. See the users' manual for details. 3788 3789 Specify the preallocated storage with either nz or nnz (not both). 3790 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3791 allocation. See [Sparse Matrices](sec_matsparse) for details. 3792 3793 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3794 @*/ 3795 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3796 { 3797 PetscFunctionBegin; 3798 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3799 PetscValidType(B, 1); 3800 PetscValidLogicalCollectiveInt(B, bs, 2); 3801 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3802 PetscFunctionReturn(0); 3803 } 3804 3805 /*@C 3806 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3807 3808 Collective 3809 3810 Input Parameters: 3811 + B - the matrix 3812 . i - the indices into j for the start of each local row (starts with zero) 3813 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3814 - v - optional values in the matrix 3815 3816 Level: advanced 3817 3818 Notes: 3819 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3820 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3821 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3822 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3823 block column and the second index is over columns within a block. 3824 3825 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3826 3827 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3828 @*/ 3829 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3830 { 3831 PetscFunctionBegin; 3832 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3833 PetscValidType(B, 1); 3834 PetscValidLogicalCollectiveInt(B, bs, 2); 3835 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /*@ 3840 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3841 3842 Collective 3843 3844 Input Parameters: 3845 + comm - must be an MPI communicator of size 1 3846 . bs - size of block 3847 . m - number of rows 3848 . n - number of columns 3849 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3850 . j - column indices 3851 - a - matrix values 3852 3853 Output Parameter: 3854 . mat - the matrix 3855 3856 Level: advanced 3857 3858 Notes: 3859 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3860 once the matrix is destroyed 3861 3862 You cannot set new nonzero locations into this matrix, that will generate an error. 3863 3864 The i and j indices are 0 based 3865 3866 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3867 3868 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3869 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3870 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3871 with column-major ordering within blocks. 3872 3873 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3874 @*/ 3875 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3876 { 3877 PetscInt ii; 3878 Mat_SeqBAIJ *baij; 3879 3880 PetscFunctionBegin; 3881 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3882 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3883 3884 PetscCall(MatCreate(comm, mat)); 3885 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3886 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3887 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3888 baij = (Mat_SeqBAIJ *)(*mat)->data; 3889 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3890 3891 baij->i = i; 3892 baij->j = j; 3893 baij->a = a; 3894 3895 baij->singlemalloc = PETSC_FALSE; 3896 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3897 baij->free_a = PETSC_FALSE; 3898 baij->free_ij = PETSC_FALSE; 3899 3900 for (ii = 0; ii < m; ii++) { 3901 baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii]; 3902 PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]); 3903 } 3904 if (PetscDefined(USE_DEBUG)) { 3905 for (ii = 0; ii < baij->i[m]; ii++) { 3906 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3907 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3908 } 3909 } 3910 3911 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3912 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3913 PetscFunctionReturn(0); 3914 } 3915 3916 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3917 { 3918 PetscFunctionBegin; 3919 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3920 PetscFunctionReturn(0); 3921 } 3922