1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 12 #define TYPE BAIJ 13 #define TYPE_BS 14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 15 #undef TYPE_BS 16 #define TYPE_BS _BS 17 #define TYPE_BS_ON 18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 19 #undef TYPE_BS 20 #include "../src/mat/impls/aij/seq/seqhashmat.h" 21 #undef TYPE 22 #undef TYPE_BS_ON 23 24 #if defined(PETSC_HAVE_HYPRE) 25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 26 #endif 27 28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 30 #endif 31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 32 33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 34 { 35 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 36 PetscInt m, n, ib, jb, bs = A->rmap->bs; 37 MatScalar *a_val = a_aij->a; 38 39 PetscFunctionBegin; 40 PetscCall(MatGetSize(A, &m, &n)); 41 PetscCall(PetscArrayzero(reductions, n)); 42 if (type == NORM_2) { 43 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 44 for (jb = 0; jb < bs; jb++) { 45 for (ib = 0; ib < bs; ib++) { 46 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 47 a_val++; 48 } 49 } 50 } 51 } else if (type == NORM_1) { 52 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 53 for (jb = 0; jb < bs; jb++) { 54 for (ib = 0; ib < bs; ib++) { 55 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 56 a_val++; 57 } 58 } 59 } 60 } else if (type == NORM_INFINITY) { 61 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 62 for (jb = 0; jb < bs; jb++) { 63 for (ib = 0; ib < bs; ib++) { 64 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 65 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 66 a_val++; 67 } 68 } 69 } 70 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 71 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 72 for (jb = 0; jb < bs; jb++) { 73 for (ib = 0; ib < bs; ib++) { 74 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 75 a_val++; 76 } 77 } 78 } 79 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 81 for (jb = 0; jb < bs; jb++) { 82 for (ib = 0; ib < bs; ib++) { 83 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 84 a_val++; 85 } 86 } 87 } 88 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 89 if (type == NORM_2) { 90 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 91 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 92 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 93 } 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 98 { 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 100 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 101 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 102 PetscReal shift = 0.0; 103 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 104 105 PetscFunctionBegin; 106 allowzeropivot = PetscNot(A->erroriffailure); 107 108 if (a->idiagvalid) { 109 if (values) *values = a->idiag; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 113 diag_offset = a->diag; 114 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 115 diag = a->idiag; 116 if (values) *values = a->idiag; 117 /* factor and invert each block */ 118 switch (bs) { 119 case 1: 120 for (i = 0; i < mbs; i++) { 121 odiag = v + 1 * diag_offset[i]; 122 diag[0] = odiag[0]; 123 124 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 125 if (allowzeropivot) { 126 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 128 A->factorerror_zeropivot_row = i; 129 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 130 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * diag_offset[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * diag_offset[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * diag_offset[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * diag_offset[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * diag_offset[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * diag_offset[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 /* 1414 Checks for missing diagonals 1415 */ 1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1419 PetscInt *diag, *ii = a->i, i; 1420 1421 PetscFunctionBegin; 1422 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1423 *missing = PETSC_FALSE; 1424 if (A->rmap->n > 0 && !ii) { 1425 *missing = PETSC_TRUE; 1426 if (d) *d = 0; 1427 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1428 } else { 1429 PetscInt n; 1430 n = PetscMin(a->mbs, a->nbs); 1431 diag = a->diag; 1432 for (i = 0; i < n; i++) { 1433 if (diag[i] >= ii[i + 1]) { 1434 *missing = PETSC_TRUE; 1435 if (d) *d = i; 1436 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1437 break; 1438 } 1439 } 1440 } 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1445 { 1446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1447 PetscInt i, j, m = a->mbs; 1448 1449 PetscFunctionBegin; 1450 if (!a->diag) { 1451 PetscCall(PetscMalloc1(m, &a->diag)); 1452 a->free_diag = PETSC_TRUE; 1453 } 1454 for (i = 0; i < m; i++) { 1455 a->diag[i] = a->i[i + 1]; 1456 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1457 if (a->j[j] == i) { 1458 a->diag[i] = j; 1459 break; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1467 { 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1469 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1470 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1471 1472 PetscFunctionBegin; 1473 *nn = n; 1474 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1475 if (symmetric) { 1476 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1477 nz = tia[n]; 1478 } else { 1479 tia = a->i; 1480 tja = a->j; 1481 } 1482 1483 if (!blockcompressed && bs > 1) { 1484 (*nn) *= bs; 1485 /* malloc & create the natural set of indices */ 1486 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1487 if (n) { 1488 (*ia)[0] = oshift; 1489 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1490 } 1491 1492 for (i = 1; i < n; i++) { 1493 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1494 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1495 } 1496 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1497 1498 if (inja) { 1499 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1500 cnt = 0; 1501 for (i = 0; i < n; i++) { 1502 for (j = 0; j < bs; j++) { 1503 for (k = tia[i]; k < tia[i + 1]; k++) { 1504 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1505 } 1506 } 1507 } 1508 } 1509 1510 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1511 PetscCall(PetscFree(tia)); 1512 PetscCall(PetscFree(tja)); 1513 } 1514 } else if (oshift == 1) { 1515 if (symmetric) { 1516 nz = tia[A->rmap->n / bs]; 1517 /* add 1 to i and j indices */ 1518 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1519 *ia = tia; 1520 if (ja) { 1521 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1522 *ja = tja; 1523 } 1524 } else { 1525 nz = a->i[A->rmap->n / bs]; 1526 /* malloc space and add 1 to i and j indices */ 1527 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1528 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1529 if (ja) { 1530 PetscCall(PetscMalloc1(nz, ja)); 1531 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1532 } 1533 } 1534 } else { 1535 *ia = tia; 1536 if (ja) *ja = tja; 1537 } 1538 PetscFunctionReturn(PETSC_SUCCESS); 1539 } 1540 1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1542 { 1543 PetscFunctionBegin; 1544 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1545 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1546 PetscCall(PetscFree(*ia)); 1547 if (ja) PetscCall(PetscFree(*ja)); 1548 } 1549 PetscFunctionReturn(PETSC_SUCCESS); 1550 } 1551 1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1553 { 1554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1555 1556 PetscFunctionBegin; 1557 #if defined(PETSC_USE_LOG) 1558 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1559 #endif 1560 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1561 PetscCall(ISDestroy(&a->row)); 1562 PetscCall(ISDestroy(&a->col)); 1563 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1564 PetscCall(PetscFree(a->idiag)); 1565 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1566 PetscCall(PetscFree(a->solve_work)); 1567 PetscCall(PetscFree(a->mult_work)); 1568 PetscCall(PetscFree(a->sor_workt)); 1569 PetscCall(PetscFree(a->sor_work)); 1570 PetscCall(ISDestroy(&a->icol)); 1571 PetscCall(PetscFree(a->saved_values)); 1572 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1573 1574 PetscCall(MatDestroy(&a->sbaijMat)); 1575 PetscCall(MatDestroy(&a->parent)); 1576 PetscCall(PetscFree(A->data)); 1577 1578 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1579 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1580 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1581 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1582 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1583 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1584 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1585 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1586 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1590 #if defined(PETSC_HAVE_HYPRE) 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1592 #endif 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1595 PetscFunctionReturn(PETSC_SUCCESS); 1596 } 1597 1598 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1599 { 1600 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1601 1602 PetscFunctionBegin; 1603 switch (op) { 1604 case MAT_ROW_ORIENTED: 1605 a->roworiented = flg; 1606 break; 1607 case MAT_KEEP_NONZERO_PATTERN: 1608 a->keepnonzeropattern = flg; 1609 break; 1610 case MAT_NEW_NONZERO_LOCATIONS: 1611 a->nonew = (flg ? 0 : 1); 1612 break; 1613 case MAT_NEW_NONZERO_LOCATION_ERR: 1614 a->nonew = (flg ? -1 : 0); 1615 break; 1616 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1617 a->nonew = (flg ? -2 : 0); 1618 break; 1619 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1620 a->nounused = (flg ? -1 : 0); 1621 break; 1622 case MAT_FORCE_DIAGONAL_ENTRIES: 1623 case MAT_IGNORE_OFF_PROC_ENTRIES: 1624 case MAT_USE_HASH_TABLE: 1625 case MAT_SORTED_FULL: 1626 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1627 break; 1628 case MAT_SPD: 1629 case MAT_SYMMETRIC: 1630 case MAT_STRUCTURALLY_SYMMETRIC: 1631 case MAT_HERMITIAN: 1632 case MAT_SYMMETRY_ETERNAL: 1633 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1634 case MAT_SUBMAT_SINGLEIS: 1635 case MAT_STRUCTURE_ONLY: 1636 case MAT_SPD_ETERNAL: 1637 /* if the diagonal matrix is square it inherits some of the properties above */ 1638 break; 1639 default: 1640 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1641 } 1642 PetscFunctionReturn(PETSC_SUCCESS); 1643 } 1644 1645 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1646 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1647 { 1648 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1649 MatScalar *aa_i; 1650 PetscScalar *v_i; 1651 1652 PetscFunctionBegin; 1653 bs = A->rmap->bs; 1654 bs2 = bs * bs; 1655 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1656 1657 bn = row / bs; /* Block number */ 1658 bp = row % bs; /* Block Position */ 1659 M = ai[bn + 1] - ai[bn]; 1660 *nz = bs * M; 1661 1662 if (v) { 1663 *v = NULL; 1664 if (*nz) { 1665 PetscCall(PetscMalloc1(*nz, v)); 1666 for (i = 0; i < M; i++) { /* for each block in the block row */ 1667 v_i = *v + i * bs; 1668 aa_i = aa + bs2 * (ai[bn] + i); 1669 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1670 } 1671 } 1672 } 1673 1674 if (idx) { 1675 *idx = NULL; 1676 if (*nz) { 1677 PetscCall(PetscMalloc1(*nz, idx)); 1678 for (i = 0; i < M; i++) { /* for each block in the block row */ 1679 idx_i = *idx + i * bs; 1680 itmp = bs * aj[ai[bn] + i]; 1681 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1682 } 1683 } 1684 } 1685 PetscFunctionReturn(PETSC_SUCCESS); 1686 } 1687 1688 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1689 { 1690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1691 1692 PetscFunctionBegin; 1693 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1694 PetscFunctionReturn(PETSC_SUCCESS); 1695 } 1696 1697 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1698 { 1699 PetscFunctionBegin; 1700 if (nz) *nz = 0; 1701 if (idx) PetscCall(PetscFree(*idx)); 1702 if (v) PetscCall(PetscFree(*v)); 1703 PetscFunctionReturn(PETSC_SUCCESS); 1704 } 1705 1706 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1707 { 1708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1709 Mat C; 1710 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1711 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1712 MatScalar *ata, *aa = a->a; 1713 1714 PetscFunctionBegin; 1715 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1716 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1717 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1718 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1719 1720 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1721 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1722 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1723 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1724 1725 at = (Mat_SeqBAIJ *)C->data; 1726 ati = at->i; 1727 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1728 } else { 1729 C = *B; 1730 at = (Mat_SeqBAIJ *)C->data; 1731 ati = at->i; 1732 } 1733 1734 atj = at->j; 1735 ata = at->a; 1736 1737 /* Copy ati into atfill so we have locations of the next free space in atj */ 1738 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1739 1740 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1741 for (i = 0; i < mbs; i++) { 1742 anzj = ai[i + 1] - ai[i]; 1743 for (j = 0; j < anzj; j++) { 1744 atj[atfill[*aj]] = i; 1745 for (kr = 0; kr < bs; kr++) { 1746 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1747 } 1748 atfill[*aj++] += 1; 1749 } 1750 } 1751 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1752 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1753 1754 /* Clean up temporary space and complete requests. */ 1755 PetscCall(PetscFree(atfill)); 1756 1757 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1758 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1759 *B = C; 1760 } else { 1761 PetscCall(MatHeaderMerge(A, &C)); 1762 } 1763 PetscFunctionReturn(PETSC_SUCCESS); 1764 } 1765 1766 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1767 { 1768 Mat Btrans; 1769 1770 PetscFunctionBegin; 1771 *f = PETSC_FALSE; 1772 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1773 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1774 PetscCall(MatDestroy(&Btrans)); 1775 PetscFunctionReturn(PETSC_SUCCESS); 1776 } 1777 1778 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1779 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1780 { 1781 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1782 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1783 PetscInt *rowlens, *colidxs; 1784 PetscScalar *matvals; 1785 1786 PetscFunctionBegin; 1787 PetscCall(PetscViewerSetUp(viewer)); 1788 1789 M = mat->rmap->N; 1790 N = mat->cmap->N; 1791 m = mat->rmap->n; 1792 bs = mat->rmap->bs; 1793 nz = bs * bs * A->nz; 1794 1795 /* write matrix header */ 1796 header[0] = MAT_FILE_CLASSID; 1797 header[1] = M; 1798 header[2] = N; 1799 header[3] = nz; 1800 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1801 1802 /* store row lengths */ 1803 PetscCall(PetscMalloc1(m, &rowlens)); 1804 for (cnt = 0, i = 0; i < A->mbs; i++) 1805 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1806 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1807 PetscCall(PetscFree(rowlens)); 1808 1809 /* store column indices */ 1810 PetscCall(PetscMalloc1(nz, &colidxs)); 1811 for (cnt = 0, i = 0; i < A->mbs; i++) 1812 for (k = 0; k < bs; k++) 1813 for (j = A->i[i]; j < A->i[i + 1]; j++) 1814 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1815 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1816 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1817 PetscCall(PetscFree(colidxs)); 1818 1819 /* store nonzero values */ 1820 PetscCall(PetscMalloc1(nz, &matvals)); 1821 for (cnt = 0, i = 0; i < A->mbs; i++) 1822 for (k = 0; k < bs; k++) 1823 for (j = A->i[i]; j < A->i[i + 1]; j++) 1824 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1825 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1826 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1827 PetscCall(PetscFree(matvals)); 1828 1829 /* write block size option to the viewer's .info file */ 1830 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1831 PetscFunctionReturn(PETSC_SUCCESS); 1832 } 1833 1834 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1835 { 1836 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1837 PetscInt i, bs = A->rmap->bs, k; 1838 1839 PetscFunctionBegin; 1840 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1841 for (i = 0; i < a->mbs; i++) { 1842 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1843 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1844 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1845 } 1846 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1847 PetscFunctionReturn(PETSC_SUCCESS); 1848 } 1849 1850 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1851 { 1852 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1853 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1854 PetscViewerFormat format; 1855 1856 PetscFunctionBegin; 1857 if (A->structure_only) { 1858 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1859 PetscFunctionReturn(PETSC_SUCCESS); 1860 } 1861 1862 PetscCall(PetscViewerGetFormat(viewer, &format)); 1863 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1864 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1865 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1866 const char *matname; 1867 Mat aij; 1868 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1869 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1870 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1871 PetscCall(MatView(aij, viewer)); 1872 PetscCall(MatDestroy(&aij)); 1873 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1874 PetscFunctionReturn(PETSC_SUCCESS); 1875 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1876 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1877 for (i = 0; i < a->mbs; i++) { 1878 for (j = 0; j < bs; j++) { 1879 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1880 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1881 for (l = 0; l < bs; l++) { 1882 #if defined(PETSC_USE_COMPLEX) 1883 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1884 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1885 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1886 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1887 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1888 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1889 } 1890 #else 1891 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1892 #endif 1893 } 1894 } 1895 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1896 } 1897 } 1898 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1899 } else { 1900 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1901 for (i = 0; i < a->mbs; i++) { 1902 for (j = 0; j < bs; j++) { 1903 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1904 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1905 for (l = 0; l < bs; l++) { 1906 #if defined(PETSC_USE_COMPLEX) 1907 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1908 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1909 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1910 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1911 } else { 1912 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1913 } 1914 #else 1915 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1916 #endif 1917 } 1918 } 1919 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1920 } 1921 } 1922 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1923 } 1924 PetscCall(PetscViewerFlush(viewer)); 1925 PetscFunctionReturn(PETSC_SUCCESS); 1926 } 1927 1928 #include <petscdraw.h> 1929 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1930 { 1931 Mat A = (Mat)Aa; 1932 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1933 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1934 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1935 MatScalar *aa; 1936 PetscViewer viewer; 1937 PetscViewerFormat format; 1938 1939 PetscFunctionBegin; 1940 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1941 PetscCall(PetscViewerGetFormat(viewer, &format)); 1942 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1943 1944 /* loop over matrix elements drawing boxes */ 1945 1946 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1947 PetscDrawCollectiveBegin(draw); 1948 /* Blue for negative, Cyan for zero and Red for positive */ 1949 color = PETSC_DRAW_BLUE; 1950 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1951 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1952 y_l = A->rmap->N - row - 1.0; 1953 y_r = y_l + 1.0; 1954 x_l = a->j[j] * bs; 1955 x_r = x_l + 1.0; 1956 aa = a->a + j * bs2; 1957 for (k = 0; k < bs; k++) { 1958 for (l = 0; l < bs; l++) { 1959 if (PetscRealPart(*aa++) >= 0.) continue; 1960 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1961 } 1962 } 1963 } 1964 } 1965 color = PETSC_DRAW_CYAN; 1966 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1967 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1968 y_l = A->rmap->N - row - 1.0; 1969 y_r = y_l + 1.0; 1970 x_l = a->j[j] * bs; 1971 x_r = x_l + 1.0; 1972 aa = a->a + j * bs2; 1973 for (k = 0; k < bs; k++) { 1974 for (l = 0; l < bs; l++) { 1975 if (PetscRealPart(*aa++) != 0.) continue; 1976 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1977 } 1978 } 1979 } 1980 } 1981 color = PETSC_DRAW_RED; 1982 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1983 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1984 y_l = A->rmap->N - row - 1.0; 1985 y_r = y_l + 1.0; 1986 x_l = a->j[j] * bs; 1987 x_r = x_l + 1.0; 1988 aa = a->a + j * bs2; 1989 for (k = 0; k < bs; k++) { 1990 for (l = 0; l < bs; l++) { 1991 if (PetscRealPart(*aa++) <= 0.) continue; 1992 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1993 } 1994 } 1995 } 1996 } 1997 PetscDrawCollectiveEnd(draw); 1998 } else { 1999 /* use contour shading to indicate magnitude of values */ 2000 /* first determine max of all nonzero values */ 2001 PetscReal minv = 0.0, maxv = 0.0; 2002 PetscDraw popup; 2003 2004 for (i = 0; i < a->nz * a->bs2; i++) { 2005 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2006 } 2007 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2008 PetscCall(PetscDrawGetPopup(draw, &popup)); 2009 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2010 2011 PetscDrawCollectiveBegin(draw); 2012 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2013 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2014 y_l = A->rmap->N - row - 1.0; 2015 y_r = y_l + 1.0; 2016 x_l = a->j[j] * bs; 2017 x_r = x_l + 1.0; 2018 aa = a->a + j * bs2; 2019 for (k = 0; k < bs; k++) { 2020 for (l = 0; l < bs; l++) { 2021 MatScalar v = *aa++; 2022 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2023 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2024 } 2025 } 2026 } 2027 } 2028 PetscDrawCollectiveEnd(draw); 2029 } 2030 PetscFunctionReturn(PETSC_SUCCESS); 2031 } 2032 2033 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2034 { 2035 PetscReal xl, yl, xr, yr, w, h; 2036 PetscDraw draw; 2037 PetscBool isnull; 2038 2039 PetscFunctionBegin; 2040 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2041 PetscCall(PetscDrawIsNull(draw, &isnull)); 2042 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2043 2044 xr = A->cmap->n; 2045 yr = A->rmap->N; 2046 h = yr / 10.0; 2047 w = xr / 10.0; 2048 xr += w; 2049 yr += h; 2050 xl = -w; 2051 yl = -h; 2052 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2053 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2054 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2055 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2056 PetscCall(PetscDrawSave(draw)); 2057 PetscFunctionReturn(PETSC_SUCCESS); 2058 } 2059 2060 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2061 { 2062 PetscBool iascii, isbinary, isdraw; 2063 2064 PetscFunctionBegin; 2065 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2066 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2067 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2068 if (iascii) { 2069 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2070 } else if (isbinary) { 2071 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2072 } else if (isdraw) { 2073 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2074 } else { 2075 Mat B; 2076 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2077 PetscCall(MatView(B, viewer)); 2078 PetscCall(MatDestroy(&B)); 2079 } 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2084 { 2085 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2086 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2087 PetscInt *ai = a->i, *ailen = a->ilen; 2088 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2089 MatScalar *ap, *aa = a->a; 2090 2091 PetscFunctionBegin; 2092 for (k = 0; k < m; k++) { /* loop over rows */ 2093 row = im[k]; 2094 brow = row / bs; 2095 if (row < 0) { 2096 v += n; 2097 continue; 2098 } /* negative row */ 2099 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2100 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2101 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2102 nrow = ailen[brow]; 2103 for (l = 0; l < n; l++) { /* loop over columns */ 2104 if (in[l] < 0) { 2105 v++; 2106 continue; 2107 } /* negative column */ 2108 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2109 col = in[l]; 2110 bcol = col / bs; 2111 cidx = col % bs; 2112 ridx = row % bs; 2113 high = nrow; 2114 low = 0; /* assume unsorted */ 2115 while (high - low > 5) { 2116 t = (low + high) / 2; 2117 if (rp[t] > bcol) high = t; 2118 else low = t; 2119 } 2120 for (i = low; i < high; i++) { 2121 if (rp[i] > bcol) break; 2122 if (rp[i] == bcol) { 2123 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2124 goto finished; 2125 } 2126 } 2127 *v++ = 0.0; 2128 finished:; 2129 } 2130 } 2131 PetscFunctionReturn(PETSC_SUCCESS); 2132 } 2133 2134 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2135 { 2136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2137 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2138 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2139 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2140 PetscBool roworiented = a->roworiented; 2141 const PetscScalar *value = v; 2142 MatScalar *ap = NULL, *aa = a->a, *bap; 2143 2144 PetscFunctionBegin; 2145 if (roworiented) { 2146 stepval = (n - 1) * bs; 2147 } else { 2148 stepval = (m - 1) * bs; 2149 } 2150 for (k = 0; k < m; k++) { /* loop over added rows */ 2151 row = im[k]; 2152 if (row < 0) continue; 2153 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2154 rp = aj + ai[row]; 2155 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2156 rmax = imax[row]; 2157 nrow = ailen[row]; 2158 low = 0; 2159 high = nrow; 2160 for (l = 0; l < n; l++) { /* loop over added columns */ 2161 if (in[l] < 0) continue; 2162 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2163 col = in[l]; 2164 if (!A->structure_only) { 2165 if (roworiented) { 2166 value = v + (k * (stepval + bs) + l) * bs; 2167 } else { 2168 value = v + (l * (stepval + bs) + k) * bs; 2169 } 2170 } 2171 if (col <= lastcol) low = 0; 2172 else high = nrow; 2173 lastcol = col; 2174 while (high - low > 7) { 2175 t = (low + high) / 2; 2176 if (rp[t] > col) high = t; 2177 else low = t; 2178 } 2179 for (i = low; i < high; i++) { 2180 if (rp[i] > col) break; 2181 if (rp[i] == col) { 2182 if (A->structure_only) goto noinsert2; 2183 bap = ap + bs2 * i; 2184 if (roworiented) { 2185 if (is == ADD_VALUES) { 2186 for (ii = 0; ii < bs; ii++, value += stepval) { 2187 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2188 } 2189 } else { 2190 for (ii = 0; ii < bs; ii++, value += stepval) { 2191 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2192 } 2193 } 2194 } else { 2195 if (is == ADD_VALUES) { 2196 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2197 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2198 bap += bs; 2199 } 2200 } else { 2201 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2202 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2203 bap += bs; 2204 } 2205 } 2206 } 2207 goto noinsert2; 2208 } 2209 } 2210 if (nonew == 1) goto noinsert2; 2211 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2212 if (A->structure_only) { 2213 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2214 } else { 2215 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2216 } 2217 N = nrow++ - 1; 2218 high++; 2219 /* shift up all the later entries in this row */ 2220 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2221 rp[i] = col; 2222 if (!A->structure_only) { 2223 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2224 bap = ap + bs2 * i; 2225 if (roworiented) { 2226 for (ii = 0; ii < bs; ii++, value += stepval) { 2227 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2228 } 2229 } else { 2230 for (ii = 0; ii < bs; ii++, value += stepval) { 2231 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2232 } 2233 } 2234 } 2235 noinsert2:; 2236 low = i; 2237 } 2238 ailen[row] = nrow; 2239 } 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } 2242 2243 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2244 { 2245 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2246 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2247 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2248 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2249 MatScalar *aa = a->a, *ap; 2250 PetscReal ratio = 0.6; 2251 2252 PetscFunctionBegin; 2253 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 2254 2255 if (m) rmax = ailen[0]; 2256 for (i = 1; i < mbs; i++) { 2257 /* move each row back by the amount of empty slots (fshift) before it*/ 2258 fshift += imax[i - 1] - ailen[i - 1]; 2259 rmax = PetscMax(rmax, ailen[i]); 2260 if (fshift) { 2261 ip = aj + ai[i]; 2262 ap = aa + bs2 * ai[i]; 2263 N = ailen[i]; 2264 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2265 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2266 } 2267 ai[i] = ai[i - 1] + ailen[i - 1]; 2268 } 2269 if (mbs) { 2270 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2271 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2272 } 2273 2274 /* reset ilen and imax for each row */ 2275 a->nonzerorowcnt = 0; 2276 if (A->structure_only) { 2277 PetscCall(PetscFree2(a->imax, a->ilen)); 2278 } else { /* !A->structure_only */ 2279 for (i = 0; i < mbs; i++) { 2280 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2281 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2282 } 2283 } 2284 a->nz = ai[mbs]; 2285 2286 /* diagonals may have moved, so kill the diagonal pointers */ 2287 a->idiagvalid = PETSC_FALSE; 2288 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2289 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2290 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2291 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2292 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2293 2294 A->info.mallocs += a->reallocs; 2295 a->reallocs = 0; 2296 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2297 a->rmax = rmax; 2298 2299 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2300 PetscFunctionReturn(PETSC_SUCCESS); 2301 } 2302 2303 /* 2304 This function returns an array of flags which indicate the locations of contiguous 2305 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2306 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2307 Assume: sizes should be long enough to hold all the values. 2308 */ 2309 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2310 { 2311 PetscInt j = 0; 2312 2313 PetscFunctionBegin; 2314 for (PetscInt i = 0; i < n; j++) { 2315 PetscInt row = idx[i]; 2316 if (row % bs != 0) { /* Not the beginning of a block */ 2317 sizes[j] = 1; 2318 i++; 2319 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2320 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2321 i++; 2322 } else { /* Beginning of the block, so check if the complete block exists */ 2323 PetscBool flg = PETSC_TRUE; 2324 for (PetscInt k = 1; k < bs; k++) { 2325 if (row + k != idx[i + k]) { /* break in the block */ 2326 flg = PETSC_FALSE; 2327 break; 2328 } 2329 } 2330 if (flg) { /* No break in the bs */ 2331 sizes[j] = bs; 2332 i += bs; 2333 } else { 2334 sizes[j] = 1; 2335 i++; 2336 } 2337 } 2338 } 2339 *bs_max = j; 2340 PetscFunctionReturn(PETSC_SUCCESS); 2341 } 2342 2343 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2344 { 2345 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2346 PetscInt i, j, k, count, *rows; 2347 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2348 PetscScalar zero = 0.0; 2349 MatScalar *aa; 2350 const PetscScalar *xx; 2351 PetscScalar *bb; 2352 2353 PetscFunctionBegin; 2354 /* fix right hand side if needed */ 2355 if (x && b) { 2356 PetscCall(VecGetArrayRead(x, &xx)); 2357 PetscCall(VecGetArray(b, &bb)); 2358 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2359 PetscCall(VecRestoreArrayRead(x, &xx)); 2360 PetscCall(VecRestoreArray(b, &bb)); 2361 } 2362 2363 /* Make a copy of the IS and sort it */ 2364 /* allocate memory for rows,sizes */ 2365 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2366 2367 /* copy IS values to rows, and sort them */ 2368 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2369 PetscCall(PetscSortInt(is_n, rows)); 2370 2371 if (baij->keepnonzeropattern) { 2372 for (i = 0; i < is_n; i++) sizes[i] = 1; 2373 bs_max = is_n; 2374 } else { 2375 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2376 A->nonzerostate++; 2377 } 2378 2379 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2380 row = rows[j]; 2381 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2382 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2383 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2384 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2385 if (diag != (PetscScalar)0.0) { 2386 if (baij->ilen[row / bs] > 0) { 2387 baij->ilen[row / bs] = 1; 2388 baij->j[baij->i[row / bs]] = row / bs; 2389 2390 PetscCall(PetscArrayzero(aa, count * bs)); 2391 } 2392 /* Now insert all the diagonal values for this bs */ 2393 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2394 } else { /* (diag == 0.0) */ 2395 baij->ilen[row / bs] = 0; 2396 } /* end (diag == 0.0) */ 2397 } else { /* (sizes[i] != bs) */ 2398 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2399 for (k = 0; k < count; k++) { 2400 aa[0] = zero; 2401 aa += bs; 2402 } 2403 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2404 } 2405 } 2406 2407 PetscCall(PetscFree2(rows, sizes)); 2408 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2413 { 2414 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2415 PetscInt i, j, k, count; 2416 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2417 PetscScalar zero = 0.0; 2418 MatScalar *aa; 2419 const PetscScalar *xx; 2420 PetscScalar *bb; 2421 PetscBool *zeroed, vecs = PETSC_FALSE; 2422 2423 PetscFunctionBegin; 2424 /* fix right hand side if needed */ 2425 if (x && b) { 2426 PetscCall(VecGetArrayRead(x, &xx)); 2427 PetscCall(VecGetArray(b, &bb)); 2428 vecs = PETSC_TRUE; 2429 } 2430 2431 /* zero the columns */ 2432 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2433 for (i = 0; i < is_n; i++) { 2434 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2435 zeroed[is_idx[i]] = PETSC_TRUE; 2436 } 2437 for (i = 0; i < A->rmap->N; i++) { 2438 if (!zeroed[i]) { 2439 row = i / bs; 2440 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2441 for (k = 0; k < bs; k++) { 2442 col = bs * baij->j[j] + k; 2443 if (zeroed[col]) { 2444 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2445 if (vecs) bb[i] -= aa[0] * xx[col]; 2446 aa[0] = 0.0; 2447 } 2448 } 2449 } 2450 } else if (vecs) bb[i] = diag * xx[i]; 2451 } 2452 PetscCall(PetscFree(zeroed)); 2453 if (vecs) { 2454 PetscCall(VecRestoreArrayRead(x, &xx)); 2455 PetscCall(VecRestoreArray(b, &bb)); 2456 } 2457 2458 /* zero the rows */ 2459 for (i = 0; i < is_n; i++) { 2460 row = is_idx[i]; 2461 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2462 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2463 for (k = 0; k < count; k++) { 2464 aa[0] = zero; 2465 aa += bs; 2466 } 2467 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2468 } 2469 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2470 PetscFunctionReturn(PETSC_SUCCESS); 2471 } 2472 2473 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2474 { 2475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2476 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2477 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2478 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2479 PetscInt ridx, cidx, bs2 = a->bs2; 2480 PetscBool roworiented = a->roworiented; 2481 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2482 2483 PetscFunctionBegin; 2484 for (k = 0; k < m; k++) { /* loop over added rows */ 2485 row = im[k]; 2486 brow = row / bs; 2487 if (row < 0) continue; 2488 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2489 rp = aj + ai[brow]; 2490 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2491 rmax = imax[brow]; 2492 nrow = ailen[brow]; 2493 low = 0; 2494 high = nrow; 2495 for (l = 0; l < n; l++) { /* loop over added columns */ 2496 if (in[l] < 0) continue; 2497 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2498 col = in[l]; 2499 bcol = col / bs; 2500 ridx = row % bs; 2501 cidx = col % bs; 2502 if (!A->structure_only) { 2503 if (roworiented) { 2504 value = v[l + k * n]; 2505 } else { 2506 value = v[k + l * m]; 2507 } 2508 } 2509 if (col <= lastcol) low = 0; 2510 else high = nrow; 2511 lastcol = col; 2512 while (high - low > 7) { 2513 t = (low + high) / 2; 2514 if (rp[t] > bcol) high = t; 2515 else low = t; 2516 } 2517 for (i = low; i < high; i++) { 2518 if (rp[i] > bcol) break; 2519 if (rp[i] == bcol) { 2520 bap = ap + bs2 * i + bs * cidx + ridx; 2521 if (!A->structure_only) { 2522 if (is == ADD_VALUES) *bap += value; 2523 else *bap = value; 2524 } 2525 goto noinsert1; 2526 } 2527 } 2528 if (nonew == 1) goto noinsert1; 2529 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2530 if (A->structure_only) { 2531 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2532 } else { 2533 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2534 } 2535 N = nrow++ - 1; 2536 high++; 2537 /* shift up all the later entries in this row */ 2538 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2539 rp[i] = bcol; 2540 if (!A->structure_only) { 2541 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2542 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2543 ap[bs2 * i + bs * cidx + ridx] = value; 2544 } 2545 a->nz++; 2546 A->nonzerostate++; 2547 noinsert1:; 2548 low = i; 2549 } 2550 ailen[brow] = nrow; 2551 } 2552 PetscFunctionReturn(PETSC_SUCCESS); 2553 } 2554 2555 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2556 { 2557 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2558 Mat outA; 2559 PetscBool row_identity, col_identity; 2560 2561 PetscFunctionBegin; 2562 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2563 PetscCall(ISIdentity(row, &row_identity)); 2564 PetscCall(ISIdentity(col, &col_identity)); 2565 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2566 2567 outA = inA; 2568 inA->factortype = MAT_FACTOR_LU; 2569 PetscCall(PetscFree(inA->solvertype)); 2570 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2571 2572 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2573 2574 PetscCall(PetscObjectReference((PetscObject)row)); 2575 PetscCall(ISDestroy(&a->row)); 2576 a->row = row; 2577 PetscCall(PetscObjectReference((PetscObject)col)); 2578 PetscCall(ISDestroy(&a->col)); 2579 a->col = col; 2580 2581 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2582 PetscCall(ISDestroy(&a->icol)); 2583 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2584 2585 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2586 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2587 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2588 PetscFunctionReturn(PETSC_SUCCESS); 2589 } 2590 2591 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2592 { 2593 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2594 2595 PetscFunctionBegin; 2596 baij->nz = baij->maxnz; 2597 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2598 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2599 PetscFunctionReturn(PETSC_SUCCESS); 2600 } 2601 2602 /*@ 2603 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2604 2605 Input Parameters: 2606 + mat - the `MATSEQBAIJ` matrix 2607 - indices - the column indices 2608 2609 Level: advanced 2610 2611 Notes: 2612 This can be called if you have precomputed the nonzero structure of the 2613 matrix and want to provide it to the matrix object to improve the performance 2614 of the `MatSetValues()` operation. 2615 2616 You MUST have set the correct numbers of nonzeros per row in the call to 2617 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2618 2619 MUST be called before any calls to `MatSetValues()` 2620 2621 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2622 @*/ 2623 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2624 { 2625 PetscFunctionBegin; 2626 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2627 PetscValidIntPointer(indices, 2); 2628 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 2632 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2633 { 2634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2635 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2636 PetscReal atmp; 2637 PetscScalar *x, zero = 0.0; 2638 MatScalar *aa; 2639 PetscInt ncols, brow, krow, kcol; 2640 2641 PetscFunctionBegin; 2642 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2643 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2644 bs = A->rmap->bs; 2645 aa = a->a; 2646 ai = a->i; 2647 aj = a->j; 2648 mbs = a->mbs; 2649 2650 PetscCall(VecSet(v, zero)); 2651 PetscCall(VecGetArray(v, &x)); 2652 PetscCall(VecGetLocalSize(v, &n)); 2653 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2654 for (i = 0; i < mbs; i++) { 2655 ncols = ai[1] - ai[0]; 2656 ai++; 2657 brow = bs * i; 2658 for (j = 0; j < ncols; j++) { 2659 for (kcol = 0; kcol < bs; kcol++) { 2660 for (krow = 0; krow < bs; krow++) { 2661 atmp = PetscAbsScalar(*aa); 2662 aa++; 2663 row = brow + krow; /* row index */ 2664 if (PetscAbsScalar(x[row]) < atmp) { 2665 x[row] = atmp; 2666 if (idx) idx[row] = bs * (*aj) + kcol; 2667 } 2668 } 2669 } 2670 aj++; 2671 } 2672 } 2673 PetscCall(VecRestoreArray(v, &x)); 2674 PetscFunctionReturn(PETSC_SUCCESS); 2675 } 2676 2677 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2678 { 2679 PetscFunctionBegin; 2680 /* If the two matrices have the same copy implementation, use fast copy. */ 2681 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2682 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2683 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2684 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2685 2686 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2687 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2688 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2689 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2690 } else { 2691 PetscCall(MatCopy_Basic(A, B, str)); 2692 } 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2697 { 2698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2699 2700 PetscFunctionBegin; 2701 *array = a->a; 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2706 { 2707 PetscFunctionBegin; 2708 *array = NULL; 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2713 { 2714 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2715 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2716 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2717 2718 PetscFunctionBegin; 2719 /* Set the number of nonzeros in the new matrix */ 2720 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2725 { 2726 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2727 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2728 PetscBLASInt one = 1; 2729 2730 PetscFunctionBegin; 2731 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2732 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2733 if (e) { 2734 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2735 if (e) { 2736 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2737 if (e) str = SAME_NONZERO_PATTERN; 2738 } 2739 } 2740 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2741 } 2742 if (str == SAME_NONZERO_PATTERN) { 2743 PetscScalar alpha = a; 2744 PetscBLASInt bnz; 2745 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2746 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2747 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2748 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2749 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2750 } else { 2751 Mat B; 2752 PetscInt *nnz; 2753 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2754 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2755 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2756 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2757 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2758 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2759 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2760 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2761 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2762 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2763 PetscCall(MatHeaderMerge(Y, &B)); 2764 PetscCall(PetscFree(nnz)); 2765 } 2766 PetscFunctionReturn(PETSC_SUCCESS); 2767 } 2768 2769 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2770 { 2771 #if PetscDefined(USE_COMPLEX) 2772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2773 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2774 MatScalar *aa = a->a; 2775 2776 PetscFunctionBegin; 2777 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2778 PetscFunctionReturn(PETSC_SUCCESS); 2779 #else 2780 (void)A; 2781 return PETSC_SUCCESS; 2782 #endif 2783 } 2784 2785 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2786 { 2787 #if PetscDefined(USE_COMPLEX) 2788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2789 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2790 MatScalar *aa = a->a; 2791 2792 PetscFunctionBegin; 2793 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2794 PetscFunctionReturn(PETSC_SUCCESS); 2795 #else 2796 (void)A; 2797 return PETSC_SUCCESS; 2798 #endif 2799 } 2800 2801 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2802 { 2803 #if PetscDefined(USE_COMPLEX) 2804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2805 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2806 MatScalar *aa = a->a; 2807 2808 PetscFunctionBegin; 2809 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2810 PetscFunctionReturn(PETSC_SUCCESS); 2811 #else 2812 (void)A; 2813 return PETSC_SUCCESS; 2814 #endif 2815 } 2816 2817 /* 2818 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2819 */ 2820 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2821 { 2822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2823 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2824 PetscInt nz = a->i[m], row, *jj, mr, col; 2825 2826 PetscFunctionBegin; 2827 *nn = n; 2828 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2829 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2830 PetscCall(PetscCalloc1(n, &collengths)); 2831 PetscCall(PetscMalloc1(n + 1, &cia)); 2832 PetscCall(PetscMalloc1(nz, &cja)); 2833 jj = a->j; 2834 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2835 cia[0] = oshift; 2836 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2837 PetscCall(PetscArrayzero(collengths, n)); 2838 jj = a->j; 2839 for (row = 0; row < m; row++) { 2840 mr = a->i[row + 1] - a->i[row]; 2841 for (i = 0; i < mr; i++) { 2842 col = *jj++; 2843 2844 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2845 } 2846 } 2847 PetscCall(PetscFree(collengths)); 2848 *ia = cia; 2849 *ja = cja; 2850 PetscFunctionReturn(PETSC_SUCCESS); 2851 } 2852 2853 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2854 { 2855 PetscFunctionBegin; 2856 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2857 PetscCall(PetscFree(*ia)); 2858 PetscCall(PetscFree(*ja)); 2859 PetscFunctionReturn(PETSC_SUCCESS); 2860 } 2861 2862 /* 2863 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2864 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2865 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2866 */ 2867 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2868 { 2869 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2870 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2871 PetscInt nz = a->i[m], row, *jj, mr, col; 2872 PetscInt *cspidx; 2873 2874 PetscFunctionBegin; 2875 *nn = n; 2876 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2877 2878 PetscCall(PetscCalloc1(n, &collengths)); 2879 PetscCall(PetscMalloc1(n + 1, &cia)); 2880 PetscCall(PetscMalloc1(nz, &cja)); 2881 PetscCall(PetscMalloc1(nz, &cspidx)); 2882 jj = a->j; 2883 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2884 cia[0] = oshift; 2885 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2886 PetscCall(PetscArrayzero(collengths, n)); 2887 jj = a->j; 2888 for (row = 0; row < m; row++) { 2889 mr = a->i[row + 1] - a->i[row]; 2890 for (i = 0; i < mr; i++) { 2891 col = *jj++; 2892 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2893 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2894 } 2895 } 2896 PetscCall(PetscFree(collengths)); 2897 *ia = cia; 2898 *ja = cja; 2899 *spidx = cspidx; 2900 PetscFunctionReturn(PETSC_SUCCESS); 2901 } 2902 2903 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2904 { 2905 PetscFunctionBegin; 2906 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2907 PetscCall(PetscFree(*spidx)); 2908 PetscFunctionReturn(PETSC_SUCCESS); 2909 } 2910 2911 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2912 { 2913 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2914 2915 PetscFunctionBegin; 2916 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2917 PetscCall(MatShift_Basic(Y, a)); 2918 PetscFunctionReturn(PETSC_SUCCESS); 2919 } 2920 2921 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2922 MatGetRow_SeqBAIJ, 2923 MatRestoreRow_SeqBAIJ, 2924 MatMult_SeqBAIJ_N, 2925 /* 4*/ MatMultAdd_SeqBAIJ_N, 2926 MatMultTranspose_SeqBAIJ, 2927 MatMultTransposeAdd_SeqBAIJ, 2928 NULL, 2929 NULL, 2930 NULL, 2931 /* 10*/ NULL, 2932 MatLUFactor_SeqBAIJ, 2933 NULL, 2934 NULL, 2935 MatTranspose_SeqBAIJ, 2936 /* 15*/ MatGetInfo_SeqBAIJ, 2937 MatEqual_SeqBAIJ, 2938 MatGetDiagonal_SeqBAIJ, 2939 MatDiagonalScale_SeqBAIJ, 2940 MatNorm_SeqBAIJ, 2941 /* 20*/ NULL, 2942 MatAssemblyEnd_SeqBAIJ, 2943 MatSetOption_SeqBAIJ, 2944 MatZeroEntries_SeqBAIJ, 2945 /* 24*/ MatZeroRows_SeqBAIJ, 2946 NULL, 2947 NULL, 2948 NULL, 2949 NULL, 2950 /* 29*/ MatSetUp_Seq_Hash, 2951 NULL, 2952 NULL, 2953 NULL, 2954 NULL, 2955 /* 34*/ MatDuplicate_SeqBAIJ, 2956 NULL, 2957 NULL, 2958 MatILUFactor_SeqBAIJ, 2959 NULL, 2960 /* 39*/ MatAXPY_SeqBAIJ, 2961 MatCreateSubMatrices_SeqBAIJ, 2962 MatIncreaseOverlap_SeqBAIJ, 2963 MatGetValues_SeqBAIJ, 2964 MatCopy_SeqBAIJ, 2965 /* 44*/ NULL, 2966 MatScale_SeqBAIJ, 2967 MatShift_SeqBAIJ, 2968 NULL, 2969 MatZeroRowsColumns_SeqBAIJ, 2970 /* 49*/ NULL, 2971 MatGetRowIJ_SeqBAIJ, 2972 MatRestoreRowIJ_SeqBAIJ, 2973 MatGetColumnIJ_SeqBAIJ, 2974 MatRestoreColumnIJ_SeqBAIJ, 2975 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2976 NULL, 2977 NULL, 2978 NULL, 2979 MatSetValuesBlocked_SeqBAIJ, 2980 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2981 MatDestroy_SeqBAIJ, 2982 MatView_SeqBAIJ, 2983 NULL, 2984 NULL, 2985 /* 64*/ NULL, 2986 NULL, 2987 NULL, 2988 NULL, 2989 NULL, 2990 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2991 NULL, 2992 MatConvert_Basic, 2993 NULL, 2994 NULL, 2995 /* 74*/ NULL, 2996 MatFDColoringApply_BAIJ, 2997 NULL, 2998 NULL, 2999 NULL, 3000 /* 79*/ NULL, 3001 NULL, 3002 NULL, 3003 NULL, 3004 MatLoad_SeqBAIJ, 3005 /* 84*/ NULL, 3006 NULL, 3007 NULL, 3008 NULL, 3009 NULL, 3010 /* 89*/ NULL, 3011 NULL, 3012 NULL, 3013 NULL, 3014 NULL, 3015 /* 94*/ NULL, 3016 NULL, 3017 NULL, 3018 NULL, 3019 NULL, 3020 /* 99*/ NULL, 3021 NULL, 3022 NULL, 3023 MatConjugate_SeqBAIJ, 3024 NULL, 3025 /*104*/ NULL, 3026 MatRealPart_SeqBAIJ, 3027 MatImaginaryPart_SeqBAIJ, 3028 NULL, 3029 NULL, 3030 /*109*/ NULL, 3031 NULL, 3032 NULL, 3033 NULL, 3034 MatMissingDiagonal_SeqBAIJ, 3035 /*114*/ NULL, 3036 NULL, 3037 NULL, 3038 NULL, 3039 NULL, 3040 /*119*/ NULL, 3041 NULL, 3042 MatMultHermitianTranspose_SeqBAIJ, 3043 MatMultHermitianTransposeAdd_SeqBAIJ, 3044 NULL, 3045 /*124*/ NULL, 3046 MatGetColumnReductions_SeqBAIJ, 3047 MatInvertBlockDiagonal_SeqBAIJ, 3048 NULL, 3049 NULL, 3050 /*129*/ NULL, 3051 NULL, 3052 NULL, 3053 NULL, 3054 NULL, 3055 /*134*/ NULL, 3056 NULL, 3057 NULL, 3058 NULL, 3059 NULL, 3060 /*139*/ MatSetBlockSizes_Default, 3061 NULL, 3062 NULL, 3063 MatFDColoringSetUp_SeqXAIJ, 3064 NULL, 3065 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3066 MatDestroySubMatrices_SeqBAIJ, 3067 NULL, 3068 NULL, 3069 NULL, 3070 NULL, 3071 /*150*/ NULL, 3072 NULL}; 3073 3074 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3075 { 3076 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3077 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3078 3079 PetscFunctionBegin; 3080 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3081 3082 /* allocate space for values if not already there */ 3083 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3084 3085 /* copy values over */ 3086 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3087 PetscFunctionReturn(PETSC_SUCCESS); 3088 } 3089 3090 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3091 { 3092 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3093 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3094 3095 PetscFunctionBegin; 3096 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3097 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3098 3099 /* copy values over */ 3100 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3101 PetscFunctionReturn(PETSC_SUCCESS); 3102 } 3103 3104 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3105 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3106 3107 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3108 { 3109 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3110 PetscInt i, mbs, nbs, bs2; 3111 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3112 3113 PetscFunctionBegin; 3114 if (B->hash_active) { 3115 PetscInt bs; 3116 B->ops[0] = b->cops; 3117 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3118 PetscCall(MatGetBlockSize(B, &bs)); 3119 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3120 PetscCall(PetscFree(b->dnz)); 3121 PetscCall(PetscFree(b->bdnz)); 3122 B->hash_active = PETSC_FALSE; 3123 } 3124 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3125 if (nz == MAT_SKIP_ALLOCATION) { 3126 skipallocation = PETSC_TRUE; 3127 nz = 0; 3128 } 3129 3130 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3131 PetscCall(PetscLayoutSetUp(B->rmap)); 3132 PetscCall(PetscLayoutSetUp(B->cmap)); 3133 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3134 3135 B->preallocated = PETSC_TRUE; 3136 3137 mbs = B->rmap->n / bs; 3138 nbs = B->cmap->n / bs; 3139 bs2 = bs * bs; 3140 3141 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3142 3143 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3144 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3145 if (nnz) { 3146 for (i = 0; i < mbs; i++) { 3147 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3148 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3149 } 3150 } 3151 3152 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3153 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3154 PetscOptionsEnd(); 3155 3156 if (!flg) { 3157 switch (bs) { 3158 case 1: 3159 B->ops->mult = MatMult_SeqBAIJ_1; 3160 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3161 break; 3162 case 2: 3163 B->ops->mult = MatMult_SeqBAIJ_2; 3164 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3165 break; 3166 case 3: 3167 B->ops->mult = MatMult_SeqBAIJ_3; 3168 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3169 break; 3170 case 4: 3171 B->ops->mult = MatMult_SeqBAIJ_4; 3172 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3173 break; 3174 case 5: 3175 B->ops->mult = MatMult_SeqBAIJ_5; 3176 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3177 break; 3178 case 6: 3179 B->ops->mult = MatMult_SeqBAIJ_6; 3180 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3181 break; 3182 case 7: 3183 B->ops->mult = MatMult_SeqBAIJ_7; 3184 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3185 break; 3186 case 9: { 3187 PetscInt version = 1; 3188 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3189 switch (version) { 3190 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3191 case 1: 3192 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3193 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3194 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3195 break; 3196 #endif 3197 default: 3198 B->ops->mult = MatMult_SeqBAIJ_N; 3199 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3200 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3201 break; 3202 } 3203 break; 3204 } 3205 case 11: 3206 B->ops->mult = MatMult_SeqBAIJ_11; 3207 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3208 break; 3209 case 12: { 3210 PetscInt version = 1; 3211 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3212 switch (version) { 3213 case 1: 3214 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3215 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3216 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3217 break; 3218 case 2: 3219 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3220 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3221 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3222 break; 3223 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3224 case 3: 3225 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3226 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3227 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3228 break; 3229 #endif 3230 default: 3231 B->ops->mult = MatMult_SeqBAIJ_N; 3232 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3233 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3234 break; 3235 } 3236 break; 3237 } 3238 case 15: { 3239 PetscInt version = 1; 3240 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3241 switch (version) { 3242 case 1: 3243 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3244 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3245 break; 3246 case 2: 3247 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3248 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3249 break; 3250 case 3: 3251 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3252 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3253 break; 3254 case 4: 3255 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3256 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3257 break; 3258 default: 3259 B->ops->mult = MatMult_SeqBAIJ_N; 3260 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3261 break; 3262 } 3263 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3264 break; 3265 } 3266 default: 3267 B->ops->mult = MatMult_SeqBAIJ_N; 3268 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3269 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3270 break; 3271 } 3272 } 3273 B->ops->sor = MatSOR_SeqBAIJ; 3274 b->mbs = mbs; 3275 b->nbs = nbs; 3276 if (!skipallocation) { 3277 if (!b->imax) { 3278 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3279 3280 b->free_imax_ilen = PETSC_TRUE; 3281 } 3282 /* b->ilen will count nonzeros in each block row so far. */ 3283 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3284 if (!nnz) { 3285 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3286 else if (nz < 0) nz = 1; 3287 nz = PetscMin(nz, nbs); 3288 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3289 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3290 } else { 3291 PetscInt64 nz64 = 0; 3292 for (i = 0; i < mbs; i++) { 3293 b->imax[i] = nnz[i]; 3294 nz64 += nnz[i]; 3295 } 3296 PetscCall(PetscIntCast(nz64, &nz)); 3297 } 3298 3299 /* allocate the matrix space */ 3300 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3301 if (B->structure_only) { 3302 PetscCall(PetscMalloc1(nz, &b->j)); 3303 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3304 } else { 3305 PetscInt nzbs2 = 0; 3306 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3307 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3308 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3309 } 3310 PetscCall(PetscArrayzero(b->j, nz)); 3311 3312 if (B->structure_only) { 3313 b->singlemalloc = PETSC_FALSE; 3314 b->free_a = PETSC_FALSE; 3315 } else { 3316 b->singlemalloc = PETSC_TRUE; 3317 b->free_a = PETSC_TRUE; 3318 } 3319 b->free_ij = PETSC_TRUE; 3320 3321 b->i[0] = 0; 3322 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3323 3324 } else { 3325 b->free_a = PETSC_FALSE; 3326 b->free_ij = PETSC_FALSE; 3327 } 3328 3329 b->bs2 = bs2; 3330 b->mbs = mbs; 3331 b->nz = 0; 3332 b->maxnz = nz; 3333 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3334 B->was_assembled = PETSC_FALSE; 3335 B->assembled = PETSC_FALSE; 3336 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3337 PetscFunctionReturn(PETSC_SUCCESS); 3338 } 3339 3340 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3341 { 3342 PetscInt i, m, nz, nz_max = 0, *nnz; 3343 PetscScalar *values = NULL; 3344 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3345 3346 PetscFunctionBegin; 3347 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3348 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3349 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3350 PetscCall(PetscLayoutSetUp(B->rmap)); 3351 PetscCall(PetscLayoutSetUp(B->cmap)); 3352 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3353 m = B->rmap->n / bs; 3354 3355 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3356 PetscCall(PetscMalloc1(m + 1, &nnz)); 3357 for (i = 0; i < m; i++) { 3358 nz = ii[i + 1] - ii[i]; 3359 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3360 nz_max = PetscMax(nz_max, nz); 3361 nnz[i] = nz; 3362 } 3363 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3364 PetscCall(PetscFree(nnz)); 3365 3366 values = (PetscScalar *)V; 3367 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3368 for (i = 0; i < m; i++) { 3369 PetscInt ncols = ii[i + 1] - ii[i]; 3370 const PetscInt *icols = jj + ii[i]; 3371 if (bs == 1 || !roworiented) { 3372 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3373 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3374 } else { 3375 PetscInt j; 3376 for (j = 0; j < ncols; j++) { 3377 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3378 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3379 } 3380 } 3381 } 3382 if (!V) PetscCall(PetscFree(values)); 3383 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3384 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3385 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3386 PetscFunctionReturn(PETSC_SUCCESS); 3387 } 3388 3389 /*@C 3390 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3391 3392 Not Collective 3393 3394 Input Parameter: 3395 . mat - a `MATSEQBAIJ` matrix 3396 3397 Output Parameter: 3398 . array - pointer to the data 3399 3400 Level: intermediate 3401 3402 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3403 @*/ 3404 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3405 { 3406 PetscFunctionBegin; 3407 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3408 PetscFunctionReturn(PETSC_SUCCESS); 3409 } 3410 3411 /*@C 3412 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3413 3414 Not Collective 3415 3416 Input Parameters: 3417 + mat - a `MATSEQBAIJ` matrix 3418 - array - pointer to the data 3419 3420 Level: intermediate 3421 3422 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3423 @*/ 3424 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3425 { 3426 PetscFunctionBegin; 3427 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3428 PetscFunctionReturn(PETSC_SUCCESS); 3429 } 3430 3431 /*MC 3432 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3433 block sparse compressed row format. 3434 3435 Options Database Keys: 3436 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3437 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3438 3439 Level: beginner 3440 3441 Notes: 3442 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3443 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3444 3445 Run with `-info` to see what version of the matrix-vector product is being used 3446 3447 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3448 M*/ 3449 3450 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3451 3452 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3453 { 3454 PetscMPIInt size; 3455 Mat_SeqBAIJ *b; 3456 3457 PetscFunctionBegin; 3458 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3459 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3460 3461 PetscCall(PetscNew(&b)); 3462 B->data = (void *)b; 3463 B->ops[0] = MatOps_Values; 3464 3465 b->row = NULL; 3466 b->col = NULL; 3467 b->icol = NULL; 3468 b->reallocs = 0; 3469 b->saved_values = NULL; 3470 3471 b->roworiented = PETSC_TRUE; 3472 b->nonew = 0; 3473 b->diag = NULL; 3474 B->spptr = NULL; 3475 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3476 b->keepnonzeropattern = PETSC_FALSE; 3477 3478 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3488 #if defined(PETSC_HAVE_HYPRE) 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3490 #endif 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3492 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3493 PetscFunctionReturn(PETSC_SUCCESS); 3494 } 3495 3496 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3497 { 3498 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3499 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3500 3501 PetscFunctionBegin; 3502 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3503 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3504 3505 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3506 c->imax = a->imax; 3507 c->ilen = a->ilen; 3508 c->free_imax_ilen = PETSC_FALSE; 3509 } else { 3510 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3511 for (i = 0; i < mbs; i++) { 3512 c->imax[i] = a->imax[i]; 3513 c->ilen[i] = a->ilen[i]; 3514 } 3515 c->free_imax_ilen = PETSC_TRUE; 3516 } 3517 3518 /* allocate the matrix space */ 3519 if (mallocmatspace) { 3520 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3521 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3522 3523 c->i = a->i; 3524 c->j = a->j; 3525 c->singlemalloc = PETSC_FALSE; 3526 c->free_a = PETSC_TRUE; 3527 c->free_ij = PETSC_FALSE; 3528 c->parent = A; 3529 C->preallocated = PETSC_TRUE; 3530 C->assembled = PETSC_TRUE; 3531 3532 PetscCall(PetscObjectReference((PetscObject)A)); 3533 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3534 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3535 } else { 3536 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3537 3538 c->singlemalloc = PETSC_TRUE; 3539 c->free_a = PETSC_TRUE; 3540 c->free_ij = PETSC_TRUE; 3541 3542 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3543 if (mbs > 0) { 3544 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3545 if (cpvalues == MAT_COPY_VALUES) { 3546 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3547 } else { 3548 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3549 } 3550 } 3551 C->preallocated = PETSC_TRUE; 3552 C->assembled = PETSC_TRUE; 3553 } 3554 } 3555 3556 c->roworiented = a->roworiented; 3557 c->nonew = a->nonew; 3558 3559 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3560 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3561 3562 c->bs2 = a->bs2; 3563 c->mbs = a->mbs; 3564 c->nbs = a->nbs; 3565 3566 if (a->diag) { 3567 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3568 c->diag = a->diag; 3569 c->free_diag = PETSC_FALSE; 3570 } else { 3571 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3572 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3573 c->free_diag = PETSC_TRUE; 3574 } 3575 } else c->diag = NULL; 3576 3577 c->nz = a->nz; 3578 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3579 c->solve_work = NULL; 3580 c->mult_work = NULL; 3581 c->sor_workt = NULL; 3582 c->sor_work = NULL; 3583 3584 c->compressedrow.use = a->compressedrow.use; 3585 c->compressedrow.nrows = a->compressedrow.nrows; 3586 if (a->compressedrow.use) { 3587 i = a->compressedrow.nrows; 3588 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3589 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3590 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3591 } else { 3592 c->compressedrow.use = PETSC_FALSE; 3593 c->compressedrow.i = NULL; 3594 c->compressedrow.rindex = NULL; 3595 } 3596 C->nonzerostate = A->nonzerostate; 3597 3598 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3599 PetscFunctionReturn(PETSC_SUCCESS); 3600 } 3601 3602 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3603 { 3604 PetscFunctionBegin; 3605 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3606 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3607 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3608 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3609 PetscFunctionReturn(PETSC_SUCCESS); 3610 } 3611 3612 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3613 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3614 { 3615 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3616 PetscInt *rowidxs, *colidxs; 3617 PetscScalar *matvals; 3618 3619 PetscFunctionBegin; 3620 PetscCall(PetscViewerSetUp(viewer)); 3621 3622 /* read matrix header */ 3623 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3624 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3625 M = header[1]; 3626 N = header[2]; 3627 nz = header[3]; 3628 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3629 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3630 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3631 3632 /* set block sizes from the viewer's .info file */ 3633 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3634 /* set local and global sizes if not set already */ 3635 if (mat->rmap->n < 0) mat->rmap->n = M; 3636 if (mat->cmap->n < 0) mat->cmap->n = N; 3637 if (mat->rmap->N < 0) mat->rmap->N = M; 3638 if (mat->cmap->N < 0) mat->cmap->N = N; 3639 PetscCall(PetscLayoutSetUp(mat->rmap)); 3640 PetscCall(PetscLayoutSetUp(mat->cmap)); 3641 3642 /* check if the matrix sizes are correct */ 3643 PetscCall(MatGetSize(mat, &rows, &cols)); 3644 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3645 PetscCall(MatGetBlockSize(mat, &bs)); 3646 PetscCall(MatGetLocalSize(mat, &m, &n)); 3647 mbs = m / bs; 3648 nbs = n / bs; 3649 3650 /* read in row lengths, column indices and nonzero values */ 3651 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3652 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3653 rowidxs[0] = 0; 3654 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3655 sum = rowidxs[m]; 3656 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3657 3658 /* read in column indices and nonzero values */ 3659 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3660 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3661 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3662 3663 { /* preallocate matrix storage */ 3664 PetscBT bt; /* helper bit set to count nonzeros */ 3665 PetscInt *nnz; 3666 PetscBool sbaij; 3667 3668 PetscCall(PetscBTCreate(nbs, &bt)); 3669 PetscCall(PetscCalloc1(mbs, &nnz)); 3670 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3671 for (i = 0; i < mbs; i++) { 3672 PetscCall(PetscBTMemzero(nbs, bt)); 3673 for (k = 0; k < bs; k++) { 3674 PetscInt row = bs * i + k; 3675 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3676 PetscInt col = colidxs[j]; 3677 if (!sbaij || col >= row) 3678 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3679 } 3680 } 3681 } 3682 PetscCall(PetscBTDestroy(&bt)); 3683 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3684 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3685 PetscCall(PetscFree(nnz)); 3686 } 3687 3688 /* store matrix values */ 3689 for (i = 0; i < m; i++) { 3690 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3691 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3692 } 3693 3694 PetscCall(PetscFree(rowidxs)); 3695 PetscCall(PetscFree2(colidxs, matvals)); 3696 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3697 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3698 PetscFunctionReturn(PETSC_SUCCESS); 3699 } 3700 3701 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3702 { 3703 PetscBool isbinary; 3704 3705 PetscFunctionBegin; 3706 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3707 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3708 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3709 PetscFunctionReturn(PETSC_SUCCESS); 3710 } 3711 3712 /*@C 3713 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3714 compressed row) format. For good matrix assembly performance the 3715 user should preallocate the matrix storage by setting the parameter `nz` 3716 (or the array `nnz`). 3717 3718 Collective 3719 3720 Input Parameters: 3721 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3722 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3723 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3724 . m - number of rows 3725 . n - number of columns 3726 . nz - number of nonzero blocks per block row (same for all rows) 3727 - nnz - array containing the number of nonzero blocks in the various block rows 3728 (possibly different for each block row) or `NULL` 3729 3730 Output Parameter: 3731 . A - the matrix 3732 3733 Options Database Keys: 3734 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3735 - -mat_block_size - size of the blocks to use 3736 3737 Level: intermediate 3738 3739 Notes: 3740 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3741 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3742 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3743 3744 The number of rows and columns must be divisible by blocksize. 3745 3746 If the `nnz` parameter is given then the `nz` parameter is ignored 3747 3748 A nonzero block is any block that as 1 or more nonzeros in it 3749 3750 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3751 storage. That is, the stored row and column indices can begin at 3752 either one (as in Fortran) or zero. 3753 3754 Specify the preallocated storage with either `nz` or `nnz` (not both). 3755 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3756 allocation. See [Sparse Matrices](sec_matsparse) for details. 3757 matrices. 3758 3759 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3760 @*/ 3761 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3762 { 3763 PetscFunctionBegin; 3764 PetscCall(MatCreate(comm, A)); 3765 PetscCall(MatSetSizes(*A, m, n, m, n)); 3766 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3767 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3768 PetscFunctionReturn(PETSC_SUCCESS); 3769 } 3770 3771 /*@C 3772 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3773 per row in the matrix. For good matrix assembly performance the 3774 user should preallocate the matrix storage by setting the parameter `nz` 3775 (or the array `nnz`). 3776 3777 Collective 3778 3779 Input Parameters: 3780 + B - the matrix 3781 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3782 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3783 . nz - number of block nonzeros per block row (same for all rows) 3784 - nnz - array containing the number of block nonzeros in the various block rows 3785 (possibly different for each block row) or `NULL` 3786 3787 Options Database Keys: 3788 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3789 - -mat_block_size - size of the blocks to use 3790 3791 Level: intermediate 3792 3793 Notes: 3794 If the `nnz` parameter is given then the `nz` parameter is ignored 3795 3796 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3797 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3798 You can also run with the option `-info` and look for messages with the string 3799 malloc in them to see if additional memory allocation was needed. 3800 3801 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3802 storage. That is, the stored row and column indices can begin at 3803 either one (as in Fortran) or zero. 3804 3805 Specify the preallocated storage with either nz or nnz (not both). 3806 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3807 allocation. See [Sparse Matrices](sec_matsparse) for details. 3808 3809 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3810 @*/ 3811 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3812 { 3813 PetscFunctionBegin; 3814 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3815 PetscValidType(B, 1); 3816 PetscValidLogicalCollectiveInt(B, bs, 2); 3817 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3818 PetscFunctionReturn(PETSC_SUCCESS); 3819 } 3820 3821 /*@C 3822 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3823 3824 Collective 3825 3826 Input Parameters: 3827 + B - the matrix 3828 . bs - the blocksize 3829 . i - the indices into `j` for the start of each local row (starts with zero) 3830 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3831 - v - optional values in the matrix 3832 3833 Level: advanced 3834 3835 Notes: 3836 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3837 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3838 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3839 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3840 block column and the second index is over columns within a block. 3841 3842 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3843 3844 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3845 @*/ 3846 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3847 { 3848 PetscFunctionBegin; 3849 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3850 PetscValidType(B, 1); 3851 PetscValidLogicalCollectiveInt(B, bs, 2); 3852 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3853 PetscFunctionReturn(PETSC_SUCCESS); 3854 } 3855 3856 /*@ 3857 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3858 3859 Collective 3860 3861 Input Parameters: 3862 + comm - must be an MPI communicator of size 1 3863 . bs - size of block 3864 . m - number of rows 3865 . n - number of columns 3866 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3867 . j - column indices 3868 - a - matrix values 3869 3870 Output Parameter: 3871 . mat - the matrix 3872 3873 Level: advanced 3874 3875 Notes: 3876 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3877 once the matrix is destroyed 3878 3879 You cannot set new nonzero locations into this matrix, that will generate an error. 3880 3881 The `i` and `j` indices are 0 based 3882 3883 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3884 3885 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3886 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3887 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3888 with column-major ordering within blocks. 3889 3890 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3891 @*/ 3892 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3893 { 3894 Mat_SeqBAIJ *baij; 3895 3896 PetscFunctionBegin; 3897 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3898 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3899 3900 PetscCall(MatCreate(comm, mat)); 3901 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3902 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3903 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3904 baij = (Mat_SeqBAIJ *)(*mat)->data; 3905 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3906 3907 baij->i = i; 3908 baij->j = j; 3909 baij->a = a; 3910 3911 baij->singlemalloc = PETSC_FALSE; 3912 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3913 baij->free_a = PETSC_FALSE; 3914 baij->free_ij = PETSC_FALSE; 3915 baij->free_imax_ilen = PETSC_TRUE; 3916 3917 for (PetscInt ii = 0; ii < m; ii++) { 3918 const PetscInt row_len = i[ii + 1] - i[ii]; 3919 3920 baij->ilen[ii] = baij->imax[ii] = row_len; 3921 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3922 } 3923 if (PetscDefined(USE_DEBUG)) { 3924 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3925 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3926 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3927 } 3928 } 3929 3930 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3931 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3932 PetscFunctionReturn(PETSC_SUCCESS); 3933 } 3934 3935 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3936 { 3937 PetscFunctionBegin; 3938 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3939 PetscFunctionReturn(PETSC_SUCCESS); 3940 } 3941