1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 12 #define TYPE BAIJ 13 #define TYPE_BS 14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 15 #undef TYPE_BS 16 #define TYPE_BS _BS 17 #define TYPE_BS_ON 18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 19 #undef TYPE_BS 20 #include "../src/mat/impls/aij/seq/seqhashmat.h" 21 #undef TYPE 22 #undef TYPE_BS_ON 23 24 #if defined(PETSC_HAVE_HYPRE) 25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 26 #endif 27 28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 30 #endif 31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 32 33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 34 { 35 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 36 PetscInt m, n, ib, jb, bs = A->rmap->bs; 37 MatScalar *a_val = a_aij->a; 38 39 PetscFunctionBegin; 40 PetscCall(MatGetSize(A, &m, &n)); 41 PetscCall(PetscArrayzero(reductions, n)); 42 if (type == NORM_2) { 43 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 44 for (jb = 0; jb < bs; jb++) { 45 for (ib = 0; ib < bs; ib++) { 46 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 47 a_val++; 48 } 49 } 50 } 51 } else if (type == NORM_1) { 52 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 53 for (jb = 0; jb < bs; jb++) { 54 for (ib = 0; ib < bs; ib++) { 55 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 56 a_val++; 57 } 58 } 59 } 60 } else if (type == NORM_INFINITY) { 61 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 62 for (jb = 0; jb < bs; jb++) { 63 for (ib = 0; ib < bs; ib++) { 64 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 65 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 66 a_val++; 67 } 68 } 69 } 70 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 71 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 72 for (jb = 0; jb < bs; jb++) { 73 for (ib = 0; ib < bs; ib++) { 74 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 75 a_val++; 76 } 77 } 78 } 79 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 81 for (jb = 0; jb < bs; jb++) { 82 for (ib = 0; ib < bs; ib++) { 83 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 84 a_val++; 85 } 86 } 87 } 88 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 89 if (type == NORM_2) { 90 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 91 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 92 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 93 } 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 98 { 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 100 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 101 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 102 PetscReal shift = 0.0; 103 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 104 105 PetscFunctionBegin; 106 allowzeropivot = PetscNot(A->erroriffailure); 107 108 if (a->idiagvalid) { 109 if (values) *values = a->idiag; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 113 diag_offset = a->diag; 114 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 115 diag = a->idiag; 116 if (values) *values = a->idiag; 117 /* factor and invert each block */ 118 switch (bs) { 119 case 1: 120 for (i = 0; i < mbs; i++) { 121 odiag = v + 1 * diag_offset[i]; 122 diag[0] = odiag[0]; 123 124 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 125 if (allowzeropivot) { 126 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 128 A->factorerror_zeropivot_row = i; 129 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 130 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * diag_offset[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * diag_offset[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * diag_offset[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * diag_offset[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * diag_offset[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * diag_offset[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 /* 1414 Checks for missing diagonals 1415 */ 1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1419 PetscInt *diag, *ii = a->i, i; 1420 1421 PetscFunctionBegin; 1422 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1423 *missing = PETSC_FALSE; 1424 if (A->rmap->n > 0 && !ii) { 1425 *missing = PETSC_TRUE; 1426 if (d) *d = 0; 1427 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1428 } else { 1429 PetscInt n; 1430 n = PetscMin(a->mbs, a->nbs); 1431 diag = a->diag; 1432 for (i = 0; i < n; i++) { 1433 if (diag[i] >= ii[i + 1]) { 1434 *missing = PETSC_TRUE; 1435 if (d) *d = i; 1436 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1437 break; 1438 } 1439 } 1440 } 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1445 { 1446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1447 PetscInt i, j, m = a->mbs; 1448 1449 PetscFunctionBegin; 1450 if (!a->diag) { 1451 PetscCall(PetscMalloc1(m, &a->diag)); 1452 a->free_diag = PETSC_TRUE; 1453 } 1454 for (i = 0; i < m; i++) { 1455 a->diag[i] = a->i[i + 1]; 1456 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1457 if (a->j[j] == i) { 1458 a->diag[i] = j; 1459 break; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1467 { 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1469 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1470 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1471 1472 PetscFunctionBegin; 1473 *nn = n; 1474 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1475 if (symmetric) { 1476 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1477 nz = tia[n]; 1478 } else { 1479 tia = a->i; 1480 tja = a->j; 1481 } 1482 1483 if (!blockcompressed && bs > 1) { 1484 (*nn) *= bs; 1485 /* malloc & create the natural set of indices */ 1486 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1487 if (n) { 1488 (*ia)[0] = oshift; 1489 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1490 } 1491 1492 for (i = 1; i < n; i++) { 1493 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1494 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1495 } 1496 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1497 1498 if (inja) { 1499 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1500 cnt = 0; 1501 for (i = 0; i < n; i++) { 1502 for (j = 0; j < bs; j++) { 1503 for (k = tia[i]; k < tia[i + 1]; k++) { 1504 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1505 } 1506 } 1507 } 1508 } 1509 1510 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1511 PetscCall(PetscFree(tia)); 1512 PetscCall(PetscFree(tja)); 1513 } 1514 } else if (oshift == 1) { 1515 if (symmetric) { 1516 nz = tia[A->rmap->n / bs]; 1517 /* add 1 to i and j indices */ 1518 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1519 *ia = tia; 1520 if (ja) { 1521 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1522 *ja = tja; 1523 } 1524 } else { 1525 nz = a->i[A->rmap->n / bs]; 1526 /* malloc space and add 1 to i and j indices */ 1527 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1528 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1529 if (ja) { 1530 PetscCall(PetscMalloc1(nz, ja)); 1531 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1532 } 1533 } 1534 } else { 1535 *ia = tia; 1536 if (ja) *ja = tja; 1537 } 1538 PetscFunctionReturn(PETSC_SUCCESS); 1539 } 1540 1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1542 { 1543 PetscFunctionBegin; 1544 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1545 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1546 PetscCall(PetscFree(*ia)); 1547 if (ja) PetscCall(PetscFree(*ja)); 1548 } 1549 PetscFunctionReturn(PETSC_SUCCESS); 1550 } 1551 1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1553 { 1554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1555 1556 PetscFunctionBegin; 1557 if (A->hash_active) { 1558 PetscInt bs; 1559 A->ops[0] = a->cops; 1560 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1561 PetscCall(MatGetBlockSize(A, &bs)); 1562 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1563 PetscCall(PetscFree(a->dnz)); 1564 PetscCall(PetscFree(a->bdnz)); 1565 A->hash_active = PETSC_FALSE; 1566 } 1567 #if defined(PETSC_USE_LOG) 1568 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1569 #endif 1570 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1571 PetscCall(ISDestroy(&a->row)); 1572 PetscCall(ISDestroy(&a->col)); 1573 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1574 PetscCall(PetscFree(a->idiag)); 1575 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1576 PetscCall(PetscFree(a->solve_work)); 1577 PetscCall(PetscFree(a->mult_work)); 1578 PetscCall(PetscFree(a->sor_workt)); 1579 PetscCall(PetscFree(a->sor_work)); 1580 PetscCall(ISDestroy(&a->icol)); 1581 PetscCall(PetscFree(a->saved_values)); 1582 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1583 1584 PetscCall(MatDestroy(&a->sbaijMat)); 1585 PetscCall(MatDestroy(&a->parent)); 1586 PetscCall(PetscFree(A->data)); 1587 1588 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1596 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1597 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1598 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1599 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1600 #if defined(PETSC_HAVE_HYPRE) 1601 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1602 #endif 1603 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1604 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1609 { 1610 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1611 1612 PetscFunctionBegin; 1613 switch (op) { 1614 case MAT_ROW_ORIENTED: 1615 a->roworiented = flg; 1616 break; 1617 case MAT_KEEP_NONZERO_PATTERN: 1618 a->keepnonzeropattern = flg; 1619 break; 1620 case MAT_NEW_NONZERO_LOCATIONS: 1621 a->nonew = (flg ? 0 : 1); 1622 break; 1623 case MAT_NEW_NONZERO_LOCATION_ERR: 1624 a->nonew = (flg ? -1 : 0); 1625 break; 1626 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1627 a->nonew = (flg ? -2 : 0); 1628 break; 1629 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1630 a->nounused = (flg ? -1 : 0); 1631 break; 1632 case MAT_FORCE_DIAGONAL_ENTRIES: 1633 case MAT_IGNORE_OFF_PROC_ENTRIES: 1634 case MAT_USE_HASH_TABLE: 1635 case MAT_SORTED_FULL: 1636 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1637 break; 1638 case MAT_SPD: 1639 case MAT_SYMMETRIC: 1640 case MAT_STRUCTURALLY_SYMMETRIC: 1641 case MAT_HERMITIAN: 1642 case MAT_SYMMETRY_ETERNAL: 1643 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1644 case MAT_SUBMAT_SINGLEIS: 1645 case MAT_STRUCTURE_ONLY: 1646 case MAT_SPD_ETERNAL: 1647 /* if the diagonal matrix is square it inherits some of the properties above */ 1648 break; 1649 default: 1650 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1651 } 1652 PetscFunctionReturn(PETSC_SUCCESS); 1653 } 1654 1655 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1656 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1657 { 1658 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1659 MatScalar *aa_i; 1660 PetscScalar *v_i; 1661 1662 PetscFunctionBegin; 1663 bs = A->rmap->bs; 1664 bs2 = bs * bs; 1665 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1666 1667 bn = row / bs; /* Block number */ 1668 bp = row % bs; /* Block Position */ 1669 M = ai[bn + 1] - ai[bn]; 1670 *nz = bs * M; 1671 1672 if (v) { 1673 *v = NULL; 1674 if (*nz) { 1675 PetscCall(PetscMalloc1(*nz, v)); 1676 for (i = 0; i < M; i++) { /* for each block in the block row */ 1677 v_i = *v + i * bs; 1678 aa_i = aa + bs2 * (ai[bn] + i); 1679 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1680 } 1681 } 1682 } 1683 1684 if (idx) { 1685 *idx = NULL; 1686 if (*nz) { 1687 PetscCall(PetscMalloc1(*nz, idx)); 1688 for (i = 0; i < M; i++) { /* for each block in the block row */ 1689 idx_i = *idx + i * bs; 1690 itmp = bs * aj[ai[bn] + i]; 1691 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1692 } 1693 } 1694 } 1695 PetscFunctionReturn(PETSC_SUCCESS); 1696 } 1697 1698 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1699 { 1700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1701 1702 PetscFunctionBegin; 1703 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1704 PetscFunctionReturn(PETSC_SUCCESS); 1705 } 1706 1707 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1708 { 1709 PetscFunctionBegin; 1710 if (nz) *nz = 0; 1711 if (idx) PetscCall(PetscFree(*idx)); 1712 if (v) PetscCall(PetscFree(*v)); 1713 PetscFunctionReturn(PETSC_SUCCESS); 1714 } 1715 1716 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1717 { 1718 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1719 Mat C; 1720 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1721 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1722 MatScalar *ata, *aa = a->a; 1723 1724 PetscFunctionBegin; 1725 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1726 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1727 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1728 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1729 1730 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1731 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1732 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1733 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1734 1735 at = (Mat_SeqBAIJ *)C->data; 1736 ati = at->i; 1737 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1738 } else { 1739 C = *B; 1740 at = (Mat_SeqBAIJ *)C->data; 1741 ati = at->i; 1742 } 1743 1744 atj = at->j; 1745 ata = at->a; 1746 1747 /* Copy ati into atfill so we have locations of the next free space in atj */ 1748 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1749 1750 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1751 for (i = 0; i < mbs; i++) { 1752 anzj = ai[i + 1] - ai[i]; 1753 for (j = 0; j < anzj; j++) { 1754 atj[atfill[*aj]] = i; 1755 for (kr = 0; kr < bs; kr++) { 1756 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1757 } 1758 atfill[*aj++] += 1; 1759 } 1760 } 1761 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1762 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1763 1764 /* Clean up temporary space and complete requests. */ 1765 PetscCall(PetscFree(atfill)); 1766 1767 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1768 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1769 *B = C; 1770 } else { 1771 PetscCall(MatHeaderMerge(A, &C)); 1772 } 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1777 { 1778 Mat Btrans; 1779 1780 PetscFunctionBegin; 1781 *f = PETSC_FALSE; 1782 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1783 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1784 PetscCall(MatDestroy(&Btrans)); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1789 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1790 { 1791 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1792 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1793 PetscInt *rowlens, *colidxs; 1794 PetscScalar *matvals; 1795 1796 PetscFunctionBegin; 1797 PetscCall(PetscViewerSetUp(viewer)); 1798 1799 M = mat->rmap->N; 1800 N = mat->cmap->N; 1801 m = mat->rmap->n; 1802 bs = mat->rmap->bs; 1803 nz = bs * bs * A->nz; 1804 1805 /* write matrix header */ 1806 header[0] = MAT_FILE_CLASSID; 1807 header[1] = M; 1808 header[2] = N; 1809 header[3] = nz; 1810 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1811 1812 /* store row lengths */ 1813 PetscCall(PetscMalloc1(m, &rowlens)); 1814 for (cnt = 0, i = 0; i < A->mbs; i++) 1815 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1816 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1817 PetscCall(PetscFree(rowlens)); 1818 1819 /* store column indices */ 1820 PetscCall(PetscMalloc1(nz, &colidxs)); 1821 for (cnt = 0, i = 0; i < A->mbs; i++) 1822 for (k = 0; k < bs; k++) 1823 for (j = A->i[i]; j < A->i[i + 1]; j++) 1824 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1825 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1826 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1827 PetscCall(PetscFree(colidxs)); 1828 1829 /* store nonzero values */ 1830 PetscCall(PetscMalloc1(nz, &matvals)); 1831 for (cnt = 0, i = 0; i < A->mbs; i++) 1832 for (k = 0; k < bs; k++) 1833 for (j = A->i[i]; j < A->i[i + 1]; j++) 1834 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1835 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1836 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1837 PetscCall(PetscFree(matvals)); 1838 1839 /* write block size option to the viewer's .info file */ 1840 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1841 PetscFunctionReturn(PETSC_SUCCESS); 1842 } 1843 1844 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1845 { 1846 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1847 PetscInt i, bs = A->rmap->bs, k; 1848 1849 PetscFunctionBegin; 1850 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1851 for (i = 0; i < a->mbs; i++) { 1852 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1853 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1854 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1855 } 1856 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1857 PetscFunctionReturn(PETSC_SUCCESS); 1858 } 1859 1860 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1861 { 1862 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1863 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1864 PetscViewerFormat format; 1865 1866 PetscFunctionBegin; 1867 if (A->structure_only) { 1868 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 1872 PetscCall(PetscViewerGetFormat(viewer, &format)); 1873 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1874 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1875 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1876 const char *matname; 1877 Mat aij; 1878 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1879 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1880 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1881 PetscCall(MatView(aij, viewer)); 1882 PetscCall(MatDestroy(&aij)); 1883 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1884 PetscFunctionReturn(PETSC_SUCCESS); 1885 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1886 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1887 for (i = 0; i < a->mbs; i++) { 1888 for (j = 0; j < bs; j++) { 1889 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1890 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1891 for (l = 0; l < bs; l++) { 1892 #if defined(PETSC_USE_COMPLEX) 1893 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1894 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1895 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1896 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1897 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1898 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1899 } 1900 #else 1901 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1902 #endif 1903 } 1904 } 1905 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1906 } 1907 } 1908 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1909 } else { 1910 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1911 for (i = 0; i < a->mbs; i++) { 1912 for (j = 0; j < bs; j++) { 1913 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1914 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1915 for (l = 0; l < bs; l++) { 1916 #if defined(PETSC_USE_COMPLEX) 1917 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1918 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1919 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1920 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1921 } else { 1922 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1923 } 1924 #else 1925 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1926 #endif 1927 } 1928 } 1929 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1930 } 1931 } 1932 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1933 } 1934 PetscCall(PetscViewerFlush(viewer)); 1935 PetscFunctionReturn(PETSC_SUCCESS); 1936 } 1937 1938 #include <petscdraw.h> 1939 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1940 { 1941 Mat A = (Mat)Aa; 1942 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1943 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1944 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1945 MatScalar *aa; 1946 PetscViewer viewer; 1947 PetscViewerFormat format; 1948 1949 PetscFunctionBegin; 1950 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1951 PetscCall(PetscViewerGetFormat(viewer, &format)); 1952 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1953 1954 /* loop over matrix elements drawing boxes */ 1955 1956 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1957 PetscDrawCollectiveBegin(draw); 1958 /* Blue for negative, Cyan for zero and Red for positive */ 1959 color = PETSC_DRAW_BLUE; 1960 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1961 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1962 y_l = A->rmap->N - row - 1.0; 1963 y_r = y_l + 1.0; 1964 x_l = a->j[j] * bs; 1965 x_r = x_l + 1.0; 1966 aa = a->a + j * bs2; 1967 for (k = 0; k < bs; k++) { 1968 for (l = 0; l < bs; l++) { 1969 if (PetscRealPart(*aa++) >= 0.) continue; 1970 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1971 } 1972 } 1973 } 1974 } 1975 color = PETSC_DRAW_CYAN; 1976 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1977 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1978 y_l = A->rmap->N - row - 1.0; 1979 y_r = y_l + 1.0; 1980 x_l = a->j[j] * bs; 1981 x_r = x_l + 1.0; 1982 aa = a->a + j * bs2; 1983 for (k = 0; k < bs; k++) { 1984 for (l = 0; l < bs; l++) { 1985 if (PetscRealPart(*aa++) != 0.) continue; 1986 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1987 } 1988 } 1989 } 1990 } 1991 color = PETSC_DRAW_RED; 1992 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1993 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1994 y_l = A->rmap->N - row - 1.0; 1995 y_r = y_l + 1.0; 1996 x_l = a->j[j] * bs; 1997 x_r = x_l + 1.0; 1998 aa = a->a + j * bs2; 1999 for (k = 0; k < bs; k++) { 2000 for (l = 0; l < bs; l++) { 2001 if (PetscRealPart(*aa++) <= 0.) continue; 2002 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2003 } 2004 } 2005 } 2006 } 2007 PetscDrawCollectiveEnd(draw); 2008 } else { 2009 /* use contour shading to indicate magnitude of values */ 2010 /* first determine max of all nonzero values */ 2011 PetscReal minv = 0.0, maxv = 0.0; 2012 PetscDraw popup; 2013 2014 for (i = 0; i < a->nz * a->bs2; i++) { 2015 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2016 } 2017 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2018 PetscCall(PetscDrawGetPopup(draw, &popup)); 2019 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2020 2021 PetscDrawCollectiveBegin(draw); 2022 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2023 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2024 y_l = A->rmap->N - row - 1.0; 2025 y_r = y_l + 1.0; 2026 x_l = a->j[j] * bs; 2027 x_r = x_l + 1.0; 2028 aa = a->a + j * bs2; 2029 for (k = 0; k < bs; k++) { 2030 for (l = 0; l < bs; l++) { 2031 MatScalar v = *aa++; 2032 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2033 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2034 } 2035 } 2036 } 2037 } 2038 PetscDrawCollectiveEnd(draw); 2039 } 2040 PetscFunctionReturn(PETSC_SUCCESS); 2041 } 2042 2043 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2044 { 2045 PetscReal xl, yl, xr, yr, w, h; 2046 PetscDraw draw; 2047 PetscBool isnull; 2048 2049 PetscFunctionBegin; 2050 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2051 PetscCall(PetscDrawIsNull(draw, &isnull)); 2052 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2053 2054 xr = A->cmap->n; 2055 yr = A->rmap->N; 2056 h = yr / 10.0; 2057 w = xr / 10.0; 2058 xr += w; 2059 yr += h; 2060 xl = -w; 2061 yl = -h; 2062 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2063 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2064 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2065 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2066 PetscCall(PetscDrawSave(draw)); 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2071 { 2072 PetscBool iascii, isbinary, isdraw; 2073 2074 PetscFunctionBegin; 2075 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2076 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2077 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2078 if (iascii) { 2079 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2080 } else if (isbinary) { 2081 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2082 } else if (isdraw) { 2083 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2084 } else { 2085 Mat B; 2086 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2087 PetscCall(MatView(B, viewer)); 2088 PetscCall(MatDestroy(&B)); 2089 } 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2094 { 2095 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2096 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2097 PetscInt *ai = a->i, *ailen = a->ilen; 2098 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2099 MatScalar *ap, *aa = a->a; 2100 2101 PetscFunctionBegin; 2102 for (k = 0; k < m; k++) { /* loop over rows */ 2103 row = im[k]; 2104 brow = row / bs; 2105 if (row < 0) { 2106 v += n; 2107 continue; 2108 } /* negative row */ 2109 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2110 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2111 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2112 nrow = ailen[brow]; 2113 for (l = 0; l < n; l++) { /* loop over columns */ 2114 if (in[l] < 0) { 2115 v++; 2116 continue; 2117 } /* negative column */ 2118 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2119 col = in[l]; 2120 bcol = col / bs; 2121 cidx = col % bs; 2122 ridx = row % bs; 2123 high = nrow; 2124 low = 0; /* assume unsorted */ 2125 while (high - low > 5) { 2126 t = (low + high) / 2; 2127 if (rp[t] > bcol) high = t; 2128 else low = t; 2129 } 2130 for (i = low; i < high; i++) { 2131 if (rp[i] > bcol) break; 2132 if (rp[i] == bcol) { 2133 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2134 goto finished; 2135 } 2136 } 2137 *v++ = 0.0; 2138 finished:; 2139 } 2140 } 2141 PetscFunctionReturn(PETSC_SUCCESS); 2142 } 2143 2144 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2145 { 2146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2147 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2148 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2149 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2150 PetscBool roworiented = a->roworiented; 2151 const PetscScalar *value = v; 2152 MatScalar *ap = NULL, *aa = a->a, *bap; 2153 2154 PetscFunctionBegin; 2155 if (roworiented) { 2156 stepval = (n - 1) * bs; 2157 } else { 2158 stepval = (m - 1) * bs; 2159 } 2160 for (k = 0; k < m; k++) { /* loop over added rows */ 2161 row = im[k]; 2162 if (row < 0) continue; 2163 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2164 rp = aj + ai[row]; 2165 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2166 rmax = imax[row]; 2167 nrow = ailen[row]; 2168 low = 0; 2169 high = nrow; 2170 for (l = 0; l < n; l++) { /* loop over added columns */ 2171 if (in[l] < 0) continue; 2172 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2173 col = in[l]; 2174 if (!A->structure_only) { 2175 if (roworiented) { 2176 value = v + (k * (stepval + bs) + l) * bs; 2177 } else { 2178 value = v + (l * (stepval + bs) + k) * bs; 2179 } 2180 } 2181 if (col <= lastcol) low = 0; 2182 else high = nrow; 2183 lastcol = col; 2184 while (high - low > 7) { 2185 t = (low + high) / 2; 2186 if (rp[t] > col) high = t; 2187 else low = t; 2188 } 2189 for (i = low; i < high; i++) { 2190 if (rp[i] > col) break; 2191 if (rp[i] == col) { 2192 if (A->structure_only) goto noinsert2; 2193 bap = ap + bs2 * i; 2194 if (roworiented) { 2195 if (is == ADD_VALUES) { 2196 for (ii = 0; ii < bs; ii++, value += stepval) { 2197 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2198 } 2199 } else { 2200 for (ii = 0; ii < bs; ii++, value += stepval) { 2201 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2202 } 2203 } 2204 } else { 2205 if (is == ADD_VALUES) { 2206 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2207 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2208 bap += bs; 2209 } 2210 } else { 2211 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2212 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2213 bap += bs; 2214 } 2215 } 2216 } 2217 goto noinsert2; 2218 } 2219 } 2220 if (nonew == 1) goto noinsert2; 2221 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2222 if (A->structure_only) { 2223 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2224 } else { 2225 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2226 } 2227 N = nrow++ - 1; 2228 high++; 2229 /* shift up all the later entries in this row */ 2230 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2231 rp[i] = col; 2232 if (!A->structure_only) { 2233 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2234 bap = ap + bs2 * i; 2235 if (roworiented) { 2236 for (ii = 0; ii < bs; ii++, value += stepval) { 2237 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2238 } 2239 } else { 2240 for (ii = 0; ii < bs; ii++, value += stepval) { 2241 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2242 } 2243 } 2244 } 2245 noinsert2:; 2246 low = i; 2247 } 2248 ailen[row] = nrow; 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2254 { 2255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2256 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2257 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2258 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2259 MatScalar *aa = a->a, *ap; 2260 PetscReal ratio = 0.6; 2261 2262 PetscFunctionBegin; 2263 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 2264 2265 if (m) rmax = ailen[0]; 2266 for (i = 1; i < mbs; i++) { 2267 /* move each row back by the amount of empty slots (fshift) before it*/ 2268 fshift += imax[i - 1] - ailen[i - 1]; 2269 rmax = PetscMax(rmax, ailen[i]); 2270 if (fshift) { 2271 ip = aj + ai[i]; 2272 ap = aa + bs2 * ai[i]; 2273 N = ailen[i]; 2274 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2275 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2276 } 2277 ai[i] = ai[i - 1] + ailen[i - 1]; 2278 } 2279 if (mbs) { 2280 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2281 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2282 } 2283 2284 /* reset ilen and imax for each row */ 2285 a->nonzerorowcnt = 0; 2286 if (A->structure_only) { 2287 PetscCall(PetscFree2(a->imax, a->ilen)); 2288 } else { /* !A->structure_only */ 2289 for (i = 0; i < mbs; i++) { 2290 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2291 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2292 } 2293 } 2294 a->nz = ai[mbs]; 2295 2296 /* diagonals may have moved, so kill the diagonal pointers */ 2297 a->idiagvalid = PETSC_FALSE; 2298 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2299 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2300 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2301 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2302 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2303 2304 A->info.mallocs += a->reallocs; 2305 a->reallocs = 0; 2306 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2307 a->rmax = rmax; 2308 2309 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2310 PetscFunctionReturn(PETSC_SUCCESS); 2311 } 2312 2313 /* 2314 This function returns an array of flags which indicate the locations of contiguous 2315 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2316 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2317 Assume: sizes should be long enough to hold all the values. 2318 */ 2319 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2320 { 2321 PetscInt j = 0; 2322 2323 PetscFunctionBegin; 2324 for (PetscInt i = 0; i < n; j++) { 2325 PetscInt row = idx[i]; 2326 if (row % bs != 0) { /* Not the beginning of a block */ 2327 sizes[j] = 1; 2328 i++; 2329 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2330 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2331 i++; 2332 } else { /* Beginning of the block, so check if the complete block exists */ 2333 PetscBool flg = PETSC_TRUE; 2334 for (PetscInt k = 1; k < bs; k++) { 2335 if (row + k != idx[i + k]) { /* break in the block */ 2336 flg = PETSC_FALSE; 2337 break; 2338 } 2339 } 2340 if (flg) { /* No break in the bs */ 2341 sizes[j] = bs; 2342 i += bs; 2343 } else { 2344 sizes[j] = 1; 2345 i++; 2346 } 2347 } 2348 } 2349 *bs_max = j; 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } 2352 2353 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2354 { 2355 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2356 PetscInt i, j, k, count, *rows; 2357 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2358 PetscScalar zero = 0.0; 2359 MatScalar *aa; 2360 const PetscScalar *xx; 2361 PetscScalar *bb; 2362 2363 PetscFunctionBegin; 2364 /* fix right hand side if needed */ 2365 if (x && b) { 2366 PetscCall(VecGetArrayRead(x, &xx)); 2367 PetscCall(VecGetArray(b, &bb)); 2368 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2369 PetscCall(VecRestoreArrayRead(x, &xx)); 2370 PetscCall(VecRestoreArray(b, &bb)); 2371 } 2372 2373 /* Make a copy of the IS and sort it */ 2374 /* allocate memory for rows,sizes */ 2375 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2376 2377 /* copy IS values to rows, and sort them */ 2378 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2379 PetscCall(PetscSortInt(is_n, rows)); 2380 2381 if (baij->keepnonzeropattern) { 2382 for (i = 0; i < is_n; i++) sizes[i] = 1; 2383 bs_max = is_n; 2384 } else { 2385 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2386 A->nonzerostate++; 2387 } 2388 2389 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2390 row = rows[j]; 2391 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2392 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2393 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2394 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2395 if (diag != (PetscScalar)0.0) { 2396 if (baij->ilen[row / bs] > 0) { 2397 baij->ilen[row / bs] = 1; 2398 baij->j[baij->i[row / bs]] = row / bs; 2399 2400 PetscCall(PetscArrayzero(aa, count * bs)); 2401 } 2402 /* Now insert all the diagonal values for this bs */ 2403 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2404 } else { /* (diag == 0.0) */ 2405 baij->ilen[row / bs] = 0; 2406 } /* end (diag == 0.0) */ 2407 } else { /* (sizes[i] != bs) */ 2408 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2409 for (k = 0; k < count; k++) { 2410 aa[0] = zero; 2411 aa += bs; 2412 } 2413 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2414 } 2415 } 2416 2417 PetscCall(PetscFree2(rows, sizes)); 2418 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2419 PetscFunctionReturn(PETSC_SUCCESS); 2420 } 2421 2422 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2423 { 2424 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2425 PetscInt i, j, k, count; 2426 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2427 PetscScalar zero = 0.0; 2428 MatScalar *aa; 2429 const PetscScalar *xx; 2430 PetscScalar *bb; 2431 PetscBool *zeroed, vecs = PETSC_FALSE; 2432 2433 PetscFunctionBegin; 2434 /* fix right hand side if needed */ 2435 if (x && b) { 2436 PetscCall(VecGetArrayRead(x, &xx)); 2437 PetscCall(VecGetArray(b, &bb)); 2438 vecs = PETSC_TRUE; 2439 } 2440 2441 /* zero the columns */ 2442 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2443 for (i = 0; i < is_n; i++) { 2444 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2445 zeroed[is_idx[i]] = PETSC_TRUE; 2446 } 2447 for (i = 0; i < A->rmap->N; i++) { 2448 if (!zeroed[i]) { 2449 row = i / bs; 2450 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2451 for (k = 0; k < bs; k++) { 2452 col = bs * baij->j[j] + k; 2453 if (zeroed[col]) { 2454 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2455 if (vecs) bb[i] -= aa[0] * xx[col]; 2456 aa[0] = 0.0; 2457 } 2458 } 2459 } 2460 } else if (vecs) bb[i] = diag * xx[i]; 2461 } 2462 PetscCall(PetscFree(zeroed)); 2463 if (vecs) { 2464 PetscCall(VecRestoreArrayRead(x, &xx)); 2465 PetscCall(VecRestoreArray(b, &bb)); 2466 } 2467 2468 /* zero the rows */ 2469 for (i = 0; i < is_n; i++) { 2470 row = is_idx[i]; 2471 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2472 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2473 for (k = 0; k < count; k++) { 2474 aa[0] = zero; 2475 aa += bs; 2476 } 2477 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2478 } 2479 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2480 PetscFunctionReturn(PETSC_SUCCESS); 2481 } 2482 2483 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2484 { 2485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2486 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2487 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2488 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2489 PetscInt ridx, cidx, bs2 = a->bs2; 2490 PetscBool roworiented = a->roworiented; 2491 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2492 2493 PetscFunctionBegin; 2494 for (k = 0; k < m; k++) { /* loop over added rows */ 2495 row = im[k]; 2496 brow = row / bs; 2497 if (row < 0) continue; 2498 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2499 rp = aj + ai[brow]; 2500 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2501 rmax = imax[brow]; 2502 nrow = ailen[brow]; 2503 low = 0; 2504 high = nrow; 2505 for (l = 0; l < n; l++) { /* loop over added columns */ 2506 if (in[l] < 0) continue; 2507 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2508 col = in[l]; 2509 bcol = col / bs; 2510 ridx = row % bs; 2511 cidx = col % bs; 2512 if (!A->structure_only) { 2513 if (roworiented) { 2514 value = v[l + k * n]; 2515 } else { 2516 value = v[k + l * m]; 2517 } 2518 } 2519 if (col <= lastcol) low = 0; 2520 else high = nrow; 2521 lastcol = col; 2522 while (high - low > 7) { 2523 t = (low + high) / 2; 2524 if (rp[t] > bcol) high = t; 2525 else low = t; 2526 } 2527 for (i = low; i < high; i++) { 2528 if (rp[i] > bcol) break; 2529 if (rp[i] == bcol) { 2530 bap = ap + bs2 * i + bs * cidx + ridx; 2531 if (!A->structure_only) { 2532 if (is == ADD_VALUES) *bap += value; 2533 else *bap = value; 2534 } 2535 goto noinsert1; 2536 } 2537 } 2538 if (nonew == 1) goto noinsert1; 2539 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2540 if (A->structure_only) { 2541 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2542 } else { 2543 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2544 } 2545 N = nrow++ - 1; 2546 high++; 2547 /* shift up all the later entries in this row */ 2548 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2549 rp[i] = bcol; 2550 if (!A->structure_only) { 2551 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2552 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2553 ap[bs2 * i + bs * cidx + ridx] = value; 2554 } 2555 a->nz++; 2556 A->nonzerostate++; 2557 noinsert1:; 2558 low = i; 2559 } 2560 ailen[brow] = nrow; 2561 } 2562 PetscFunctionReturn(PETSC_SUCCESS); 2563 } 2564 2565 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2566 { 2567 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2568 Mat outA; 2569 PetscBool row_identity, col_identity; 2570 2571 PetscFunctionBegin; 2572 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2573 PetscCall(ISIdentity(row, &row_identity)); 2574 PetscCall(ISIdentity(col, &col_identity)); 2575 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2576 2577 outA = inA; 2578 inA->factortype = MAT_FACTOR_LU; 2579 PetscCall(PetscFree(inA->solvertype)); 2580 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2581 2582 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2583 2584 PetscCall(PetscObjectReference((PetscObject)row)); 2585 PetscCall(ISDestroy(&a->row)); 2586 a->row = row; 2587 PetscCall(PetscObjectReference((PetscObject)col)); 2588 PetscCall(ISDestroy(&a->col)); 2589 a->col = col; 2590 2591 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2592 PetscCall(ISDestroy(&a->icol)); 2593 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2594 2595 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2596 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2597 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2598 PetscFunctionReturn(PETSC_SUCCESS); 2599 } 2600 2601 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2602 { 2603 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2604 2605 PetscFunctionBegin; 2606 baij->nz = baij->maxnz; 2607 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2608 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2609 PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2614 2615 Input Parameters: 2616 + mat - the `MATSEQBAIJ` matrix 2617 - indices - the column indices 2618 2619 Level: advanced 2620 2621 Notes: 2622 This can be called if you have precomputed the nonzero structure of the 2623 matrix and want to provide it to the matrix object to improve the performance 2624 of the `MatSetValues()` operation. 2625 2626 You MUST have set the correct numbers of nonzeros per row in the call to 2627 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2628 2629 MUST be called before any calls to `MatSetValues()` 2630 2631 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2632 @*/ 2633 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2634 { 2635 PetscFunctionBegin; 2636 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2637 PetscValidIntPointer(indices, 2); 2638 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2639 PetscFunctionReturn(PETSC_SUCCESS); 2640 } 2641 2642 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2643 { 2644 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2645 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2646 PetscReal atmp; 2647 PetscScalar *x, zero = 0.0; 2648 MatScalar *aa; 2649 PetscInt ncols, brow, krow, kcol; 2650 2651 PetscFunctionBegin; 2652 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2653 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2654 bs = A->rmap->bs; 2655 aa = a->a; 2656 ai = a->i; 2657 aj = a->j; 2658 mbs = a->mbs; 2659 2660 PetscCall(VecSet(v, zero)); 2661 PetscCall(VecGetArray(v, &x)); 2662 PetscCall(VecGetLocalSize(v, &n)); 2663 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2664 for (i = 0; i < mbs; i++) { 2665 ncols = ai[1] - ai[0]; 2666 ai++; 2667 brow = bs * i; 2668 for (j = 0; j < ncols; j++) { 2669 for (kcol = 0; kcol < bs; kcol++) { 2670 for (krow = 0; krow < bs; krow++) { 2671 atmp = PetscAbsScalar(*aa); 2672 aa++; 2673 row = brow + krow; /* row index */ 2674 if (PetscAbsScalar(x[row]) < atmp) { 2675 x[row] = atmp; 2676 if (idx) idx[row] = bs * (*aj) + kcol; 2677 } 2678 } 2679 } 2680 aj++; 2681 } 2682 } 2683 PetscCall(VecRestoreArray(v, &x)); 2684 PetscFunctionReturn(PETSC_SUCCESS); 2685 } 2686 2687 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2688 { 2689 PetscFunctionBegin; 2690 /* If the two matrices have the same copy implementation, use fast copy. */ 2691 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2693 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2694 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2695 2696 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2697 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2698 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2699 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2700 } else { 2701 PetscCall(MatCopy_Basic(A, B, str)); 2702 } 2703 PetscFunctionReturn(PETSC_SUCCESS); 2704 } 2705 2706 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2707 { 2708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2709 2710 PetscFunctionBegin; 2711 *array = a->a; 2712 PetscFunctionReturn(PETSC_SUCCESS); 2713 } 2714 2715 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2716 { 2717 PetscFunctionBegin; 2718 *array = NULL; 2719 PetscFunctionReturn(PETSC_SUCCESS); 2720 } 2721 2722 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2723 { 2724 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2725 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2726 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2727 2728 PetscFunctionBegin; 2729 /* Set the number of nonzeros in the new matrix */ 2730 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2731 PetscFunctionReturn(PETSC_SUCCESS); 2732 } 2733 2734 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2735 { 2736 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2737 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2738 PetscBLASInt one = 1; 2739 2740 PetscFunctionBegin; 2741 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2742 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2743 if (e) { 2744 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2745 if (e) { 2746 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2747 if (e) str = SAME_NONZERO_PATTERN; 2748 } 2749 } 2750 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2751 } 2752 if (str == SAME_NONZERO_PATTERN) { 2753 PetscScalar alpha = a; 2754 PetscBLASInt bnz; 2755 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2756 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2757 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2758 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2759 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2760 } else { 2761 Mat B; 2762 PetscInt *nnz; 2763 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2764 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2765 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2766 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2767 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2768 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2769 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2770 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2771 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2772 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2773 PetscCall(MatHeaderMerge(Y, &B)); 2774 PetscCall(PetscFree(nnz)); 2775 } 2776 PetscFunctionReturn(PETSC_SUCCESS); 2777 } 2778 2779 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2780 { 2781 #if PetscDefined(USE_COMPLEX) 2782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2783 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2784 MatScalar *aa = a->a; 2785 2786 PetscFunctionBegin; 2787 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2788 PetscFunctionReturn(PETSC_SUCCESS); 2789 #else 2790 (void)A; 2791 return PETSC_SUCCESS; 2792 #endif 2793 } 2794 2795 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2796 { 2797 #if PetscDefined(USE_COMPLEX) 2798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2799 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2800 MatScalar *aa = a->a; 2801 2802 PetscFunctionBegin; 2803 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2804 PetscFunctionReturn(PETSC_SUCCESS); 2805 #else 2806 (void)A; 2807 return PETSC_SUCCESS; 2808 #endif 2809 } 2810 2811 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2812 { 2813 #if PetscDefined(USE_COMPLEX) 2814 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2815 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2816 MatScalar *aa = a->a; 2817 2818 PetscFunctionBegin; 2819 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2820 PetscFunctionReturn(PETSC_SUCCESS); 2821 #else 2822 (void)A; 2823 return PETSC_SUCCESS; 2824 #endif 2825 } 2826 2827 /* 2828 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2829 */ 2830 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2831 { 2832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2833 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2834 PetscInt nz = a->i[m], row, *jj, mr, col; 2835 2836 PetscFunctionBegin; 2837 *nn = n; 2838 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2839 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2840 PetscCall(PetscCalloc1(n, &collengths)); 2841 PetscCall(PetscMalloc1(n + 1, &cia)); 2842 PetscCall(PetscMalloc1(nz, &cja)); 2843 jj = a->j; 2844 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2845 cia[0] = oshift; 2846 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2847 PetscCall(PetscArrayzero(collengths, n)); 2848 jj = a->j; 2849 for (row = 0; row < m; row++) { 2850 mr = a->i[row + 1] - a->i[row]; 2851 for (i = 0; i < mr; i++) { 2852 col = *jj++; 2853 2854 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2855 } 2856 } 2857 PetscCall(PetscFree(collengths)); 2858 *ia = cia; 2859 *ja = cja; 2860 PetscFunctionReturn(PETSC_SUCCESS); 2861 } 2862 2863 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2864 { 2865 PetscFunctionBegin; 2866 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2867 PetscCall(PetscFree(*ia)); 2868 PetscCall(PetscFree(*ja)); 2869 PetscFunctionReturn(PETSC_SUCCESS); 2870 } 2871 2872 /* 2873 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2874 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2875 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2876 */ 2877 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2878 { 2879 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2880 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2881 PetscInt nz = a->i[m], row, *jj, mr, col; 2882 PetscInt *cspidx; 2883 2884 PetscFunctionBegin; 2885 *nn = n; 2886 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2887 2888 PetscCall(PetscCalloc1(n, &collengths)); 2889 PetscCall(PetscMalloc1(n + 1, &cia)); 2890 PetscCall(PetscMalloc1(nz, &cja)); 2891 PetscCall(PetscMalloc1(nz, &cspidx)); 2892 jj = a->j; 2893 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2894 cia[0] = oshift; 2895 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2896 PetscCall(PetscArrayzero(collengths, n)); 2897 jj = a->j; 2898 for (row = 0; row < m; row++) { 2899 mr = a->i[row + 1] - a->i[row]; 2900 for (i = 0; i < mr; i++) { 2901 col = *jj++; 2902 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2903 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2904 } 2905 } 2906 PetscCall(PetscFree(collengths)); 2907 *ia = cia; 2908 *ja = cja; 2909 *spidx = cspidx; 2910 PetscFunctionReturn(PETSC_SUCCESS); 2911 } 2912 2913 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2914 { 2915 PetscFunctionBegin; 2916 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2917 PetscCall(PetscFree(*spidx)); 2918 PetscFunctionReturn(PETSC_SUCCESS); 2919 } 2920 2921 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2922 { 2923 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2924 2925 PetscFunctionBegin; 2926 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2927 PetscCall(MatShift_Basic(Y, a)); 2928 PetscFunctionReturn(PETSC_SUCCESS); 2929 } 2930 2931 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2932 MatGetRow_SeqBAIJ, 2933 MatRestoreRow_SeqBAIJ, 2934 MatMult_SeqBAIJ_N, 2935 /* 4*/ MatMultAdd_SeqBAIJ_N, 2936 MatMultTranspose_SeqBAIJ, 2937 MatMultTransposeAdd_SeqBAIJ, 2938 NULL, 2939 NULL, 2940 NULL, 2941 /* 10*/ NULL, 2942 MatLUFactor_SeqBAIJ, 2943 NULL, 2944 NULL, 2945 MatTranspose_SeqBAIJ, 2946 /* 15*/ MatGetInfo_SeqBAIJ, 2947 MatEqual_SeqBAIJ, 2948 MatGetDiagonal_SeqBAIJ, 2949 MatDiagonalScale_SeqBAIJ, 2950 MatNorm_SeqBAIJ, 2951 /* 20*/ NULL, 2952 MatAssemblyEnd_SeqBAIJ, 2953 MatSetOption_SeqBAIJ, 2954 MatZeroEntries_SeqBAIJ, 2955 /* 24*/ MatZeroRows_SeqBAIJ, 2956 NULL, 2957 NULL, 2958 NULL, 2959 NULL, 2960 /* 29*/ MatSetUp_Seq_Hash, 2961 NULL, 2962 NULL, 2963 NULL, 2964 NULL, 2965 /* 34*/ MatDuplicate_SeqBAIJ, 2966 NULL, 2967 NULL, 2968 MatILUFactor_SeqBAIJ, 2969 NULL, 2970 /* 39*/ MatAXPY_SeqBAIJ, 2971 MatCreateSubMatrices_SeqBAIJ, 2972 MatIncreaseOverlap_SeqBAIJ, 2973 MatGetValues_SeqBAIJ, 2974 MatCopy_SeqBAIJ, 2975 /* 44*/ NULL, 2976 MatScale_SeqBAIJ, 2977 MatShift_SeqBAIJ, 2978 NULL, 2979 MatZeroRowsColumns_SeqBAIJ, 2980 /* 49*/ NULL, 2981 MatGetRowIJ_SeqBAIJ, 2982 MatRestoreRowIJ_SeqBAIJ, 2983 MatGetColumnIJ_SeqBAIJ, 2984 MatRestoreColumnIJ_SeqBAIJ, 2985 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2986 NULL, 2987 NULL, 2988 NULL, 2989 MatSetValuesBlocked_SeqBAIJ, 2990 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2991 MatDestroy_SeqBAIJ, 2992 MatView_SeqBAIJ, 2993 NULL, 2994 NULL, 2995 /* 64*/ NULL, 2996 NULL, 2997 NULL, 2998 NULL, 2999 NULL, 3000 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3001 NULL, 3002 MatConvert_Basic, 3003 NULL, 3004 NULL, 3005 /* 74*/ NULL, 3006 MatFDColoringApply_BAIJ, 3007 NULL, 3008 NULL, 3009 NULL, 3010 /* 79*/ NULL, 3011 NULL, 3012 NULL, 3013 NULL, 3014 MatLoad_SeqBAIJ, 3015 /* 84*/ NULL, 3016 NULL, 3017 NULL, 3018 NULL, 3019 NULL, 3020 /* 89*/ NULL, 3021 NULL, 3022 NULL, 3023 NULL, 3024 NULL, 3025 /* 94*/ NULL, 3026 NULL, 3027 NULL, 3028 NULL, 3029 NULL, 3030 /* 99*/ NULL, 3031 NULL, 3032 NULL, 3033 MatConjugate_SeqBAIJ, 3034 NULL, 3035 /*104*/ NULL, 3036 MatRealPart_SeqBAIJ, 3037 MatImaginaryPart_SeqBAIJ, 3038 NULL, 3039 NULL, 3040 /*109*/ NULL, 3041 NULL, 3042 NULL, 3043 NULL, 3044 MatMissingDiagonal_SeqBAIJ, 3045 /*114*/ NULL, 3046 NULL, 3047 NULL, 3048 NULL, 3049 NULL, 3050 /*119*/ NULL, 3051 NULL, 3052 MatMultHermitianTranspose_SeqBAIJ, 3053 MatMultHermitianTransposeAdd_SeqBAIJ, 3054 NULL, 3055 /*124*/ NULL, 3056 MatGetColumnReductions_SeqBAIJ, 3057 MatInvertBlockDiagonal_SeqBAIJ, 3058 NULL, 3059 NULL, 3060 /*129*/ NULL, 3061 NULL, 3062 NULL, 3063 NULL, 3064 NULL, 3065 /*134*/ NULL, 3066 NULL, 3067 NULL, 3068 NULL, 3069 NULL, 3070 /*139*/ MatSetBlockSizes_Default, 3071 NULL, 3072 NULL, 3073 MatFDColoringSetUp_SeqXAIJ, 3074 NULL, 3075 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3076 MatDestroySubMatrices_SeqBAIJ, 3077 NULL, 3078 NULL, 3079 NULL, 3080 NULL, 3081 /*150*/ NULL, 3082 NULL}; 3083 3084 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3085 { 3086 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3087 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3088 3089 PetscFunctionBegin; 3090 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3091 3092 /* allocate space for values if not already there */ 3093 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3094 3095 /* copy values over */ 3096 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3097 PetscFunctionReturn(PETSC_SUCCESS); 3098 } 3099 3100 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3101 { 3102 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3103 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3104 3105 PetscFunctionBegin; 3106 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3107 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3108 3109 /* copy values over */ 3110 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3111 PetscFunctionReturn(PETSC_SUCCESS); 3112 } 3113 3114 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3115 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3116 3117 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3118 { 3119 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3120 PetscInt i, mbs, nbs, bs2; 3121 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3122 3123 PetscFunctionBegin; 3124 if (B->hash_active) { 3125 PetscInt bs; 3126 B->ops[0] = b->cops; 3127 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3128 PetscCall(MatGetBlockSize(B, &bs)); 3129 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3130 PetscCall(PetscFree(b->dnz)); 3131 PetscCall(PetscFree(b->bdnz)); 3132 B->hash_active = PETSC_FALSE; 3133 } 3134 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3135 if (nz == MAT_SKIP_ALLOCATION) { 3136 skipallocation = PETSC_TRUE; 3137 nz = 0; 3138 } 3139 3140 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3141 PetscCall(PetscLayoutSetUp(B->rmap)); 3142 PetscCall(PetscLayoutSetUp(B->cmap)); 3143 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3144 3145 B->preallocated = PETSC_TRUE; 3146 3147 mbs = B->rmap->n / bs; 3148 nbs = B->cmap->n / bs; 3149 bs2 = bs * bs; 3150 3151 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3152 3153 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3154 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3155 if (nnz) { 3156 for (i = 0; i < mbs; i++) { 3157 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3158 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3159 } 3160 } 3161 3162 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3163 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3164 PetscOptionsEnd(); 3165 3166 if (!flg) { 3167 switch (bs) { 3168 case 1: 3169 B->ops->mult = MatMult_SeqBAIJ_1; 3170 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3171 break; 3172 case 2: 3173 B->ops->mult = MatMult_SeqBAIJ_2; 3174 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3175 break; 3176 case 3: 3177 B->ops->mult = MatMult_SeqBAIJ_3; 3178 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3179 break; 3180 case 4: 3181 B->ops->mult = MatMult_SeqBAIJ_4; 3182 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3183 break; 3184 case 5: 3185 B->ops->mult = MatMult_SeqBAIJ_5; 3186 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3187 break; 3188 case 6: 3189 B->ops->mult = MatMult_SeqBAIJ_6; 3190 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3191 break; 3192 case 7: 3193 B->ops->mult = MatMult_SeqBAIJ_7; 3194 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3195 break; 3196 case 9: { 3197 PetscInt version = 1; 3198 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3199 switch (version) { 3200 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3201 case 1: 3202 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3203 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3204 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3205 break; 3206 #endif 3207 default: 3208 B->ops->mult = MatMult_SeqBAIJ_N; 3209 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3210 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3211 break; 3212 } 3213 break; 3214 } 3215 case 11: 3216 B->ops->mult = MatMult_SeqBAIJ_11; 3217 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3218 break; 3219 case 12: { 3220 PetscInt version = 1; 3221 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3222 switch (version) { 3223 case 1: 3224 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3225 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3226 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3227 break; 3228 case 2: 3229 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3230 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3231 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3232 break; 3233 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3234 case 3: 3235 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3236 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3237 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3238 break; 3239 #endif 3240 default: 3241 B->ops->mult = MatMult_SeqBAIJ_N; 3242 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3243 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3244 break; 3245 } 3246 break; 3247 } 3248 case 15: { 3249 PetscInt version = 1; 3250 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3251 switch (version) { 3252 case 1: 3253 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3254 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3255 break; 3256 case 2: 3257 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3258 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3259 break; 3260 case 3: 3261 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3262 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3263 break; 3264 case 4: 3265 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3266 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3267 break; 3268 default: 3269 B->ops->mult = MatMult_SeqBAIJ_N; 3270 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3271 break; 3272 } 3273 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3274 break; 3275 } 3276 default: 3277 B->ops->mult = MatMult_SeqBAIJ_N; 3278 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3279 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3280 break; 3281 } 3282 } 3283 B->ops->sor = MatSOR_SeqBAIJ; 3284 b->mbs = mbs; 3285 b->nbs = nbs; 3286 if (!skipallocation) { 3287 if (!b->imax) { 3288 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3289 3290 b->free_imax_ilen = PETSC_TRUE; 3291 } 3292 /* b->ilen will count nonzeros in each block row so far. */ 3293 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3294 if (!nnz) { 3295 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3296 else if (nz < 0) nz = 1; 3297 nz = PetscMin(nz, nbs); 3298 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3299 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3300 } else { 3301 PetscInt64 nz64 = 0; 3302 for (i = 0; i < mbs; i++) { 3303 b->imax[i] = nnz[i]; 3304 nz64 += nnz[i]; 3305 } 3306 PetscCall(PetscIntCast(nz64, &nz)); 3307 } 3308 3309 /* allocate the matrix space */ 3310 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3311 if (B->structure_only) { 3312 PetscCall(PetscMalloc1(nz, &b->j)); 3313 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3314 } else { 3315 PetscInt nzbs2 = 0; 3316 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3317 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3318 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3319 } 3320 PetscCall(PetscArrayzero(b->j, nz)); 3321 3322 if (B->structure_only) { 3323 b->singlemalloc = PETSC_FALSE; 3324 b->free_a = PETSC_FALSE; 3325 } else { 3326 b->singlemalloc = PETSC_TRUE; 3327 b->free_a = PETSC_TRUE; 3328 } 3329 b->free_ij = PETSC_TRUE; 3330 3331 b->i[0] = 0; 3332 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3333 3334 } else { 3335 b->free_a = PETSC_FALSE; 3336 b->free_ij = PETSC_FALSE; 3337 } 3338 3339 b->bs2 = bs2; 3340 b->mbs = mbs; 3341 b->nz = 0; 3342 b->maxnz = nz; 3343 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3344 B->was_assembled = PETSC_FALSE; 3345 B->assembled = PETSC_FALSE; 3346 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3347 PetscFunctionReturn(PETSC_SUCCESS); 3348 } 3349 3350 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3351 { 3352 PetscInt i, m, nz, nz_max = 0, *nnz; 3353 PetscScalar *values = NULL; 3354 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3355 3356 PetscFunctionBegin; 3357 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3358 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3359 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3360 PetscCall(PetscLayoutSetUp(B->rmap)); 3361 PetscCall(PetscLayoutSetUp(B->cmap)); 3362 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3363 m = B->rmap->n / bs; 3364 3365 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3366 PetscCall(PetscMalloc1(m + 1, &nnz)); 3367 for (i = 0; i < m; i++) { 3368 nz = ii[i + 1] - ii[i]; 3369 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3370 nz_max = PetscMax(nz_max, nz); 3371 nnz[i] = nz; 3372 } 3373 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3374 PetscCall(PetscFree(nnz)); 3375 3376 values = (PetscScalar *)V; 3377 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3378 for (i = 0; i < m; i++) { 3379 PetscInt ncols = ii[i + 1] - ii[i]; 3380 const PetscInt *icols = jj + ii[i]; 3381 if (bs == 1 || !roworiented) { 3382 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3383 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3384 } else { 3385 PetscInt j; 3386 for (j = 0; j < ncols; j++) { 3387 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3388 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3389 } 3390 } 3391 } 3392 if (!V) PetscCall(PetscFree(values)); 3393 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3394 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3395 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } 3398 3399 /*@C 3400 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3401 3402 Not Collective 3403 3404 Input Parameter: 3405 . mat - a `MATSEQBAIJ` matrix 3406 3407 Output Parameter: 3408 . array - pointer to the data 3409 3410 Level: intermediate 3411 3412 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3413 @*/ 3414 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3415 { 3416 PetscFunctionBegin; 3417 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3418 PetscFunctionReturn(PETSC_SUCCESS); 3419 } 3420 3421 /*@C 3422 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3423 3424 Not Collective 3425 3426 Input Parameters: 3427 + mat - a `MATSEQBAIJ` matrix 3428 - array - pointer to the data 3429 3430 Level: intermediate 3431 3432 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3433 @*/ 3434 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3435 { 3436 PetscFunctionBegin; 3437 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3438 PetscFunctionReturn(PETSC_SUCCESS); 3439 } 3440 3441 /*MC 3442 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3443 block sparse compressed row format. 3444 3445 Options Database Keys: 3446 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3447 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3448 3449 Level: beginner 3450 3451 Notes: 3452 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3453 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3454 3455 Run with `-info` to see what version of the matrix-vector product is being used 3456 3457 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3458 M*/ 3459 3460 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3461 3462 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3463 { 3464 PetscMPIInt size; 3465 Mat_SeqBAIJ *b; 3466 3467 PetscFunctionBegin; 3468 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3469 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3470 3471 PetscCall(PetscNew(&b)); 3472 B->data = (void *)b; 3473 B->ops[0] = MatOps_Values; 3474 3475 b->row = NULL; 3476 b->col = NULL; 3477 b->icol = NULL; 3478 b->reallocs = 0; 3479 b->saved_values = NULL; 3480 3481 b->roworiented = PETSC_TRUE; 3482 b->nonew = 0; 3483 b->diag = NULL; 3484 B->spptr = NULL; 3485 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3486 b->keepnonzeropattern = PETSC_FALSE; 3487 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3492 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3493 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3494 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3495 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3496 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3497 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3498 #if defined(PETSC_HAVE_HYPRE) 3499 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3500 #endif 3501 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3502 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3503 PetscFunctionReturn(PETSC_SUCCESS); 3504 } 3505 3506 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3507 { 3508 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3509 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3510 3511 PetscFunctionBegin; 3512 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3513 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3514 3515 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3516 c->imax = a->imax; 3517 c->ilen = a->ilen; 3518 c->free_imax_ilen = PETSC_FALSE; 3519 } else { 3520 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3521 for (i = 0; i < mbs; i++) { 3522 c->imax[i] = a->imax[i]; 3523 c->ilen[i] = a->ilen[i]; 3524 } 3525 c->free_imax_ilen = PETSC_TRUE; 3526 } 3527 3528 /* allocate the matrix space */ 3529 if (mallocmatspace) { 3530 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3531 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3532 3533 c->i = a->i; 3534 c->j = a->j; 3535 c->singlemalloc = PETSC_FALSE; 3536 c->free_a = PETSC_TRUE; 3537 c->free_ij = PETSC_FALSE; 3538 c->parent = A; 3539 C->preallocated = PETSC_TRUE; 3540 C->assembled = PETSC_TRUE; 3541 3542 PetscCall(PetscObjectReference((PetscObject)A)); 3543 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3544 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3545 } else { 3546 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3547 3548 c->singlemalloc = PETSC_TRUE; 3549 c->free_a = PETSC_TRUE; 3550 c->free_ij = PETSC_TRUE; 3551 3552 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3553 if (mbs > 0) { 3554 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3555 if (cpvalues == MAT_COPY_VALUES) { 3556 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3557 } else { 3558 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3559 } 3560 } 3561 C->preallocated = PETSC_TRUE; 3562 C->assembled = PETSC_TRUE; 3563 } 3564 } 3565 3566 c->roworiented = a->roworiented; 3567 c->nonew = a->nonew; 3568 3569 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3570 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3571 3572 c->bs2 = a->bs2; 3573 c->mbs = a->mbs; 3574 c->nbs = a->nbs; 3575 3576 if (a->diag) { 3577 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3578 c->diag = a->diag; 3579 c->free_diag = PETSC_FALSE; 3580 } else { 3581 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3582 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3583 c->free_diag = PETSC_TRUE; 3584 } 3585 } else c->diag = NULL; 3586 3587 c->nz = a->nz; 3588 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3589 c->solve_work = NULL; 3590 c->mult_work = NULL; 3591 c->sor_workt = NULL; 3592 c->sor_work = NULL; 3593 3594 c->compressedrow.use = a->compressedrow.use; 3595 c->compressedrow.nrows = a->compressedrow.nrows; 3596 if (a->compressedrow.use) { 3597 i = a->compressedrow.nrows; 3598 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3599 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3600 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3601 } else { 3602 c->compressedrow.use = PETSC_FALSE; 3603 c->compressedrow.i = NULL; 3604 c->compressedrow.rindex = NULL; 3605 } 3606 C->nonzerostate = A->nonzerostate; 3607 3608 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3609 PetscFunctionReturn(PETSC_SUCCESS); 3610 } 3611 3612 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3613 { 3614 PetscFunctionBegin; 3615 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3616 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3617 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3618 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3619 PetscFunctionReturn(PETSC_SUCCESS); 3620 } 3621 3622 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3623 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3624 { 3625 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3626 PetscInt *rowidxs, *colidxs; 3627 PetscScalar *matvals; 3628 3629 PetscFunctionBegin; 3630 PetscCall(PetscViewerSetUp(viewer)); 3631 3632 /* read matrix header */ 3633 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3634 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3635 M = header[1]; 3636 N = header[2]; 3637 nz = header[3]; 3638 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3639 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3640 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3641 3642 /* set block sizes from the viewer's .info file */ 3643 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3644 /* set local and global sizes if not set already */ 3645 if (mat->rmap->n < 0) mat->rmap->n = M; 3646 if (mat->cmap->n < 0) mat->cmap->n = N; 3647 if (mat->rmap->N < 0) mat->rmap->N = M; 3648 if (mat->cmap->N < 0) mat->cmap->N = N; 3649 PetscCall(PetscLayoutSetUp(mat->rmap)); 3650 PetscCall(PetscLayoutSetUp(mat->cmap)); 3651 3652 /* check if the matrix sizes are correct */ 3653 PetscCall(MatGetSize(mat, &rows, &cols)); 3654 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3655 PetscCall(MatGetBlockSize(mat, &bs)); 3656 PetscCall(MatGetLocalSize(mat, &m, &n)); 3657 mbs = m / bs; 3658 nbs = n / bs; 3659 3660 /* read in row lengths, column indices and nonzero values */ 3661 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3662 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3663 rowidxs[0] = 0; 3664 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3665 sum = rowidxs[m]; 3666 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3667 3668 /* read in column indices and nonzero values */ 3669 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3670 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3671 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3672 3673 { /* preallocate matrix storage */ 3674 PetscBT bt; /* helper bit set to count nonzeros */ 3675 PetscInt *nnz; 3676 PetscBool sbaij; 3677 3678 PetscCall(PetscBTCreate(nbs, &bt)); 3679 PetscCall(PetscCalloc1(mbs, &nnz)); 3680 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3681 for (i = 0; i < mbs; i++) { 3682 PetscCall(PetscBTMemzero(nbs, bt)); 3683 for (k = 0; k < bs; k++) { 3684 PetscInt row = bs * i + k; 3685 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3686 PetscInt col = colidxs[j]; 3687 if (!sbaij || col >= row) 3688 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3689 } 3690 } 3691 } 3692 PetscCall(PetscBTDestroy(&bt)); 3693 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3694 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3695 PetscCall(PetscFree(nnz)); 3696 } 3697 3698 /* store matrix values */ 3699 for (i = 0; i < m; i++) { 3700 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3701 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3702 } 3703 3704 PetscCall(PetscFree(rowidxs)); 3705 PetscCall(PetscFree2(colidxs, matvals)); 3706 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3707 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3708 PetscFunctionReturn(PETSC_SUCCESS); 3709 } 3710 3711 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3712 { 3713 PetscBool isbinary; 3714 3715 PetscFunctionBegin; 3716 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3717 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3718 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3719 PetscFunctionReturn(PETSC_SUCCESS); 3720 } 3721 3722 /*@C 3723 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3724 compressed row) format. For good matrix assembly performance the 3725 user should preallocate the matrix storage by setting the parameter `nz` 3726 (or the array `nnz`). 3727 3728 Collective 3729 3730 Input Parameters: 3731 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3732 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3733 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3734 . m - number of rows 3735 . n - number of columns 3736 . nz - number of nonzero blocks per block row (same for all rows) 3737 - nnz - array containing the number of nonzero blocks in the various block rows 3738 (possibly different for each block row) or `NULL` 3739 3740 Output Parameter: 3741 . A - the matrix 3742 3743 Options Database Keys: 3744 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3745 - -mat_block_size - size of the blocks to use 3746 3747 Level: intermediate 3748 3749 Notes: 3750 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3751 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3752 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3753 3754 The number of rows and columns must be divisible by blocksize. 3755 3756 If the `nnz` parameter is given then the `nz` parameter is ignored 3757 3758 A nonzero block is any block that as 1 or more nonzeros in it 3759 3760 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3761 storage. That is, the stored row and column indices can begin at 3762 either one (as in Fortran) or zero. 3763 3764 Specify the preallocated storage with either `nz` or `nnz` (not both). 3765 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3766 allocation. See [Sparse Matrices](sec_matsparse) for details. 3767 matrices. 3768 3769 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3770 @*/ 3771 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3772 { 3773 PetscFunctionBegin; 3774 PetscCall(MatCreate(comm, A)); 3775 PetscCall(MatSetSizes(*A, m, n, m, n)); 3776 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3777 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3778 PetscFunctionReturn(PETSC_SUCCESS); 3779 } 3780 3781 /*@C 3782 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3783 per row in the matrix. For good matrix assembly performance the 3784 user should preallocate the matrix storage by setting the parameter `nz` 3785 (or the array `nnz`). 3786 3787 Collective 3788 3789 Input Parameters: 3790 + B - the matrix 3791 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3792 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3793 . nz - number of block nonzeros per block row (same for all rows) 3794 - nnz - array containing the number of block nonzeros in the various block rows 3795 (possibly different for each block row) or `NULL` 3796 3797 Options Database Keys: 3798 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3799 - -mat_block_size - size of the blocks to use 3800 3801 Level: intermediate 3802 3803 Notes: 3804 If the `nnz` parameter is given then the `nz` parameter is ignored 3805 3806 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3807 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3808 You can also run with the option `-info` and look for messages with the string 3809 malloc in them to see if additional memory allocation was needed. 3810 3811 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3812 storage. That is, the stored row and column indices can begin at 3813 either one (as in Fortran) or zero. 3814 3815 Specify the preallocated storage with either nz or nnz (not both). 3816 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3817 allocation. See [Sparse Matrices](sec_matsparse) for details. 3818 3819 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3820 @*/ 3821 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3822 { 3823 PetscFunctionBegin; 3824 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3825 PetscValidType(B, 1); 3826 PetscValidLogicalCollectiveInt(B, bs, 2); 3827 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3828 PetscFunctionReturn(PETSC_SUCCESS); 3829 } 3830 3831 /*@C 3832 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3833 3834 Collective 3835 3836 Input Parameters: 3837 + B - the matrix 3838 . bs - the blocksize 3839 . i - the indices into `j` for the start of each local row (starts with zero) 3840 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3841 - v - optional values in the matrix 3842 3843 Level: advanced 3844 3845 Notes: 3846 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3847 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3848 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3849 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3850 block column and the second index is over columns within a block. 3851 3852 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3853 3854 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3855 @*/ 3856 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3857 { 3858 PetscFunctionBegin; 3859 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3860 PetscValidType(B, 1); 3861 PetscValidLogicalCollectiveInt(B, bs, 2); 3862 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3863 PetscFunctionReturn(PETSC_SUCCESS); 3864 } 3865 3866 /*@ 3867 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3868 3869 Collective 3870 3871 Input Parameters: 3872 + comm - must be an MPI communicator of size 1 3873 . bs - size of block 3874 . m - number of rows 3875 . n - number of columns 3876 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3877 . j - column indices 3878 - a - matrix values 3879 3880 Output Parameter: 3881 . mat - the matrix 3882 3883 Level: advanced 3884 3885 Notes: 3886 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3887 once the matrix is destroyed 3888 3889 You cannot set new nonzero locations into this matrix, that will generate an error. 3890 3891 The `i` and `j` indices are 0 based 3892 3893 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3894 3895 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3896 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3897 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3898 with column-major ordering within blocks. 3899 3900 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3901 @*/ 3902 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3903 { 3904 Mat_SeqBAIJ *baij; 3905 3906 PetscFunctionBegin; 3907 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3908 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3909 3910 PetscCall(MatCreate(comm, mat)); 3911 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3912 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3913 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3914 baij = (Mat_SeqBAIJ *)(*mat)->data; 3915 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3916 3917 baij->i = i; 3918 baij->j = j; 3919 baij->a = a; 3920 3921 baij->singlemalloc = PETSC_FALSE; 3922 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3923 baij->free_a = PETSC_FALSE; 3924 baij->free_ij = PETSC_FALSE; 3925 baij->free_imax_ilen = PETSC_TRUE; 3926 3927 for (PetscInt ii = 0; ii < m; ii++) { 3928 const PetscInt row_len = i[ii + 1] - i[ii]; 3929 3930 baij->ilen[ii] = baij->imax[ii] = row_len; 3931 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3932 } 3933 if (PetscDefined(USE_DEBUG)) { 3934 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3935 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3936 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3937 } 3938 } 3939 3940 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3941 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3942 PetscFunctionReturn(PETSC_SUCCESS); 3943 } 3944 3945 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3946 { 3947 PetscFunctionBegin; 3948 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3949 PetscFunctionReturn(PETSC_SUCCESS); 3950 } 3951