1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petsc/private/kernels/blockmatmult.h> 9 10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 11 #define TYPE BAIJ 12 #define TYPE_BS 13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 14 #undef TYPE_BS 15 #define TYPE_BS _BS 16 #define TYPE_BS_ON 17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 18 #undef TYPE_BS 19 #include "../src/mat/impls/aij/seq/seqhashmat.h" 20 #undef TYPE 21 #undef TYPE_BS_ON 22 23 #if defined(PETSC_HAVE_HYPRE) 24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 25 #endif 26 27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29 #endif 30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31 32 MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs) 33 34 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 35 { 36 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 37 PetscInt m, n, ib, jb, bs = A->rmap->bs; 38 MatScalar *a_val = a_aij->a; 39 40 PetscFunctionBegin; 41 PetscCall(MatGetSize(A, &m, &n)); 42 PetscCall(PetscArrayzero(reductions, n)); 43 if (type == NORM_2) { 44 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 45 for (jb = 0; jb < bs; jb++) { 46 for (ib = 0; ib < bs; ib++) { 47 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 48 a_val++; 49 } 50 } 51 } 52 } else if (type == NORM_1) { 53 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 54 for (jb = 0; jb < bs; jb++) { 55 for (ib = 0; ib < bs; ib++) { 56 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 57 a_val++; 58 } 59 } 60 } 61 } else if (type == NORM_INFINITY) { 62 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 63 for (jb = 0; jb < bs; jb++) { 64 for (ib = 0; ib < bs; ib++) { 65 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 66 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 67 a_val++; 68 } 69 } 70 } 71 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 72 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 73 for (jb = 0; jb < bs; jb++) { 74 for (ib = 0; ib < bs; ib++) { 75 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 76 a_val++; 77 } 78 } 79 } 80 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 81 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 82 for (jb = 0; jb < bs; jb++) { 83 for (ib = 0; ib < bs; ib++) { 84 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 85 a_val++; 86 } 87 } 88 } 89 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 90 if (type == NORM_2) { 91 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 92 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 93 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 94 } 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 99 { 100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 101 PetscInt i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 102 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 103 PetscReal shift = 0.0; 104 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 105 const PetscInt *adiag; 106 107 PetscFunctionBegin; 108 allowzeropivot = PetscNot(A->erroriffailure); 109 110 if (a->idiagvalid) { 111 if (values) *values = a->idiag; 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL)); 115 if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 116 diag = a->idiag; 117 if (values) *values = a->idiag; 118 /* factor and invert each block */ 119 switch (bs) { 120 case 1: 121 for (i = 0; i < mbs; i++) { 122 odiag = v + 1 * adiag[i]; 123 diag[0] = odiag[0]; 124 125 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 126 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 127 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 129 A->factorerror_zeropivot_row = i; 130 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * adiag[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * adiag[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * adiag[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * adiag[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * adiag[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * adiag[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * adiag[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1414 { 1415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1416 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1417 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1418 1419 PetscFunctionBegin; 1420 *nn = n; 1421 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1422 if (symmetric) { 1423 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1424 nz = tia[n]; 1425 } else { 1426 tia = a->i; 1427 tja = a->j; 1428 } 1429 1430 if (!blockcompressed && bs > 1) { 1431 (*nn) *= bs; 1432 /* malloc & create the natural set of indices */ 1433 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1434 if (n) { 1435 (*ia)[0] = oshift; 1436 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1437 } 1438 1439 for (i = 1; i < n; i++) { 1440 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1441 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1442 } 1443 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1444 1445 if (inja) { 1446 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1447 cnt = 0; 1448 for (i = 0; i < n; i++) { 1449 for (j = 0; j < bs; j++) { 1450 for (k = tia[i]; k < tia[i + 1]; k++) { 1451 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1452 } 1453 } 1454 } 1455 } 1456 1457 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1458 PetscCall(PetscFree(tia)); 1459 PetscCall(PetscFree(tja)); 1460 } 1461 } else if (oshift == 1) { 1462 if (symmetric) { 1463 nz = tia[A->rmap->n / bs]; 1464 /* add 1 to i and j indices */ 1465 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1466 *ia = tia; 1467 if (ja) { 1468 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1469 *ja = tja; 1470 } 1471 } else { 1472 nz = a->i[A->rmap->n / bs]; 1473 /* malloc space and add 1 to i and j indices */ 1474 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1475 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1476 if (ja) { 1477 PetscCall(PetscMalloc1(nz, ja)); 1478 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1479 } 1480 } 1481 } else { 1482 *ia = tia; 1483 if (ja) *ja = tja; 1484 } 1485 PetscFunctionReturn(PETSC_SUCCESS); 1486 } 1487 1488 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1489 { 1490 PetscFunctionBegin; 1491 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1492 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1493 PetscCall(PetscFree(*ia)); 1494 if (ja) PetscCall(PetscFree(*ja)); 1495 } 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1500 { 1501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1502 1503 PetscFunctionBegin; 1504 if (A->hash_active) { 1505 PetscInt bs; 1506 A->ops[0] = a->cops; 1507 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1508 PetscCall(MatGetBlockSize(A, &bs)); 1509 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1510 PetscCall(PetscFree(a->dnz)); 1511 PetscCall(PetscFree(a->bdnz)); 1512 A->hash_active = PETSC_FALSE; 1513 } 1514 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1515 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1516 PetscCall(ISDestroy(&a->row)); 1517 PetscCall(ISDestroy(&a->col)); 1518 PetscCall(PetscFree(a->diag)); 1519 PetscCall(PetscFree(a->idiag)); 1520 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1521 PetscCall(PetscFree(a->solve_work)); 1522 PetscCall(PetscFree(a->mult_work)); 1523 PetscCall(PetscFree(a->sor_workt)); 1524 PetscCall(PetscFree(a->sor_work)); 1525 PetscCall(ISDestroy(&a->icol)); 1526 PetscCall(PetscFree(a->saved_values)); 1527 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1528 1529 PetscCall(MatDestroy(&a->sbaijMat)); 1530 PetscCall(MatDestroy(&a->parent)); 1531 PetscCall(PetscFree(A->data)); 1532 1533 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1534 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1535 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1536 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1537 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1538 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1539 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1540 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1541 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1542 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1543 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1544 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1545 #if defined(PETSC_HAVE_HYPRE) 1546 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1547 #endif 1548 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1549 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1550 PetscFunctionReturn(PETSC_SUCCESS); 1551 } 1552 1553 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1554 { 1555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1556 1557 PetscFunctionBegin; 1558 switch (op) { 1559 case MAT_ROW_ORIENTED: 1560 a->roworiented = flg; 1561 break; 1562 case MAT_KEEP_NONZERO_PATTERN: 1563 a->keepnonzeropattern = flg; 1564 break; 1565 case MAT_NEW_NONZERO_LOCATIONS: 1566 a->nonew = (flg ? 0 : 1); 1567 break; 1568 case MAT_NEW_NONZERO_LOCATION_ERR: 1569 a->nonew = (flg ? -1 : 0); 1570 break; 1571 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1572 a->nonew = (flg ? -2 : 0); 1573 break; 1574 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1575 a->nounused = (flg ? -1 : 0); 1576 break; 1577 default: 1578 break; 1579 } 1580 PetscFunctionReturn(PETSC_SUCCESS); 1581 } 1582 1583 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1584 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1585 { 1586 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1587 MatScalar *aa_i; 1588 PetscScalar *v_i; 1589 1590 PetscFunctionBegin; 1591 bs = A->rmap->bs; 1592 bs2 = bs * bs; 1593 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1594 1595 bn = row / bs; /* Block number */ 1596 bp = row % bs; /* Block Position */ 1597 M = ai[bn + 1] - ai[bn]; 1598 *nz = bs * M; 1599 1600 if (v) { 1601 *v = NULL; 1602 if (*nz) { 1603 PetscCall(PetscMalloc1(*nz, v)); 1604 for (i = 0; i < M; i++) { /* for each block in the block row */ 1605 v_i = *v + i * bs; 1606 aa_i = aa + bs2 * (ai[bn] + i); 1607 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1608 } 1609 } 1610 } 1611 1612 if (idx) { 1613 *idx = NULL; 1614 if (*nz) { 1615 PetscCall(PetscMalloc1(*nz, idx)); 1616 for (i = 0; i < M; i++) { /* for each block in the block row */ 1617 idx_i = *idx + i * bs; 1618 itmp = bs * aj[ai[bn] + i]; 1619 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1620 } 1621 } 1622 } 1623 PetscFunctionReturn(PETSC_SUCCESS); 1624 } 1625 1626 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1627 { 1628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1629 1630 PetscFunctionBegin; 1631 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1636 { 1637 PetscFunctionBegin; 1638 if (idx) PetscCall(PetscFree(*idx)); 1639 if (v) PetscCall(PetscFree(*v)); 1640 PetscFunctionReturn(PETSC_SUCCESS); 1641 } 1642 1643 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1644 { 1645 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1646 Mat C; 1647 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1648 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1649 MatScalar *ata, *aa = a->a; 1650 1651 PetscFunctionBegin; 1652 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1653 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1654 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1655 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1656 1657 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1658 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1659 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1660 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1661 1662 at = (Mat_SeqBAIJ *)C->data; 1663 ati = at->i; 1664 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1665 } else { 1666 C = *B; 1667 at = (Mat_SeqBAIJ *)C->data; 1668 ati = at->i; 1669 } 1670 1671 atj = at->j; 1672 ata = at->a; 1673 1674 /* Copy ati into atfill so we have locations of the next free space in atj */ 1675 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1676 1677 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1678 for (i = 0; i < mbs; i++) { 1679 anzj = ai[i + 1] - ai[i]; 1680 for (j = 0; j < anzj; j++) { 1681 atj[atfill[*aj]] = i; 1682 for (kr = 0; kr < bs; kr++) { 1683 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1684 } 1685 atfill[*aj++] += 1; 1686 } 1687 } 1688 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1689 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1690 1691 /* Clean up temporary space and complete requests. */ 1692 PetscCall(PetscFree(atfill)); 1693 1694 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1695 PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs)); 1696 *B = C; 1697 } else { 1698 PetscCall(MatHeaderMerge(A, &C)); 1699 } 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg) 1704 { 1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1706 1707 PetscFunctionBegin; 1708 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1709 if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) { 1710 *flg = PETSC_FALSE; 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 /* if the a->i are the same */ 1715 PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg)); 1716 if (!*flg) PetscFunctionReturn(PETSC_SUCCESS); 1717 1718 /* if a->j are the same */ 1719 PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg)); 1720 if (!*flg) PetscFunctionReturn(PETSC_SUCCESS); 1721 1722 if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */ 1723 else { 1724 *flg = PETSC_TRUE; 1725 for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i) 1726 if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE; 1727 } 1728 PetscFunctionReturn(PETSC_SUCCESS); 1729 } 1730 1731 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1732 { 1733 Mat Btrans; 1734 1735 PetscFunctionBegin; 1736 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1737 PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f)); 1738 PetscCall(MatDestroy(&Btrans)); 1739 PetscFunctionReturn(PETSC_SUCCESS); 1740 } 1741 1742 static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg) 1743 { 1744 PetscFunctionBegin; 1745 PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg)); 1746 PetscFunctionReturn(PETSC_SUCCESS); 1747 } 1748 1749 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1750 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1751 { 1752 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1753 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1754 PetscInt *rowlens, *colidxs; 1755 PetscScalar *matvals; 1756 1757 PetscFunctionBegin; 1758 PetscCall(PetscViewerSetUp(viewer)); 1759 1760 M = mat->rmap->N; 1761 N = mat->cmap->N; 1762 m = mat->rmap->n; 1763 bs = mat->rmap->bs; 1764 nz = bs * bs * A->nz; 1765 1766 /* write matrix header */ 1767 header[0] = MAT_FILE_CLASSID; 1768 header[1] = M; 1769 header[2] = N; 1770 header[3] = nz; 1771 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1772 1773 /* store row lengths */ 1774 PetscCall(PetscMalloc1(m, &rowlens)); 1775 for (cnt = 0, i = 0; i < A->mbs; i++) 1776 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1777 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1778 PetscCall(PetscFree(rowlens)); 1779 1780 /* store column indices */ 1781 PetscCall(PetscMalloc1(nz, &colidxs)); 1782 for (cnt = 0, i = 0; i < A->mbs; i++) 1783 for (k = 0; k < bs; k++) 1784 for (j = A->i[i]; j < A->i[i + 1]; j++) 1785 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1786 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1787 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1788 PetscCall(PetscFree(colidxs)); 1789 1790 /* store nonzero values */ 1791 PetscCall(PetscMalloc1(nz, &matvals)); 1792 for (cnt = 0, i = 0; i < A->mbs; i++) 1793 for (k = 0; k < bs; k++) 1794 for (j = A->i[i]; j < A->i[i + 1]; j++) 1795 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1796 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1797 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1798 PetscCall(PetscFree(matvals)); 1799 1800 /* write block size option to the viewer's .info file */ 1801 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1806 { 1807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1808 PetscInt i, bs = A->rmap->bs, k; 1809 1810 PetscFunctionBegin; 1811 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1812 for (i = 0; i < a->mbs; i++) { 1813 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1814 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1815 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1816 } 1817 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1818 PetscFunctionReturn(PETSC_SUCCESS); 1819 } 1820 1821 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1822 { 1823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1824 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1825 PetscViewerFormat format; 1826 1827 PetscFunctionBegin; 1828 if (A->structure_only) { 1829 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1830 PetscFunctionReturn(PETSC_SUCCESS); 1831 } 1832 1833 PetscCall(PetscViewerGetFormat(viewer, &format)); 1834 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1835 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1836 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1837 const char *matname; 1838 Mat aij; 1839 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1840 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1841 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1842 PetscCall(MatView(aij, viewer)); 1843 PetscCall(MatDestroy(&aij)); 1844 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1845 PetscFunctionReturn(PETSC_SUCCESS); 1846 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1847 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1848 for (i = 0; i < a->mbs; i++) { 1849 for (j = 0; j < bs; j++) { 1850 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1851 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1852 for (l = 0; l < bs; l++) { 1853 #if defined(PETSC_USE_COMPLEX) 1854 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1855 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1856 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1857 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1858 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1859 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1860 } 1861 #else 1862 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1863 #endif 1864 } 1865 } 1866 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1867 } 1868 } 1869 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1870 } else { 1871 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1872 for (i = 0; i < a->mbs; i++) { 1873 for (j = 0; j < bs; j++) { 1874 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1875 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1876 for (l = 0; l < bs; l++) { 1877 #if defined(PETSC_USE_COMPLEX) 1878 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1879 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1880 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1881 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1882 } else { 1883 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1884 } 1885 #else 1886 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1887 #endif 1888 } 1889 } 1890 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1891 } 1892 } 1893 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1894 } 1895 PetscCall(PetscViewerFlush(viewer)); 1896 PetscFunctionReturn(PETSC_SUCCESS); 1897 } 1898 1899 #include <petscdraw.h> 1900 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1901 { 1902 Mat A = (Mat)Aa; 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1904 PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 1905 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1906 MatScalar *aa; 1907 PetscViewer viewer; 1908 PetscViewerFormat format; 1909 int color; 1910 1911 PetscFunctionBegin; 1912 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1913 PetscCall(PetscViewerGetFormat(viewer, &format)); 1914 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1915 1916 /* loop over matrix elements drawing boxes */ 1917 1918 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1919 PetscDrawCollectiveBegin(draw); 1920 /* Blue for negative, Cyan for zero and Red for positive */ 1921 color = PETSC_DRAW_BLUE; 1922 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1923 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1924 y_l = A->rmap->N - row - 1.0; 1925 y_r = y_l + 1.0; 1926 x_l = a->j[j] * bs; 1927 x_r = x_l + 1.0; 1928 aa = a->a + j * bs2; 1929 for (k = 0; k < bs; k++) { 1930 for (l = 0; l < bs; l++) { 1931 if (PetscRealPart(*aa++) >= 0.) continue; 1932 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1933 } 1934 } 1935 } 1936 } 1937 color = PETSC_DRAW_CYAN; 1938 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1939 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1940 y_l = A->rmap->N - row - 1.0; 1941 y_r = y_l + 1.0; 1942 x_l = a->j[j] * bs; 1943 x_r = x_l + 1.0; 1944 aa = a->a + j * bs2; 1945 for (k = 0; k < bs; k++) { 1946 for (l = 0; l < bs; l++) { 1947 if (PetscRealPart(*aa++) != 0.) continue; 1948 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1949 } 1950 } 1951 } 1952 } 1953 color = PETSC_DRAW_RED; 1954 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1955 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1956 y_l = A->rmap->N - row - 1.0; 1957 y_r = y_l + 1.0; 1958 x_l = a->j[j] * bs; 1959 x_r = x_l + 1.0; 1960 aa = a->a + j * bs2; 1961 for (k = 0; k < bs; k++) { 1962 for (l = 0; l < bs; l++) { 1963 if (PetscRealPart(*aa++) <= 0.) continue; 1964 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1965 } 1966 } 1967 } 1968 } 1969 PetscDrawCollectiveEnd(draw); 1970 } else { 1971 /* use contour shading to indicate magnitude of values */ 1972 /* first determine max of all nonzero values */ 1973 PetscReal minv = 0.0, maxv = 0.0; 1974 PetscDraw popup; 1975 1976 for (i = 0; i < a->nz * a->bs2; i++) { 1977 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1978 } 1979 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1980 PetscCall(PetscDrawGetPopup(draw, &popup)); 1981 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1982 1983 PetscDrawCollectiveBegin(draw); 1984 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1985 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1986 y_l = A->rmap->N - row - 1.0; 1987 y_r = y_l + 1.0; 1988 x_l = a->j[j] * bs; 1989 x_r = x_l + 1.0; 1990 aa = a->a + j * bs2; 1991 for (k = 0; k < bs; k++) { 1992 for (l = 0; l < bs; l++) { 1993 MatScalar v = *aa++; 1994 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 1995 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1996 } 1997 } 1998 } 1999 } 2000 PetscDrawCollectiveEnd(draw); 2001 } 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2006 { 2007 PetscReal xl, yl, xr, yr, w, h; 2008 PetscDraw draw; 2009 PetscBool isnull; 2010 2011 PetscFunctionBegin; 2012 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2013 PetscCall(PetscDrawIsNull(draw, &isnull)); 2014 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2015 2016 xr = A->cmap->n; 2017 yr = A->rmap->N; 2018 h = yr / 10.0; 2019 w = xr / 10.0; 2020 xr += w; 2021 yr += h; 2022 xl = -w; 2023 yl = -h; 2024 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2025 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2026 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2027 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2028 PetscCall(PetscDrawSave(draw)); 2029 PetscFunctionReturn(PETSC_SUCCESS); 2030 } 2031 2032 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2033 { 2034 PetscBool isascii, isbinary, isdraw; 2035 2036 PetscFunctionBegin; 2037 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2039 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2040 if (isascii) { 2041 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2042 } else if (isbinary) { 2043 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2044 } else if (isdraw) { 2045 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2046 } else { 2047 Mat B; 2048 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2049 PetscCall(MatView(B, viewer)); 2050 PetscCall(MatDestroy(&B)); 2051 } 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2056 { 2057 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2058 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2059 PetscInt *ai = a->i, *ailen = a->ilen; 2060 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2061 MatScalar *ap, *aa = a->a; 2062 2063 PetscFunctionBegin; 2064 for (k = 0; k < m; k++) { /* loop over rows */ 2065 row = im[k]; 2066 brow = row / bs; 2067 if (row < 0) { 2068 v += n; 2069 continue; 2070 } /* negative row */ 2071 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2072 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2073 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2074 nrow = ailen[brow]; 2075 for (l = 0; l < n; l++) { /* loop over columns */ 2076 if (in[l] < 0) { 2077 v++; 2078 continue; 2079 } /* negative column */ 2080 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2081 col = in[l]; 2082 bcol = col / bs; 2083 cidx = col % bs; 2084 ridx = row % bs; 2085 high = nrow; 2086 low = 0; /* assume unsorted */ 2087 while (high - low > 5) { 2088 t = (low + high) / 2; 2089 if (rp[t] > bcol) high = t; 2090 else low = t; 2091 } 2092 for (i = low; i < high; i++) { 2093 if (rp[i] > bcol) break; 2094 if (rp[i] == bcol) { 2095 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2096 goto finished; 2097 } 2098 } 2099 *v++ = 0.0; 2100 finished:; 2101 } 2102 } 2103 PetscFunctionReturn(PETSC_SUCCESS); 2104 } 2105 2106 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2107 { 2108 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2109 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2110 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2111 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2112 PetscBool roworiented = a->roworiented; 2113 const PetscScalar *value = v; 2114 MatScalar *ap = NULL, *aa = a->a, *bap; 2115 2116 PetscFunctionBegin; 2117 if (roworiented) { 2118 stepval = (n - 1) * bs; 2119 } else { 2120 stepval = (m - 1) * bs; 2121 } 2122 for (k = 0; k < m; k++) { /* loop over added rows */ 2123 row = im[k]; 2124 if (row < 0) continue; 2125 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2126 rp = aj + ai[row]; 2127 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2128 rmax = imax[row]; 2129 nrow = ailen[row]; 2130 low = 0; 2131 high = nrow; 2132 for (l = 0; l < n; l++) { /* loop over added columns */ 2133 if (in[l] < 0) continue; 2134 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2135 col = in[l]; 2136 if (!A->structure_only) { 2137 if (roworiented) { 2138 value = v + (k * (stepval + bs) + l) * bs; 2139 } else { 2140 value = v + (l * (stepval + bs) + k) * bs; 2141 } 2142 } 2143 if (col <= lastcol) low = 0; 2144 else high = nrow; 2145 lastcol = col; 2146 while (high - low > 7) { 2147 t = (low + high) / 2; 2148 if (rp[t] > col) high = t; 2149 else low = t; 2150 } 2151 for (i = low; i < high; i++) { 2152 if (rp[i] > col) break; 2153 if (rp[i] == col) { 2154 if (A->structure_only) goto noinsert2; 2155 bap = ap + bs2 * i; 2156 if (roworiented) { 2157 if (is == ADD_VALUES) { 2158 for (ii = 0; ii < bs; ii++, value += stepval) { 2159 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2160 } 2161 } else { 2162 for (ii = 0; ii < bs; ii++, value += stepval) { 2163 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2164 } 2165 } 2166 } else { 2167 if (is == ADD_VALUES) { 2168 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2169 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2170 bap += bs; 2171 } 2172 } else { 2173 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2174 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2175 bap += bs; 2176 } 2177 } 2178 } 2179 goto noinsert2; 2180 } 2181 } 2182 if (nonew == 1) goto noinsert2; 2183 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2184 if (A->structure_only) { 2185 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2186 } else { 2187 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2188 } 2189 N = nrow++ - 1; 2190 high++; 2191 /* shift up all the later entries in this row */ 2192 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2193 rp[i] = col; 2194 if (!A->structure_only) { 2195 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2196 bap = ap + bs2 * i; 2197 if (roworiented) { 2198 for (ii = 0; ii < bs; ii++, value += stepval) { 2199 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2200 } 2201 } else { 2202 for (ii = 0; ii < bs; ii++, value += stepval) { 2203 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2204 } 2205 } 2206 } 2207 noinsert2:; 2208 low = i; 2209 } 2210 ailen[row] = nrow; 2211 } 2212 PetscFunctionReturn(PETSC_SUCCESS); 2213 } 2214 2215 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2216 { 2217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2218 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2219 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2220 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2221 MatScalar *aa = a->a, *ap; 2222 PetscReal ratio = 0.6; 2223 2224 PetscFunctionBegin; 2225 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2226 2227 if (m) rmax = ailen[0]; 2228 for (i = 1; i < mbs; i++) { 2229 /* move each row back by the amount of empty slots (fshift) before it*/ 2230 fshift += imax[i - 1] - ailen[i - 1]; 2231 rmax = PetscMax(rmax, ailen[i]); 2232 if (fshift) { 2233 ip = aj + ai[i]; 2234 ap = aa + bs2 * ai[i]; 2235 N = ailen[i]; 2236 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2237 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2238 } 2239 ai[i] = ai[i - 1] + ailen[i - 1]; 2240 } 2241 if (mbs) { 2242 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2243 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2244 } 2245 2246 /* reset ilen and imax for each row */ 2247 a->nonzerorowcnt = 0; 2248 if (A->structure_only) { 2249 PetscCall(PetscFree2(a->imax, a->ilen)); 2250 } else { /* !A->structure_only */ 2251 for (i = 0; i < mbs; i++) { 2252 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2253 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2254 } 2255 } 2256 a->nz = ai[mbs]; 2257 2258 /* diagonals may have moved, so kill the diagonal pointers */ 2259 a->idiagvalid = PETSC_FALSE; 2260 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2261 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2262 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2263 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2264 2265 A->info.mallocs += a->reallocs; 2266 a->reallocs = 0; 2267 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2268 a->rmax = rmax; 2269 2270 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2271 PetscFunctionReturn(PETSC_SUCCESS); 2272 } 2273 2274 /* 2275 This function returns an array of flags which indicate the locations of contiguous 2276 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2277 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2278 Assume: sizes should be long enough to hold all the values. 2279 */ 2280 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2281 { 2282 PetscInt j = 0; 2283 2284 PetscFunctionBegin; 2285 for (PetscInt i = 0; i < n; j++) { 2286 PetscInt row = idx[i]; 2287 if (row % bs != 0) { /* Not the beginning of a block */ 2288 sizes[j] = 1; 2289 i++; 2290 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2291 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2292 i++; 2293 } else { /* Beginning of the block, so check if the complete block exists */ 2294 PetscBool flg = PETSC_TRUE; 2295 for (PetscInt k = 1; k < bs; k++) { 2296 if (row + k != idx[i + k]) { /* break in the block */ 2297 flg = PETSC_FALSE; 2298 break; 2299 } 2300 } 2301 if (flg) { /* No break in the bs */ 2302 sizes[j] = bs; 2303 i += bs; 2304 } else { 2305 sizes[j] = 1; 2306 i++; 2307 } 2308 } 2309 } 2310 *bs_max = j; 2311 PetscFunctionReturn(PETSC_SUCCESS); 2312 } 2313 2314 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2315 { 2316 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2317 PetscInt i, j, k, count, *rows; 2318 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2319 PetscScalar zero = 0.0; 2320 MatScalar *aa; 2321 const PetscScalar *xx; 2322 PetscScalar *bb; 2323 2324 PetscFunctionBegin; 2325 /* fix right-hand side if needed */ 2326 if (x && b) { 2327 PetscCall(VecGetArrayRead(x, &xx)); 2328 PetscCall(VecGetArray(b, &bb)); 2329 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2330 PetscCall(VecRestoreArrayRead(x, &xx)); 2331 PetscCall(VecRestoreArray(b, &bb)); 2332 } 2333 2334 /* Make a copy of the IS and sort it */ 2335 /* allocate memory for rows,sizes */ 2336 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2337 2338 /* copy IS values to rows, and sort them */ 2339 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2340 PetscCall(PetscSortInt(is_n, rows)); 2341 2342 if (baij->keepnonzeropattern) { 2343 for (i = 0; i < is_n; i++) sizes[i] = 1; 2344 bs_max = is_n; 2345 } else { 2346 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2347 A->nonzerostate++; 2348 } 2349 2350 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2351 row = rows[j]; 2352 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2353 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2354 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2355 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2356 if (diag != (PetscScalar)0.0) { 2357 if (baij->ilen[row / bs] > 0) { 2358 baij->ilen[row / bs] = 1; 2359 baij->j[baij->i[row / bs]] = row / bs; 2360 2361 PetscCall(PetscArrayzero(aa, count * bs)); 2362 } 2363 /* Now insert all the diagonal values for this bs */ 2364 for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2365 } else { /* (diag == 0.0) */ 2366 baij->ilen[row / bs] = 0; 2367 } /* end (diag == 0.0) */ 2368 } else { /* (sizes[i] != bs) */ 2369 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2370 for (k = 0; k < count; k++) { 2371 aa[0] = zero; 2372 aa += bs; 2373 } 2374 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2375 } 2376 } 2377 2378 PetscCall(PetscFree2(rows, sizes)); 2379 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2380 PetscFunctionReturn(PETSC_SUCCESS); 2381 } 2382 2383 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2384 { 2385 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2386 PetscInt i, j, k, count; 2387 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2388 PetscScalar zero = 0.0; 2389 MatScalar *aa; 2390 const PetscScalar *xx; 2391 PetscScalar *bb; 2392 PetscBool *zeroed, vecs = PETSC_FALSE; 2393 2394 PetscFunctionBegin; 2395 /* fix right-hand side if needed */ 2396 if (x && b) { 2397 PetscCall(VecGetArrayRead(x, &xx)); 2398 PetscCall(VecGetArray(b, &bb)); 2399 vecs = PETSC_TRUE; 2400 } 2401 2402 /* zero the columns */ 2403 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2404 for (i = 0; i < is_n; i++) { 2405 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2406 zeroed[is_idx[i]] = PETSC_TRUE; 2407 } 2408 for (i = 0; i < A->rmap->N; i++) { 2409 if (!zeroed[i]) { 2410 row = i / bs; 2411 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2412 for (k = 0; k < bs; k++) { 2413 col = bs * baij->j[j] + k; 2414 if (zeroed[col]) { 2415 aa = baij->a + j * bs2 + (i % bs) + bs * k; 2416 if (vecs) bb[i] -= aa[0] * xx[col]; 2417 aa[0] = 0.0; 2418 } 2419 } 2420 } 2421 } else if (vecs) bb[i] = diag * xx[i]; 2422 } 2423 PetscCall(PetscFree(zeroed)); 2424 if (vecs) { 2425 PetscCall(VecRestoreArrayRead(x, &xx)); 2426 PetscCall(VecRestoreArray(b, &bb)); 2427 } 2428 2429 /* zero the rows */ 2430 for (i = 0; i < is_n; i++) { 2431 row = is_idx[i]; 2432 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2433 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2434 for (k = 0; k < count; k++) { 2435 aa[0] = zero; 2436 aa += bs; 2437 } 2438 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2439 } 2440 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2441 PetscFunctionReturn(PETSC_SUCCESS); 2442 } 2443 2444 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2445 { 2446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2447 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2448 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2449 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2450 PetscInt ridx, cidx, bs2 = a->bs2; 2451 PetscBool roworiented = a->roworiented; 2452 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2453 2454 PetscFunctionBegin; 2455 for (k = 0; k < m; k++) { /* loop over added rows */ 2456 row = im[k]; 2457 brow = row / bs; 2458 if (row < 0) continue; 2459 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2460 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2461 if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2462 rmax = imax[brow]; 2463 nrow = ailen[brow]; 2464 low = 0; 2465 high = nrow; 2466 for (l = 0; l < n; l++) { /* loop over added columns */ 2467 if (in[l] < 0) continue; 2468 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2469 col = in[l]; 2470 bcol = col / bs; 2471 ridx = row % bs; 2472 cidx = col % bs; 2473 if (!A->structure_only) { 2474 if (roworiented) { 2475 value = v[l + k * n]; 2476 } else { 2477 value = v[k + l * m]; 2478 } 2479 } 2480 if (col <= lastcol) low = 0; 2481 else high = nrow; 2482 lastcol = col; 2483 while (high - low > 7) { 2484 t = (low + high) / 2; 2485 if (rp[t] > bcol) high = t; 2486 else low = t; 2487 } 2488 for (i = low; i < high; i++) { 2489 if (rp[i] > bcol) break; 2490 if (rp[i] == bcol) { 2491 bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2492 if (!A->structure_only) { 2493 if (is == ADD_VALUES) *bap += value; 2494 else *bap = value; 2495 } 2496 goto noinsert1; 2497 } 2498 } 2499 if (nonew == 1) goto noinsert1; 2500 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2501 if (A->structure_only) { 2502 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2503 } else { 2504 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2505 } 2506 N = nrow++ - 1; 2507 high++; 2508 /* shift up all the later entries in this row */ 2509 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2510 rp[i] = bcol; 2511 if (!A->structure_only) { 2512 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2513 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2514 ap[bs2 * i + bs * cidx + ridx] = value; 2515 } 2516 a->nz++; 2517 noinsert1:; 2518 low = i; 2519 } 2520 ailen[brow] = nrow; 2521 } 2522 PetscFunctionReturn(PETSC_SUCCESS); 2523 } 2524 2525 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2526 { 2527 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2528 Mat outA; 2529 PetscBool row_identity, col_identity; 2530 2531 PetscFunctionBegin; 2532 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2533 PetscCall(ISIdentity(row, &row_identity)); 2534 PetscCall(ISIdentity(col, &col_identity)); 2535 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2536 2537 outA = inA; 2538 inA->factortype = MAT_FACTOR_LU; 2539 PetscCall(PetscFree(inA->solvertype)); 2540 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2541 2542 PetscCall(PetscObjectReference((PetscObject)row)); 2543 PetscCall(ISDestroy(&a->row)); 2544 a->row = row; 2545 PetscCall(PetscObjectReference((PetscObject)col)); 2546 PetscCall(ISDestroy(&a->col)); 2547 a->col = col; 2548 2549 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2550 PetscCall(ISDestroy(&a->icol)); 2551 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2552 2553 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2554 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2555 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2556 PetscFunctionReturn(PETSC_SUCCESS); 2557 } 2558 2559 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2560 { 2561 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2562 2563 PetscFunctionBegin; 2564 baij->nz = baij->maxnz; 2565 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2566 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2567 PetscFunctionReturn(PETSC_SUCCESS); 2568 } 2569 2570 /*@ 2571 MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 2572 2573 Input Parameters: 2574 + mat - the `MATSEQBAIJ` matrix 2575 - indices - the block column indices 2576 2577 Level: advanced 2578 2579 Notes: 2580 This can be called if you have precomputed the nonzero structure of the 2581 matrix and want to provide it to the matrix object to improve the performance 2582 of the `MatSetValues()` operation. 2583 2584 You MUST have set the correct numbers of nonzeros per row in the call to 2585 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2586 2587 MUST be called before any calls to `MatSetValues()` 2588 2589 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2590 @*/ 2591 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2592 { 2593 PetscFunctionBegin; 2594 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2595 PetscAssertPointer(indices, 2); 2596 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 2597 PetscFunctionReturn(PETSC_SUCCESS); 2598 } 2599 2600 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2601 { 2602 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2603 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2604 PetscReal atmp; 2605 PetscScalar *x, zero = 0.0; 2606 MatScalar *aa; 2607 PetscInt ncols, brow, krow, kcol; 2608 2609 PetscFunctionBegin; 2610 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2611 bs = A->rmap->bs; 2612 aa = a->a; 2613 ai = a->i; 2614 aj = a->j; 2615 mbs = a->mbs; 2616 2617 PetscCall(VecSet(v, zero)); 2618 PetscCall(VecGetArray(v, &x)); 2619 PetscCall(VecGetLocalSize(v, &n)); 2620 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2621 for (i = 0; i < mbs; i++) { 2622 ncols = ai[1] - ai[0]; 2623 ai++; 2624 brow = bs * i; 2625 for (j = 0; j < ncols; j++) { 2626 for (kcol = 0; kcol < bs; kcol++) { 2627 for (krow = 0; krow < bs; krow++) { 2628 atmp = PetscAbsScalar(*aa); 2629 aa++; 2630 row = brow + krow; /* row index */ 2631 if (PetscAbsScalar(x[row]) < atmp) { 2632 x[row] = atmp; 2633 if (idx) idx[row] = bs * (*aj) + kcol; 2634 } 2635 } 2636 } 2637 aj++; 2638 } 2639 } 2640 PetscCall(VecRestoreArray(v, &x)); 2641 PetscFunctionReturn(PETSC_SUCCESS); 2642 } 2643 2644 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2645 { 2646 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2647 PetscInt i, j, n, row, bs, *ai, mbs; 2648 PetscReal atmp; 2649 PetscScalar *x, zero = 0.0; 2650 MatScalar *aa; 2651 PetscInt ncols, brow, krow, kcol; 2652 2653 PetscFunctionBegin; 2654 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2655 bs = A->rmap->bs; 2656 aa = a->a; 2657 ai = a->i; 2658 mbs = a->mbs; 2659 2660 PetscCall(VecSet(v, zero)); 2661 PetscCall(VecGetArrayWrite(v, &x)); 2662 PetscCall(VecGetLocalSize(v, &n)); 2663 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2664 for (i = 0; i < mbs; i++) { 2665 ncols = ai[1] - ai[0]; 2666 ai++; 2667 brow = bs * i; 2668 for (j = 0; j < ncols; j++) { 2669 for (kcol = 0; kcol < bs; kcol++) { 2670 for (krow = 0; krow < bs; krow++) { 2671 atmp = PetscAbsScalar(*aa); 2672 aa++; 2673 row = brow + krow; /* row index */ 2674 x[row] += atmp; 2675 } 2676 } 2677 } 2678 } 2679 PetscCall(VecRestoreArrayWrite(v, &x)); 2680 PetscFunctionReturn(PETSC_SUCCESS); 2681 } 2682 2683 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2684 { 2685 PetscFunctionBegin; 2686 /* If the two matrices have the same copy implementation, use fast copy. */ 2687 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2689 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2690 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2691 2692 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2693 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2694 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2695 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2696 } else { 2697 PetscCall(MatCopy_Basic(A, B, str)); 2698 } 2699 PetscFunctionReturn(PETSC_SUCCESS); 2700 } 2701 2702 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2703 { 2704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2705 2706 PetscFunctionBegin; 2707 *array = a->a; 2708 PetscFunctionReturn(PETSC_SUCCESS); 2709 } 2710 2711 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2712 { 2713 PetscFunctionBegin; 2714 *array = NULL; 2715 PetscFunctionReturn(PETSC_SUCCESS); 2716 } 2717 2718 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2719 { 2720 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2721 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2722 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2723 2724 PetscFunctionBegin; 2725 /* Set the number of nonzeros in the new matrix */ 2726 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2727 PetscFunctionReturn(PETSC_SUCCESS); 2728 } 2729 2730 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2731 { 2732 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2733 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2734 PetscBLASInt one = 1; 2735 2736 PetscFunctionBegin; 2737 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2738 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2739 if (e) { 2740 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2741 if (e) { 2742 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2743 if (e) str = SAME_NONZERO_PATTERN; 2744 } 2745 } 2746 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2747 } 2748 if (str == SAME_NONZERO_PATTERN) { 2749 PetscScalar alpha = a; 2750 PetscBLASInt bnz; 2751 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2752 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2753 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2754 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2755 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2756 } else { 2757 Mat B; 2758 PetscInt *nnz; 2759 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2760 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2761 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2762 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2763 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2764 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2765 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2766 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2767 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2768 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2769 PetscCall(MatHeaderMerge(Y, &B)); 2770 PetscCall(PetscFree(nnz)); 2771 } 2772 PetscFunctionReturn(PETSC_SUCCESS); 2773 } 2774 2775 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2776 { 2777 #if PetscDefined(USE_COMPLEX) 2778 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2779 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2780 MatScalar *aa = a->a; 2781 2782 PetscFunctionBegin; 2783 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2784 PetscFunctionReturn(PETSC_SUCCESS); 2785 #else 2786 (void)A; 2787 return PETSC_SUCCESS; 2788 #endif 2789 } 2790 2791 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2792 { 2793 #if PetscDefined(USE_COMPLEX) 2794 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2795 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2796 MatScalar *aa = a->a; 2797 2798 PetscFunctionBegin; 2799 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2800 PetscFunctionReturn(PETSC_SUCCESS); 2801 #else 2802 (void)A; 2803 return PETSC_SUCCESS; 2804 #endif 2805 } 2806 2807 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2808 { 2809 #if PetscDefined(USE_COMPLEX) 2810 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2811 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2812 MatScalar *aa = a->a; 2813 2814 PetscFunctionBegin; 2815 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2816 PetscFunctionReturn(PETSC_SUCCESS); 2817 #else 2818 (void)A; 2819 return PETSC_SUCCESS; 2820 #endif 2821 } 2822 2823 /* 2824 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2825 */ 2826 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2827 { 2828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2829 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2830 PetscInt nz = a->i[m], row, *jj, mr, col; 2831 2832 PetscFunctionBegin; 2833 *nn = n; 2834 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2835 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2836 PetscCall(PetscCalloc1(n, &collengths)); 2837 PetscCall(PetscMalloc1(n + 1, &cia)); 2838 PetscCall(PetscMalloc1(nz, &cja)); 2839 jj = a->j; 2840 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2841 cia[0] = oshift; 2842 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2843 PetscCall(PetscArrayzero(collengths, n)); 2844 jj = a->j; 2845 for (row = 0; row < m; row++) { 2846 mr = a->i[row + 1] - a->i[row]; 2847 for (i = 0; i < mr; i++) { 2848 col = *jj++; 2849 2850 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2851 } 2852 } 2853 PetscCall(PetscFree(collengths)); 2854 *ia = cia; 2855 *ja = cja; 2856 PetscFunctionReturn(PETSC_SUCCESS); 2857 } 2858 2859 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2860 { 2861 PetscFunctionBegin; 2862 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2863 PetscCall(PetscFree(*ia)); 2864 PetscCall(PetscFree(*ja)); 2865 PetscFunctionReturn(PETSC_SUCCESS); 2866 } 2867 2868 /* 2869 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2870 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2871 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2872 */ 2873 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2874 { 2875 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2876 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2877 PetscInt nz = a->i[m], row, *jj, mr, col; 2878 PetscInt *cspidx; 2879 2880 PetscFunctionBegin; 2881 *nn = n; 2882 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2883 2884 PetscCall(PetscCalloc1(n, &collengths)); 2885 PetscCall(PetscMalloc1(n + 1, &cia)); 2886 PetscCall(PetscMalloc1(nz, &cja)); 2887 PetscCall(PetscMalloc1(nz, &cspidx)); 2888 jj = a->j; 2889 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2890 cia[0] = oshift; 2891 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2892 PetscCall(PetscArrayzero(collengths, n)); 2893 jj = a->j; 2894 for (row = 0; row < m; row++) { 2895 mr = a->i[row + 1] - a->i[row]; 2896 for (i = 0; i < mr; i++) { 2897 col = *jj++; 2898 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2899 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2900 } 2901 } 2902 PetscCall(PetscFree(collengths)); 2903 *ia = cia; 2904 *ja = cja; 2905 *spidx = cspidx; 2906 PetscFunctionReturn(PETSC_SUCCESS); 2907 } 2908 2909 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2910 { 2911 PetscFunctionBegin; 2912 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2913 PetscCall(PetscFree(*spidx)); 2914 PetscFunctionReturn(PETSC_SUCCESS); 2915 } 2916 2917 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2918 { 2919 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2920 2921 PetscFunctionBegin; 2922 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2923 PetscCall(MatShift_Basic(Y, a)); 2924 PetscFunctionReturn(PETSC_SUCCESS); 2925 } 2926 2927 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2928 { 2929 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2930 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2931 PetscInt m = A->rmap->N, *ailen = a->ilen; 2932 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2933 MatScalar *aa = a->a, *ap; 2934 PetscBool zero; 2935 2936 PetscFunctionBegin; 2937 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2938 if (m) rmax = ailen[0]; 2939 for (i = 1; i <= mbs; i++) { 2940 for (k = ai[i - 1]; k < ai[i]; k++) { 2941 zero = PETSC_TRUE; 2942 ap = aa + bs2 * k; 2943 for (j = 0; j < bs2 && zero; j++) { 2944 if (ap[j] != 0.0) zero = PETSC_FALSE; 2945 } 2946 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2947 else { 2948 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2949 aj[k - fshift] = aj[k]; 2950 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2951 } 2952 } 2953 ai[i - 1] -= fshift_prev; 2954 fshift_prev = fshift; 2955 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2956 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2957 rmax = PetscMax(rmax, ailen[i - 1]); 2958 } 2959 if (fshift) { 2960 if (mbs) { 2961 ai[mbs] -= fshift; 2962 a->nz = ai[mbs]; 2963 } 2964 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 2965 A->nonzerostate++; 2966 A->info.nz_unneeded += (PetscReal)fshift; 2967 a->rmax = rmax; 2968 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2969 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2970 } 2971 PetscFunctionReturn(PETSC_SUCCESS); 2972 } 2973 2974 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2975 MatGetRow_SeqBAIJ, 2976 MatRestoreRow_SeqBAIJ, 2977 MatMult_SeqBAIJ_N, 2978 /* 4*/ MatMultAdd_SeqBAIJ_N, 2979 MatMultTranspose_SeqBAIJ, 2980 MatMultTransposeAdd_SeqBAIJ, 2981 NULL, 2982 NULL, 2983 NULL, 2984 /* 10*/ NULL, 2985 MatLUFactor_SeqBAIJ, 2986 NULL, 2987 NULL, 2988 MatTranspose_SeqBAIJ, 2989 /* 15*/ MatGetInfo_SeqBAIJ, 2990 MatEqual_SeqBAIJ, 2991 MatGetDiagonal_SeqBAIJ, 2992 MatDiagonalScale_SeqBAIJ, 2993 MatNorm_SeqBAIJ, 2994 /* 20*/ NULL, 2995 MatAssemblyEnd_SeqBAIJ, 2996 MatSetOption_SeqBAIJ, 2997 MatZeroEntries_SeqBAIJ, 2998 /* 24*/ MatZeroRows_SeqBAIJ, 2999 NULL, 3000 NULL, 3001 NULL, 3002 NULL, 3003 /* 29*/ MatSetUp_Seq_Hash, 3004 NULL, 3005 NULL, 3006 NULL, 3007 NULL, 3008 /* 34*/ MatDuplicate_SeqBAIJ, 3009 NULL, 3010 NULL, 3011 MatILUFactor_SeqBAIJ, 3012 NULL, 3013 /* 39*/ MatAXPY_SeqBAIJ, 3014 MatCreateSubMatrices_SeqBAIJ, 3015 MatIncreaseOverlap_SeqBAIJ, 3016 MatGetValues_SeqBAIJ, 3017 MatCopy_SeqBAIJ, 3018 /* 44*/ NULL, 3019 MatScale_SeqBAIJ, 3020 MatShift_SeqBAIJ, 3021 NULL, 3022 MatZeroRowsColumns_SeqBAIJ, 3023 /* 49*/ NULL, 3024 MatGetRowIJ_SeqBAIJ, 3025 MatRestoreRowIJ_SeqBAIJ, 3026 MatGetColumnIJ_SeqBAIJ, 3027 MatRestoreColumnIJ_SeqBAIJ, 3028 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3029 NULL, 3030 NULL, 3031 NULL, 3032 MatSetValuesBlocked_SeqBAIJ, 3033 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3034 MatDestroy_SeqBAIJ, 3035 MatView_SeqBAIJ, 3036 NULL, 3037 NULL, 3038 /* 64*/ NULL, 3039 NULL, 3040 NULL, 3041 NULL, 3042 MatGetRowMaxAbs_SeqBAIJ, 3043 /* 69*/ NULL, 3044 MatConvert_Basic, 3045 NULL, 3046 MatFDColoringApply_BAIJ, 3047 NULL, 3048 /* 74*/ NULL, 3049 NULL, 3050 NULL, 3051 NULL, 3052 MatLoad_SeqBAIJ, 3053 /* 79*/ NULL, 3054 NULL, 3055 NULL, 3056 NULL, 3057 NULL, 3058 /* 84*/ NULL, 3059 NULL, 3060 NULL, 3061 NULL, 3062 NULL, 3063 /* 89*/ NULL, 3064 NULL, 3065 NULL, 3066 NULL, 3067 MatConjugate_SeqBAIJ, 3068 /* 94*/ NULL, 3069 NULL, 3070 MatRealPart_SeqBAIJ, 3071 MatImaginaryPart_SeqBAIJ, 3072 NULL, 3073 /* 99*/ NULL, 3074 NULL, 3075 NULL, 3076 NULL, 3077 NULL, 3078 /*104*/ NULL, 3079 NULL, 3080 NULL, 3081 NULL, 3082 NULL, 3083 /*109*/ NULL, 3084 NULL, 3085 MatMultHermitianTranspose_SeqBAIJ, 3086 MatMultHermitianTransposeAdd_SeqBAIJ, 3087 NULL, 3088 /*114*/ NULL, 3089 MatGetColumnReductions_SeqBAIJ, 3090 MatInvertBlockDiagonal_SeqBAIJ, 3091 NULL, 3092 NULL, 3093 /*119*/ NULL, 3094 NULL, 3095 NULL, 3096 NULL, 3097 NULL, 3098 /*124*/ NULL, 3099 NULL, 3100 MatSetBlockSizes_Default, 3101 NULL, 3102 MatFDColoringSetUp_SeqXAIJ, 3103 /*129*/ NULL, 3104 MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3105 MatDestroySubMatrices_SeqBAIJ, 3106 NULL, 3107 NULL, 3108 /*134*/ NULL, 3109 NULL, 3110 MatEliminateZeros_SeqBAIJ, 3111 MatGetRowSumAbs_SeqBAIJ, 3112 NULL, 3113 /*139*/ NULL, 3114 NULL, 3115 MatCopyHashToXAIJ_Seq_Hash, 3116 NULL, 3117 NULL}; 3118 3119 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3120 { 3121 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3122 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3123 3124 PetscFunctionBegin; 3125 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3126 3127 /* allocate space for values if not already there */ 3128 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3129 3130 /* copy values over */ 3131 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3132 PetscFunctionReturn(PETSC_SUCCESS); 3133 } 3134 3135 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3136 { 3137 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3138 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3139 3140 PetscFunctionBegin; 3141 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3142 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3143 3144 /* copy values over */ 3145 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3146 PetscFunctionReturn(PETSC_SUCCESS); 3147 } 3148 3149 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3150 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3151 3152 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3153 { 3154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3155 PetscInt i, mbs, nbs, bs2; 3156 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3157 3158 PetscFunctionBegin; 3159 if (B->hash_active) { 3160 PetscInt bs; 3161 B->ops[0] = b->cops; 3162 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3163 PetscCall(MatGetBlockSize(B, &bs)); 3164 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3165 PetscCall(PetscFree(b->dnz)); 3166 PetscCall(PetscFree(b->bdnz)); 3167 B->hash_active = PETSC_FALSE; 3168 } 3169 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3170 if (nz == MAT_SKIP_ALLOCATION) { 3171 skipallocation = PETSC_TRUE; 3172 nz = 0; 3173 } 3174 3175 PetscCall(MatSetBlockSize(B, bs)); 3176 PetscCall(PetscLayoutSetUp(B->rmap)); 3177 PetscCall(PetscLayoutSetUp(B->cmap)); 3178 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3179 3180 B->preallocated = PETSC_TRUE; 3181 3182 mbs = B->rmap->n / bs; 3183 nbs = B->cmap->n / bs; 3184 bs2 = bs * bs; 3185 3186 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3187 3188 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3189 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3190 if (nnz) { 3191 for (i = 0; i < mbs; i++) { 3192 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3193 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3194 } 3195 } 3196 3197 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3198 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3199 PetscOptionsEnd(); 3200 3201 if (!flg) { 3202 switch (bs) { 3203 case 1: 3204 B->ops->mult = MatMult_SeqBAIJ_1; 3205 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3206 break; 3207 case 2: 3208 B->ops->mult = MatMult_SeqBAIJ_2; 3209 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3210 break; 3211 case 3: 3212 B->ops->mult = MatMult_SeqBAIJ_3; 3213 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3214 break; 3215 case 4: 3216 B->ops->mult = MatMult_SeqBAIJ_4; 3217 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3218 break; 3219 case 5: 3220 B->ops->mult = MatMult_SeqBAIJ_5; 3221 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3222 break; 3223 case 6: 3224 B->ops->mult = MatMult_SeqBAIJ_6; 3225 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3226 break; 3227 case 7: 3228 B->ops->mult = MatMult_SeqBAIJ_7; 3229 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3230 break; 3231 case 9: { 3232 PetscInt version = 1; 3233 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3234 switch (version) { 3235 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3236 case 1: 3237 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3238 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3239 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3240 break; 3241 #endif 3242 default: 3243 B->ops->mult = MatMult_SeqBAIJ_N; 3244 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3245 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3246 break; 3247 } 3248 break; 3249 } 3250 case 11: 3251 B->ops->mult = MatMult_SeqBAIJ_11; 3252 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3253 break; 3254 case 12: { 3255 PetscInt version = 1; 3256 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3257 switch (version) { 3258 case 1: 3259 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3260 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3261 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3262 break; 3263 case 2: 3264 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3265 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3266 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3267 break; 3268 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3269 case 3: 3270 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3271 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3272 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3273 break; 3274 #endif 3275 default: 3276 B->ops->mult = MatMult_SeqBAIJ_N; 3277 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3278 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3279 break; 3280 } 3281 break; 3282 } 3283 case 15: { 3284 PetscInt version = 1; 3285 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3286 switch (version) { 3287 case 1: 3288 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3289 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3290 break; 3291 case 2: 3292 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3293 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3294 break; 3295 case 3: 3296 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3297 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3298 break; 3299 case 4: 3300 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3301 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3302 break; 3303 default: 3304 B->ops->mult = MatMult_SeqBAIJ_N; 3305 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3306 break; 3307 } 3308 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3309 break; 3310 } 3311 default: 3312 B->ops->mult = MatMult_SeqBAIJ_N; 3313 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3314 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3315 break; 3316 } 3317 } 3318 B->ops->sor = MatSOR_SeqBAIJ; 3319 b->mbs = mbs; 3320 b->nbs = nbs; 3321 if (!skipallocation) { 3322 if (!b->imax) { 3323 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3324 3325 b->free_imax_ilen = PETSC_TRUE; 3326 } 3327 /* b->ilen will count nonzeros in each block row so far. */ 3328 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3329 if (!nnz) { 3330 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3331 else if (nz < 0) nz = 1; 3332 nz = PetscMin(nz, nbs); 3333 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3334 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3335 } else { 3336 PetscInt64 nz64 = 0; 3337 for (i = 0; i < mbs; i++) { 3338 b->imax[i] = nnz[i]; 3339 nz64 += nnz[i]; 3340 } 3341 PetscCall(PetscIntCast(nz64, &nz)); 3342 } 3343 3344 /* allocate the matrix space */ 3345 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3346 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 3347 PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3348 if (B->structure_only) { 3349 b->free_a = PETSC_FALSE; 3350 } else { 3351 PetscInt nzbs2 = 0; 3352 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3353 PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 3354 b->free_a = PETSC_TRUE; 3355 PetscCall(PetscArrayzero(b->a, nzbs2)); 3356 } 3357 b->free_ij = PETSC_TRUE; 3358 PetscCall(PetscArrayzero(b->j, nz)); 3359 3360 b->i[0] = 0; 3361 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3362 } else { 3363 b->free_a = PETSC_FALSE; 3364 b->free_ij = PETSC_FALSE; 3365 } 3366 3367 b->bs2 = bs2; 3368 b->mbs = mbs; 3369 b->nz = 0; 3370 b->maxnz = nz; 3371 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3372 B->was_assembled = PETSC_FALSE; 3373 B->assembled = PETSC_FALSE; 3374 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3375 PetscFunctionReturn(PETSC_SUCCESS); 3376 } 3377 3378 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3379 { 3380 PetscInt i, m, nz, nz_max = 0, *nnz; 3381 PetscScalar *values = NULL; 3382 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3383 3384 PetscFunctionBegin; 3385 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3386 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3387 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3388 PetscCall(PetscLayoutSetUp(B->rmap)); 3389 PetscCall(PetscLayoutSetUp(B->cmap)); 3390 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3391 m = B->rmap->n / bs; 3392 3393 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3394 PetscCall(PetscMalloc1(m + 1, &nnz)); 3395 for (i = 0; i < m; i++) { 3396 nz = ii[i + 1] - ii[i]; 3397 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3398 nz_max = PetscMax(nz_max, nz); 3399 nnz[i] = nz; 3400 } 3401 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3402 PetscCall(PetscFree(nnz)); 3403 3404 values = (PetscScalar *)V; 3405 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3406 for (i = 0; i < m; i++) { 3407 PetscInt ncols = ii[i + 1] - ii[i]; 3408 const PetscInt *icols = jj + ii[i]; 3409 if (bs == 1 || !roworiented) { 3410 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3411 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3412 } else { 3413 PetscInt j; 3414 for (j = 0; j < ncols; j++) { 3415 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3416 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3417 } 3418 } 3419 } 3420 if (!V) PetscCall(PetscFree(values)); 3421 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3422 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3423 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 3427 /*@C 3428 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3429 3430 Not Collective 3431 3432 Input Parameter: 3433 . A - a `MATSEQBAIJ` matrix 3434 3435 Output Parameter: 3436 . array - pointer to the data 3437 3438 Level: intermediate 3439 3440 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3441 @*/ 3442 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3443 { 3444 PetscFunctionBegin; 3445 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3446 PetscFunctionReturn(PETSC_SUCCESS); 3447 } 3448 3449 /*@C 3450 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3451 3452 Not Collective 3453 3454 Input Parameters: 3455 + A - a `MATSEQBAIJ` matrix 3456 - array - pointer to the data 3457 3458 Level: intermediate 3459 3460 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3461 @*/ 3462 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3463 { 3464 PetscFunctionBegin; 3465 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3466 PetscFunctionReturn(PETSC_SUCCESS); 3467 } 3468 3469 /*MC 3470 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3471 block sparse compressed row format. 3472 3473 Options Database Keys: 3474 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3475 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3476 3477 Level: beginner 3478 3479 Notes: 3480 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3481 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3482 3483 Run with `-info` to see what version of the matrix-vector product is being used 3484 3485 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3486 M*/ 3487 3488 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3489 3490 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3491 { 3492 PetscMPIInt size; 3493 Mat_SeqBAIJ *b; 3494 3495 PetscFunctionBegin; 3496 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3497 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3498 3499 PetscCall(PetscNew(&b)); 3500 B->data = (void *)b; 3501 B->ops[0] = MatOps_Values; 3502 3503 b->row = NULL; 3504 b->col = NULL; 3505 b->icol = NULL; 3506 b->reallocs = 0; 3507 b->saved_values = NULL; 3508 3509 b->roworiented = PETSC_TRUE; 3510 b->nonew = 0; 3511 b->diag = NULL; 3512 B->spptr = NULL; 3513 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3514 b->keepnonzeropattern = PETSC_FALSE; 3515 3516 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3517 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3518 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3519 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3520 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3521 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3522 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3523 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3524 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3525 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3526 #if defined(PETSC_HAVE_HYPRE) 3527 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3528 #endif 3529 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3530 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3531 PetscFunctionReturn(PETSC_SUCCESS); 3532 } 3533 3534 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3535 { 3536 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3537 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3538 3539 PetscFunctionBegin; 3540 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3541 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3542 3543 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3544 c->imax = a->imax; 3545 c->ilen = a->ilen; 3546 c->free_imax_ilen = PETSC_FALSE; 3547 } else { 3548 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3549 for (i = 0; i < mbs; i++) { 3550 c->imax[i] = a->imax[i]; 3551 c->ilen[i] = a->ilen[i]; 3552 } 3553 c->free_imax_ilen = PETSC_TRUE; 3554 } 3555 3556 /* allocate the matrix space */ 3557 if (mallocmatspace) { 3558 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3559 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3560 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3561 c->free_a = PETSC_TRUE; 3562 c->i = a->i; 3563 c->j = a->j; 3564 c->free_ij = PETSC_FALSE; 3565 c->parent = A; 3566 C->preallocated = PETSC_TRUE; 3567 C->assembled = PETSC_TRUE; 3568 3569 PetscCall(PetscObjectReference((PetscObject)A)); 3570 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3571 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3572 } else { 3573 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3574 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 3575 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3576 c->free_a = PETSC_TRUE; 3577 c->free_ij = PETSC_TRUE; 3578 3579 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3580 if (mbs > 0) { 3581 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3582 if (cpvalues == MAT_COPY_VALUES) { 3583 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3584 } else { 3585 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3586 } 3587 } 3588 C->preallocated = PETSC_TRUE; 3589 C->assembled = PETSC_TRUE; 3590 } 3591 } 3592 3593 c->roworiented = a->roworiented; 3594 c->nonew = a->nonew; 3595 3596 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3597 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3598 3599 c->bs2 = a->bs2; 3600 c->mbs = a->mbs; 3601 c->nbs = a->nbs; 3602 c->nz = a->nz; 3603 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3604 c->solve_work = NULL; 3605 c->mult_work = NULL; 3606 c->sor_workt = NULL; 3607 c->sor_work = NULL; 3608 3609 c->compressedrow.use = a->compressedrow.use; 3610 c->compressedrow.nrows = a->compressedrow.nrows; 3611 if (a->compressedrow.use) { 3612 i = a->compressedrow.nrows; 3613 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3614 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3615 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3616 } else { 3617 c->compressedrow.use = PETSC_FALSE; 3618 c->compressedrow.i = NULL; 3619 c->compressedrow.rindex = NULL; 3620 } 3621 c->nonzerorowcnt = a->nonzerorowcnt; 3622 C->nonzerostate = A->nonzerostate; 3623 3624 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3625 PetscFunctionReturn(PETSC_SUCCESS); 3626 } 3627 3628 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3629 { 3630 PetscFunctionBegin; 3631 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3632 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3633 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3634 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3635 PetscFunctionReturn(PETSC_SUCCESS); 3636 } 3637 3638 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3639 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3640 { 3641 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3642 PetscInt *rowidxs, *colidxs; 3643 PetscScalar *matvals; 3644 3645 PetscFunctionBegin; 3646 PetscCall(PetscViewerSetUp(viewer)); 3647 3648 /* read matrix header */ 3649 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3650 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3651 M = header[1]; 3652 N = header[2]; 3653 nz = header[3]; 3654 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3655 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3656 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3657 3658 /* set block sizes from the viewer's .info file */ 3659 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3660 /* set local and global sizes if not set already */ 3661 if (mat->rmap->n < 0) mat->rmap->n = M; 3662 if (mat->cmap->n < 0) mat->cmap->n = N; 3663 if (mat->rmap->N < 0) mat->rmap->N = M; 3664 if (mat->cmap->N < 0) mat->cmap->N = N; 3665 PetscCall(PetscLayoutSetUp(mat->rmap)); 3666 PetscCall(PetscLayoutSetUp(mat->cmap)); 3667 3668 /* check if the matrix sizes are correct */ 3669 PetscCall(MatGetSize(mat, &rows, &cols)); 3670 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3671 PetscCall(MatGetBlockSize(mat, &bs)); 3672 PetscCall(MatGetLocalSize(mat, &m, &n)); 3673 mbs = m / bs; 3674 nbs = n / bs; 3675 3676 /* read in row lengths, column indices and nonzero values */ 3677 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3678 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3679 rowidxs[0] = 0; 3680 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3681 sum = rowidxs[m]; 3682 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3683 3684 /* read in column indices and nonzero values */ 3685 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3686 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3687 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3688 3689 { /* preallocate matrix storage */ 3690 PetscBT bt; /* helper bit set to count nonzeros */ 3691 PetscInt *nnz; 3692 PetscBool sbaij; 3693 3694 PetscCall(PetscBTCreate(nbs, &bt)); 3695 PetscCall(PetscCalloc1(mbs, &nnz)); 3696 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3697 for (i = 0; i < mbs; i++) { 3698 PetscCall(PetscBTMemzero(nbs, bt)); 3699 for (k = 0; k < bs; k++) { 3700 PetscInt row = bs * i + k; 3701 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3702 PetscInt col = colidxs[j]; 3703 if (!sbaij || col >= row) 3704 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3705 } 3706 } 3707 } 3708 PetscCall(PetscBTDestroy(&bt)); 3709 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3710 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3711 PetscCall(PetscFree(nnz)); 3712 } 3713 3714 /* store matrix values */ 3715 for (i = 0; i < m; i++) { 3716 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3717 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3718 } 3719 3720 PetscCall(PetscFree(rowidxs)); 3721 PetscCall(PetscFree2(colidxs, matvals)); 3722 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3723 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3724 PetscFunctionReturn(PETSC_SUCCESS); 3725 } 3726 3727 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3728 { 3729 PetscBool isbinary; 3730 3731 PetscFunctionBegin; 3732 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3733 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3734 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3735 PetscFunctionReturn(PETSC_SUCCESS); 3736 } 3737 3738 /*@ 3739 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3740 compressed row) format. For good matrix assembly performance the 3741 user should preallocate the matrix storage by setting the parameter `nz` 3742 (or the array `nnz`). 3743 3744 Collective 3745 3746 Input Parameters: 3747 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3748 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3749 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3750 . m - number of rows 3751 . n - number of columns 3752 . nz - number of nonzero blocks per block row (same for all rows) 3753 - nnz - array containing the number of nonzero blocks in the various block rows 3754 (possibly different for each block row) or `NULL` 3755 3756 Output Parameter: 3757 . A - the matrix 3758 3759 Options Database Keys: 3760 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3761 - -mat_block_size - size of the blocks to use 3762 3763 Level: intermediate 3764 3765 Notes: 3766 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3767 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3768 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3769 3770 The number of rows and columns must be divisible by blocksize. 3771 3772 If the `nnz` parameter is given then the `nz` parameter is ignored 3773 3774 A nonzero block is any block that as 1 or more nonzeros in it 3775 3776 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3777 storage. That is, the stored row and column indices can begin at 3778 either one (as in Fortran) or zero. 3779 3780 Specify the preallocated storage with either `nz` or `nnz` (not both). 3781 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3782 allocation. See [Sparse Matrices](sec_matsparse) for details. 3783 matrices. 3784 3785 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3786 @*/ 3787 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3788 { 3789 PetscFunctionBegin; 3790 PetscCall(MatCreate(comm, A)); 3791 PetscCall(MatSetSizes(*A, m, n, m, n)); 3792 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3793 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3794 PetscFunctionReturn(PETSC_SUCCESS); 3795 } 3796 3797 /*@ 3798 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3799 per row in the matrix. For good matrix assembly performance the 3800 user should preallocate the matrix storage by setting the parameter `nz` 3801 (or the array `nnz`). 3802 3803 Collective 3804 3805 Input Parameters: 3806 + B - the matrix 3807 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3808 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3809 . nz - number of block nonzeros per block row (same for all rows) 3810 - nnz - array containing the number of block nonzeros in the various block rows 3811 (possibly different for each block row) or `NULL` 3812 3813 Options Database Keys: 3814 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3815 - -mat_block_size - size of the blocks to use 3816 3817 Level: intermediate 3818 3819 Notes: 3820 If the `nnz` parameter is given then the `nz` parameter is ignored 3821 3822 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3823 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3824 You can also run with the option `-info` and look for messages with the string 3825 malloc in them to see if additional memory allocation was needed. 3826 3827 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3828 storage. That is, the stored row and column indices can begin at 3829 either one (as in Fortran) or zero. 3830 3831 Specify the preallocated storage with either `nz` or `nnz` (not both). 3832 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3833 allocation. See [Sparse Matrices](sec_matsparse) for details. 3834 3835 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3836 @*/ 3837 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3838 { 3839 PetscFunctionBegin; 3840 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3841 PetscValidType(B, 1); 3842 PetscValidLogicalCollectiveInt(B, bs, 2); 3843 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3844 PetscFunctionReturn(PETSC_SUCCESS); 3845 } 3846 3847 /*@C 3848 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3849 3850 Collective 3851 3852 Input Parameters: 3853 + B - the matrix 3854 . bs - the blocksize 3855 . i - the indices into `j` for the start of each local row (indices start with zero) 3856 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 3857 - v - optional values in the matrix, use `NULL` if not provided 3858 3859 Level: advanced 3860 3861 Notes: 3862 The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3863 3864 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3865 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3866 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3867 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3868 block column and the second index is over columns within a block. 3869 3870 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3871 3872 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3873 @*/ 3874 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3875 { 3876 PetscFunctionBegin; 3877 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3878 PetscValidType(B, 1); 3879 PetscValidLogicalCollectiveInt(B, bs, 2); 3880 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3881 PetscFunctionReturn(PETSC_SUCCESS); 3882 } 3883 3884 /*@ 3885 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3886 3887 Collective 3888 3889 Input Parameters: 3890 + comm - must be an MPI communicator of size 1 3891 . bs - size of block 3892 . m - number of rows 3893 . n - number of columns 3894 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3895 . j - column indices 3896 - a - matrix values 3897 3898 Output Parameter: 3899 . mat - the matrix 3900 3901 Level: advanced 3902 3903 Notes: 3904 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3905 once the matrix is destroyed 3906 3907 You cannot set new nonzero locations into this matrix, that will generate an error. 3908 3909 The `i` and `j` indices are 0 based 3910 3911 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3912 3913 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3914 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3915 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3916 with column-major ordering within blocks. 3917 3918 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3919 @*/ 3920 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3921 { 3922 Mat_SeqBAIJ *baij; 3923 3924 PetscFunctionBegin; 3925 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3926 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3927 3928 PetscCall(MatCreate(comm, mat)); 3929 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3930 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3931 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3932 baij = (Mat_SeqBAIJ *)(*mat)->data; 3933 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3934 3935 baij->i = i; 3936 baij->j = j; 3937 baij->a = a; 3938 3939 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3940 baij->free_a = PETSC_FALSE; 3941 baij->free_ij = PETSC_FALSE; 3942 baij->free_imax_ilen = PETSC_TRUE; 3943 3944 for (PetscInt ii = 0; ii < m; ii++) { 3945 const PetscInt row_len = i[ii + 1] - i[ii]; 3946 3947 baij->ilen[ii] = baij->imax[ii] = row_len; 3948 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3949 } 3950 if (PetscDefined(USE_DEBUG)) { 3951 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3952 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3953 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3954 } 3955 } 3956 3957 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3958 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3959 PetscFunctionReturn(PETSC_SUCCESS); 3960 } 3961 3962 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3963 { 3964 PetscFunctionBegin; 3965 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3966 PetscFunctionReturn(PETSC_SUCCESS); 3967 } 3968