1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petsc/private/kernels/blockmatmult.h> 9 10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 11 #define TYPE BAIJ 12 #define TYPE_BS 13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 14 #undef TYPE_BS 15 #define TYPE_BS _BS 16 #define TYPE_BS_ON 17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 18 #undef TYPE_BS 19 #include "../src/mat/impls/aij/seq/seqhashmat.h" 20 #undef TYPE 21 #undef TYPE_BS_ON 22 23 #if defined(PETSC_HAVE_HYPRE) 24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 25 #endif 26 27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29 #endif 30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31 32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 33 { 34 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 35 PetscInt m, n, ib, jb, bs = A->rmap->bs; 36 MatScalar *a_val = a_aij->a; 37 38 PetscFunctionBegin; 39 PetscCall(MatGetSize(A, &m, &n)); 40 PetscCall(PetscArrayzero(reductions, n)); 41 if (type == NORM_2) { 42 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 43 for (jb = 0; jb < bs; jb++) { 44 for (ib = 0; ib < bs; ib++) { 45 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 46 a_val++; 47 } 48 } 49 } 50 } else if (type == NORM_1) { 51 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 52 for (jb = 0; jb < bs; jb++) { 53 for (ib = 0; ib < bs; ib++) { 54 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 55 a_val++; 56 } 57 } 58 } 59 } else if (type == NORM_INFINITY) { 60 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 61 for (jb = 0; jb < bs; jb++) { 62 for (ib = 0; ib < bs; ib++) { 63 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 64 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 65 a_val++; 66 } 67 } 68 } 69 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 70 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 71 for (jb = 0; jb < bs; jb++) { 72 for (ib = 0; ib < bs; ib++) { 73 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 74 a_val++; 75 } 76 } 77 } 78 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 80 for (jb = 0; jb < bs; jb++) { 81 for (ib = 0; ib < bs; ib++) { 82 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 83 a_val++; 84 } 85 } 86 } 87 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 88 if (type == NORM_2) { 89 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 90 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 91 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 92 } 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 97 { 98 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 99 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 100 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 101 PetscReal shift = 0.0; 102 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 103 104 PetscFunctionBegin; 105 allowzeropivot = PetscNot(A->erroriffailure); 106 107 if (a->idiagvalid) { 108 if (values) *values = a->idiag; 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 112 diag_offset = a->diag; 113 if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 114 diag = a->idiag; 115 if (values) *values = a->idiag; 116 /* factor and invert each block */ 117 switch (bs) { 118 case 1: 119 for (i = 0; i < mbs; i++) { 120 odiag = v + 1 * diag_offset[i]; 121 diag[0] = odiag[0]; 122 123 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 124 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 125 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 127 A->factorerror_zeropivot_row = i; 128 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 129 } 130 131 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 132 diag += 1; 133 } 134 break; 135 case 2: 136 for (i = 0; i < mbs; i++) { 137 odiag = v + 4 * diag_offset[i]; 138 diag[0] = odiag[0]; 139 diag[1] = odiag[1]; 140 diag[2] = odiag[2]; 141 diag[3] = odiag[3]; 142 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 143 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 144 diag += 4; 145 } 146 break; 147 case 3: 148 for (i = 0; i < mbs; i++) { 149 odiag = v + 9 * diag_offset[i]; 150 diag[0] = odiag[0]; 151 diag[1] = odiag[1]; 152 diag[2] = odiag[2]; 153 diag[3] = odiag[3]; 154 diag[4] = odiag[4]; 155 diag[5] = odiag[5]; 156 diag[6] = odiag[6]; 157 diag[7] = odiag[7]; 158 diag[8] = odiag[8]; 159 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 160 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 161 diag += 9; 162 } 163 break; 164 case 4: 165 for (i = 0; i < mbs; i++) { 166 odiag = v + 16 * diag_offset[i]; 167 PetscCall(PetscArraycpy(diag, odiag, 16)); 168 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 169 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 170 diag += 16; 171 } 172 break; 173 case 5: 174 for (i = 0; i < mbs; i++) { 175 odiag = v + 25 * diag_offset[i]; 176 PetscCall(PetscArraycpy(diag, odiag, 25)); 177 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 178 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 179 diag += 25; 180 } 181 break; 182 case 6: 183 for (i = 0; i < mbs; i++) { 184 odiag = v + 36 * diag_offset[i]; 185 PetscCall(PetscArraycpy(diag, odiag, 36)); 186 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 187 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 188 diag += 36; 189 } 190 break; 191 case 7: 192 for (i = 0; i < mbs; i++) { 193 odiag = v + 49 * diag_offset[i]; 194 PetscCall(PetscArraycpy(diag, odiag, 49)); 195 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 196 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 197 diag += 49; 198 } 199 break; 200 default: 201 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 202 for (i = 0; i < mbs; i++) { 203 odiag = v + bs2 * diag_offset[i]; 204 PetscCall(PetscArraycpy(diag, odiag, bs2)); 205 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 206 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 207 diag += bs2; 208 } 209 PetscCall(PetscFree2(v_work, v_pivots)); 210 } 211 a->idiagvalid = PETSC_TRUE; 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 218 PetscScalar *x, *work, *w, *workt, *t; 219 const MatScalar *v, *aa = a->a, *idiag; 220 const PetscScalar *b, *xb; 221 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 222 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 223 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 224 225 PetscFunctionBegin; 226 its = its * lits; 227 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 228 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 229 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 230 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 231 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 232 233 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 234 235 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 236 diag = a->diag; 237 idiag = a->idiag; 238 k = PetscMax(A->rmap->n, A->cmap->n); 239 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 240 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 241 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 242 work = a->mult_work; 243 t = a->sor_workt; 244 w = a->sor_work; 245 246 PetscCall(VecGetArray(xx, &x)); 247 PetscCall(VecGetArrayRead(bb, &b)); 248 249 if (flag & SOR_ZERO_INITIAL_GUESS) { 250 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 251 switch (bs) { 252 case 1: 253 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 254 t[0] = b[0]; 255 i2 = 1; 256 idiag += 1; 257 for (i = 1; i < m; i++) { 258 v = aa + ai[i]; 259 vi = aj + ai[i]; 260 nz = diag[i] - ai[i]; 261 s[0] = b[i2]; 262 for (j = 0; j < nz; j++) { 263 xw[0] = x[vi[j]]; 264 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 265 } 266 t[i2] = s[0]; 267 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 268 x[i2] = xw[0]; 269 idiag += 1; 270 i2 += 1; 271 } 272 break; 273 case 2: 274 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 275 t[0] = b[0]; 276 t[1] = b[1]; 277 i2 = 2; 278 idiag += 4; 279 for (i = 1; i < m; i++) { 280 v = aa + 4 * ai[i]; 281 vi = aj + ai[i]; 282 nz = diag[i] - ai[i]; 283 s[0] = b[i2]; 284 s[1] = b[i2 + 1]; 285 for (j = 0; j < nz; j++) { 286 idx = 2 * vi[j]; 287 it = 4 * j; 288 xw[0] = x[idx]; 289 xw[1] = x[1 + idx]; 290 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 291 } 292 t[i2] = s[0]; 293 t[i2 + 1] = s[1]; 294 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 295 x[i2] = xw[0]; 296 x[i2 + 1] = xw[1]; 297 idiag += 4; 298 i2 += 2; 299 } 300 break; 301 case 3: 302 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 303 t[0] = b[0]; 304 t[1] = b[1]; 305 t[2] = b[2]; 306 i2 = 3; 307 idiag += 9; 308 for (i = 1; i < m; i++) { 309 v = aa + 9 * ai[i]; 310 vi = aj + ai[i]; 311 nz = diag[i] - ai[i]; 312 s[0] = b[i2]; 313 s[1] = b[i2 + 1]; 314 s[2] = b[i2 + 2]; 315 while (nz--) { 316 idx = 3 * (*vi++); 317 xw[0] = x[idx]; 318 xw[1] = x[1 + idx]; 319 xw[2] = x[2 + idx]; 320 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 321 v += 9; 322 } 323 t[i2] = s[0]; 324 t[i2 + 1] = s[1]; 325 t[i2 + 2] = s[2]; 326 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 327 x[i2] = xw[0]; 328 x[i2 + 1] = xw[1]; 329 x[i2 + 2] = xw[2]; 330 idiag += 9; 331 i2 += 3; 332 } 333 break; 334 case 4: 335 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 336 t[0] = b[0]; 337 t[1] = b[1]; 338 t[2] = b[2]; 339 t[3] = b[3]; 340 i2 = 4; 341 idiag += 16; 342 for (i = 1; i < m; i++) { 343 v = aa + 16 * ai[i]; 344 vi = aj + ai[i]; 345 nz = diag[i] - ai[i]; 346 s[0] = b[i2]; 347 s[1] = b[i2 + 1]; 348 s[2] = b[i2 + 2]; 349 s[3] = b[i2 + 3]; 350 while (nz--) { 351 idx = 4 * (*vi++); 352 xw[0] = x[idx]; 353 xw[1] = x[1 + idx]; 354 xw[2] = x[2 + idx]; 355 xw[3] = x[3 + idx]; 356 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 357 v += 16; 358 } 359 t[i2] = s[0]; 360 t[i2 + 1] = s[1]; 361 t[i2 + 2] = s[2]; 362 t[i2 + 3] = s[3]; 363 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 364 x[i2] = xw[0]; 365 x[i2 + 1] = xw[1]; 366 x[i2 + 2] = xw[2]; 367 x[i2 + 3] = xw[3]; 368 idiag += 16; 369 i2 += 4; 370 } 371 break; 372 case 5: 373 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 374 t[0] = b[0]; 375 t[1] = b[1]; 376 t[2] = b[2]; 377 t[3] = b[3]; 378 t[4] = b[4]; 379 i2 = 5; 380 idiag += 25; 381 for (i = 1; i < m; i++) { 382 v = aa + 25 * ai[i]; 383 vi = aj + ai[i]; 384 nz = diag[i] - ai[i]; 385 s[0] = b[i2]; 386 s[1] = b[i2 + 1]; 387 s[2] = b[i2 + 2]; 388 s[3] = b[i2 + 3]; 389 s[4] = b[i2 + 4]; 390 while (nz--) { 391 idx = 5 * (*vi++); 392 xw[0] = x[idx]; 393 xw[1] = x[1 + idx]; 394 xw[2] = x[2 + idx]; 395 xw[3] = x[3 + idx]; 396 xw[4] = x[4 + idx]; 397 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 398 v += 25; 399 } 400 t[i2] = s[0]; 401 t[i2 + 1] = s[1]; 402 t[i2 + 2] = s[2]; 403 t[i2 + 3] = s[3]; 404 t[i2 + 4] = s[4]; 405 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 406 x[i2] = xw[0]; 407 x[i2 + 1] = xw[1]; 408 x[i2 + 2] = xw[2]; 409 x[i2 + 3] = xw[3]; 410 x[i2 + 4] = xw[4]; 411 idiag += 25; 412 i2 += 5; 413 } 414 break; 415 case 6: 416 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 417 t[0] = b[0]; 418 t[1] = b[1]; 419 t[2] = b[2]; 420 t[3] = b[3]; 421 t[4] = b[4]; 422 t[5] = b[5]; 423 i2 = 6; 424 idiag += 36; 425 for (i = 1; i < m; i++) { 426 v = aa + 36 * ai[i]; 427 vi = aj + ai[i]; 428 nz = diag[i] - ai[i]; 429 s[0] = b[i2]; 430 s[1] = b[i2 + 1]; 431 s[2] = b[i2 + 2]; 432 s[3] = b[i2 + 3]; 433 s[4] = b[i2 + 4]; 434 s[5] = b[i2 + 5]; 435 while (nz--) { 436 idx = 6 * (*vi++); 437 xw[0] = x[idx]; 438 xw[1] = x[1 + idx]; 439 xw[2] = x[2 + idx]; 440 xw[3] = x[3 + idx]; 441 xw[4] = x[4 + idx]; 442 xw[5] = x[5 + idx]; 443 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 444 v += 36; 445 } 446 t[i2] = s[0]; 447 t[i2 + 1] = s[1]; 448 t[i2 + 2] = s[2]; 449 t[i2 + 3] = s[3]; 450 t[i2 + 4] = s[4]; 451 t[i2 + 5] = s[5]; 452 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 453 x[i2] = xw[0]; 454 x[i2 + 1] = xw[1]; 455 x[i2 + 2] = xw[2]; 456 x[i2 + 3] = xw[3]; 457 x[i2 + 4] = xw[4]; 458 x[i2 + 5] = xw[5]; 459 idiag += 36; 460 i2 += 6; 461 } 462 break; 463 case 7: 464 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 465 t[0] = b[0]; 466 t[1] = b[1]; 467 t[2] = b[2]; 468 t[3] = b[3]; 469 t[4] = b[4]; 470 t[5] = b[5]; 471 t[6] = b[6]; 472 i2 = 7; 473 idiag += 49; 474 for (i = 1; i < m; i++) { 475 v = aa + 49 * ai[i]; 476 vi = aj + ai[i]; 477 nz = diag[i] - ai[i]; 478 s[0] = b[i2]; 479 s[1] = b[i2 + 1]; 480 s[2] = b[i2 + 2]; 481 s[3] = b[i2 + 3]; 482 s[4] = b[i2 + 4]; 483 s[5] = b[i2 + 5]; 484 s[6] = b[i2 + 6]; 485 while (nz--) { 486 idx = 7 * (*vi++); 487 xw[0] = x[idx]; 488 xw[1] = x[1 + idx]; 489 xw[2] = x[2 + idx]; 490 xw[3] = x[3 + idx]; 491 xw[4] = x[4 + idx]; 492 xw[5] = x[5 + idx]; 493 xw[6] = x[6 + idx]; 494 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 495 v += 49; 496 } 497 t[i2] = s[0]; 498 t[i2 + 1] = s[1]; 499 t[i2 + 2] = s[2]; 500 t[i2 + 3] = s[3]; 501 t[i2 + 4] = s[4]; 502 t[i2 + 5] = s[5]; 503 t[i2 + 6] = s[6]; 504 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 505 x[i2] = xw[0]; 506 x[i2 + 1] = xw[1]; 507 x[i2 + 2] = xw[2]; 508 x[i2 + 3] = xw[3]; 509 x[i2 + 4] = xw[4]; 510 x[i2 + 5] = xw[5]; 511 x[i2 + 6] = xw[6]; 512 idiag += 49; 513 i2 += 7; 514 } 515 break; 516 default: 517 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 518 PetscCall(PetscArraycpy(t, b, bs)); 519 i2 = bs; 520 idiag += bs2; 521 for (i = 1; i < m; i++) { 522 v = aa + bs2 * ai[i]; 523 vi = aj + ai[i]; 524 nz = diag[i] - ai[i]; 525 526 PetscCall(PetscArraycpy(w, b + i2, bs)); 527 /* copy all rows of x that are needed into contiguous space */ 528 workt = work; 529 for (j = 0; j < nz; j++) { 530 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 531 workt += bs; 532 } 533 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 534 PetscCall(PetscArraycpy(t + i2, w, bs)); 535 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 536 537 idiag += bs2; 538 i2 += bs; 539 } 540 break; 541 } 542 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 543 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 544 xb = t; 545 } else xb = b; 546 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 547 idiag = a->idiag + bs2 * (a->mbs - 1); 548 i2 = bs * (m - 1); 549 switch (bs) { 550 case 1: 551 s[0] = xb[i2]; 552 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 553 x[i2] = xw[0]; 554 i2 -= 1; 555 for (i = m - 2; i >= 0; i--) { 556 v = aa + (diag[i] + 1); 557 vi = aj + diag[i] + 1; 558 nz = ai[i + 1] - diag[i] - 1; 559 s[0] = xb[i2]; 560 for (j = 0; j < nz; j++) { 561 xw[0] = x[vi[j]]; 562 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 563 } 564 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 565 x[i2] = xw[0]; 566 idiag -= 1; 567 i2 -= 1; 568 } 569 break; 570 case 2: 571 s[0] = xb[i2]; 572 s[1] = xb[i2 + 1]; 573 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 574 x[i2] = xw[0]; 575 x[i2 + 1] = xw[1]; 576 i2 -= 2; 577 idiag -= 4; 578 for (i = m - 2; i >= 0; i--) { 579 v = aa + 4 * (diag[i] + 1); 580 vi = aj + diag[i] + 1; 581 nz = ai[i + 1] - diag[i] - 1; 582 s[0] = xb[i2]; 583 s[1] = xb[i2 + 1]; 584 for (j = 0; j < nz; j++) { 585 idx = 2 * vi[j]; 586 it = 4 * j; 587 xw[0] = x[idx]; 588 xw[1] = x[1 + idx]; 589 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 590 } 591 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 592 x[i2] = xw[0]; 593 x[i2 + 1] = xw[1]; 594 idiag -= 4; 595 i2 -= 2; 596 } 597 break; 598 case 3: 599 s[0] = xb[i2]; 600 s[1] = xb[i2 + 1]; 601 s[2] = xb[i2 + 2]; 602 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 603 x[i2] = xw[0]; 604 x[i2 + 1] = xw[1]; 605 x[i2 + 2] = xw[2]; 606 i2 -= 3; 607 idiag -= 9; 608 for (i = m - 2; i >= 0; i--) { 609 v = aa + 9 * (diag[i] + 1); 610 vi = aj + diag[i] + 1; 611 nz = ai[i + 1] - diag[i] - 1; 612 s[0] = xb[i2]; 613 s[1] = xb[i2 + 1]; 614 s[2] = xb[i2 + 2]; 615 while (nz--) { 616 idx = 3 * (*vi++); 617 xw[0] = x[idx]; 618 xw[1] = x[1 + idx]; 619 xw[2] = x[2 + idx]; 620 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 621 v += 9; 622 } 623 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 624 x[i2] = xw[0]; 625 x[i2 + 1] = xw[1]; 626 x[i2 + 2] = xw[2]; 627 idiag -= 9; 628 i2 -= 3; 629 } 630 break; 631 case 4: 632 s[0] = xb[i2]; 633 s[1] = xb[i2 + 1]; 634 s[2] = xb[i2 + 2]; 635 s[3] = xb[i2 + 3]; 636 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 637 x[i2] = xw[0]; 638 x[i2 + 1] = xw[1]; 639 x[i2 + 2] = xw[2]; 640 x[i2 + 3] = xw[3]; 641 i2 -= 4; 642 idiag -= 16; 643 for (i = m - 2; i >= 0; i--) { 644 v = aa + 16 * (diag[i] + 1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i + 1] - diag[i] - 1; 647 s[0] = xb[i2]; 648 s[1] = xb[i2 + 1]; 649 s[2] = xb[i2 + 2]; 650 s[3] = xb[i2 + 3]; 651 while (nz--) { 652 idx = 4 * (*vi++); 653 xw[0] = x[idx]; 654 xw[1] = x[1 + idx]; 655 xw[2] = x[2 + idx]; 656 xw[3] = x[3 + idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 658 v += 16; 659 } 660 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 661 x[i2] = xw[0]; 662 x[i2 + 1] = xw[1]; 663 x[i2 + 2] = xw[2]; 664 x[i2 + 3] = xw[3]; 665 idiag -= 16; 666 i2 -= 4; 667 } 668 break; 669 case 5: 670 s[0] = xb[i2]; 671 s[1] = xb[i2 + 1]; 672 s[2] = xb[i2 + 2]; 673 s[3] = xb[i2 + 3]; 674 s[4] = xb[i2 + 4]; 675 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 676 x[i2] = xw[0]; 677 x[i2 + 1] = xw[1]; 678 x[i2 + 2] = xw[2]; 679 x[i2 + 3] = xw[3]; 680 x[i2 + 4] = xw[4]; 681 i2 -= 5; 682 idiag -= 25; 683 for (i = m - 2; i >= 0; i--) { 684 v = aa + 25 * (diag[i] + 1); 685 vi = aj + diag[i] + 1; 686 nz = ai[i + 1] - diag[i] - 1; 687 s[0] = xb[i2]; 688 s[1] = xb[i2 + 1]; 689 s[2] = xb[i2 + 2]; 690 s[3] = xb[i2 + 3]; 691 s[4] = xb[i2 + 4]; 692 while (nz--) { 693 idx = 5 * (*vi++); 694 xw[0] = x[idx]; 695 xw[1] = x[1 + idx]; 696 xw[2] = x[2 + idx]; 697 xw[3] = x[3 + idx]; 698 xw[4] = x[4 + idx]; 699 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 700 v += 25; 701 } 702 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 703 x[i2] = xw[0]; 704 x[i2 + 1] = xw[1]; 705 x[i2 + 2] = xw[2]; 706 x[i2 + 3] = xw[3]; 707 x[i2 + 4] = xw[4]; 708 idiag -= 25; 709 i2 -= 5; 710 } 711 break; 712 case 6: 713 s[0] = xb[i2]; 714 s[1] = xb[i2 + 1]; 715 s[2] = xb[i2 + 2]; 716 s[3] = xb[i2 + 3]; 717 s[4] = xb[i2 + 4]; 718 s[5] = xb[i2 + 5]; 719 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 720 x[i2] = xw[0]; 721 x[i2 + 1] = xw[1]; 722 x[i2 + 2] = xw[2]; 723 x[i2 + 3] = xw[3]; 724 x[i2 + 4] = xw[4]; 725 x[i2 + 5] = xw[5]; 726 i2 -= 6; 727 idiag -= 36; 728 for (i = m - 2; i >= 0; i--) { 729 v = aa + 36 * (diag[i] + 1); 730 vi = aj + diag[i] + 1; 731 nz = ai[i + 1] - diag[i] - 1; 732 s[0] = xb[i2]; 733 s[1] = xb[i2 + 1]; 734 s[2] = xb[i2 + 2]; 735 s[3] = xb[i2 + 3]; 736 s[4] = xb[i2 + 4]; 737 s[5] = xb[i2 + 5]; 738 while (nz--) { 739 idx = 6 * (*vi++); 740 xw[0] = x[idx]; 741 xw[1] = x[1 + idx]; 742 xw[2] = x[2 + idx]; 743 xw[3] = x[3 + idx]; 744 xw[4] = x[4 + idx]; 745 xw[5] = x[5 + idx]; 746 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 747 v += 36; 748 } 749 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 750 x[i2] = xw[0]; 751 x[i2 + 1] = xw[1]; 752 x[i2 + 2] = xw[2]; 753 x[i2 + 3] = xw[3]; 754 x[i2 + 4] = xw[4]; 755 x[i2 + 5] = xw[5]; 756 idiag -= 36; 757 i2 -= 6; 758 } 759 break; 760 case 7: 761 s[0] = xb[i2]; 762 s[1] = xb[i2 + 1]; 763 s[2] = xb[i2 + 2]; 764 s[3] = xb[i2 + 3]; 765 s[4] = xb[i2 + 4]; 766 s[5] = xb[i2 + 5]; 767 s[6] = xb[i2 + 6]; 768 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 769 x[i2] = xw[0]; 770 x[i2 + 1] = xw[1]; 771 x[i2 + 2] = xw[2]; 772 x[i2 + 3] = xw[3]; 773 x[i2 + 4] = xw[4]; 774 x[i2 + 5] = xw[5]; 775 x[i2 + 6] = xw[6]; 776 i2 -= 7; 777 idiag -= 49; 778 for (i = m - 2; i >= 0; i--) { 779 v = aa + 49 * (diag[i] + 1); 780 vi = aj + diag[i] + 1; 781 nz = ai[i + 1] - diag[i] - 1; 782 s[0] = xb[i2]; 783 s[1] = xb[i2 + 1]; 784 s[2] = xb[i2 + 2]; 785 s[3] = xb[i2 + 3]; 786 s[4] = xb[i2 + 4]; 787 s[5] = xb[i2 + 5]; 788 s[6] = xb[i2 + 6]; 789 while (nz--) { 790 idx = 7 * (*vi++); 791 xw[0] = x[idx]; 792 xw[1] = x[1 + idx]; 793 xw[2] = x[2 + idx]; 794 xw[3] = x[3 + idx]; 795 xw[4] = x[4 + idx]; 796 xw[5] = x[5 + idx]; 797 xw[6] = x[6 + idx]; 798 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 799 v += 49; 800 } 801 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 802 x[i2] = xw[0]; 803 x[i2 + 1] = xw[1]; 804 x[i2 + 2] = xw[2]; 805 x[i2 + 3] = xw[3]; 806 x[i2 + 4] = xw[4]; 807 x[i2 + 5] = xw[5]; 808 x[i2 + 6] = xw[6]; 809 idiag -= 49; 810 i2 -= 7; 811 } 812 break; 813 default: 814 PetscCall(PetscArraycpy(w, xb + i2, bs)); 815 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 816 i2 -= bs; 817 idiag -= bs2; 818 for (i = m - 2; i >= 0; i--) { 819 v = aa + bs2 * (diag[i] + 1); 820 vi = aj + diag[i] + 1; 821 nz = ai[i + 1] - diag[i] - 1; 822 823 PetscCall(PetscArraycpy(w, xb + i2, bs)); 824 /* copy all rows of x that are needed into contiguous space */ 825 workt = work; 826 for (j = 0; j < nz; j++) { 827 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 828 workt += bs; 829 } 830 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 831 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 832 833 idiag -= bs2; 834 i2 -= bs; 835 } 836 break; 837 } 838 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 839 } 840 its--; 841 } 842 while (its--) { 843 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 844 idiag = a->idiag; 845 i2 = 0; 846 switch (bs) { 847 case 1: 848 for (i = 0; i < m; i++) { 849 v = aa + ai[i]; 850 vi = aj + ai[i]; 851 nz = ai[i + 1] - ai[i]; 852 s[0] = b[i2]; 853 for (j = 0; j < nz; j++) { 854 xw[0] = x[vi[j]]; 855 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 856 } 857 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 858 x[i2] += xw[0]; 859 idiag += 1; 860 i2 += 1; 861 } 862 break; 863 case 2: 864 for (i = 0; i < m; i++) { 865 v = aa + 4 * ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i + 1] - ai[i]; 868 s[0] = b[i2]; 869 s[1] = b[i2 + 1]; 870 for (j = 0; j < nz; j++) { 871 idx = 2 * vi[j]; 872 it = 4 * j; 873 xw[0] = x[idx]; 874 xw[1] = x[1 + idx]; 875 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 876 } 877 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 878 x[i2] += xw[0]; 879 x[i2 + 1] += xw[1]; 880 idiag += 4; 881 i2 += 2; 882 } 883 break; 884 case 3: 885 for (i = 0; i < m; i++) { 886 v = aa + 9 * ai[i]; 887 vi = aj + ai[i]; 888 nz = ai[i + 1] - ai[i]; 889 s[0] = b[i2]; 890 s[1] = b[i2 + 1]; 891 s[2] = b[i2 + 2]; 892 while (nz--) { 893 idx = 3 * (*vi++); 894 xw[0] = x[idx]; 895 xw[1] = x[1 + idx]; 896 xw[2] = x[2 + idx]; 897 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 898 v += 9; 899 } 900 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 901 x[i2] += xw[0]; 902 x[i2 + 1] += xw[1]; 903 x[i2 + 2] += xw[2]; 904 idiag += 9; 905 i2 += 3; 906 } 907 break; 908 case 4: 909 for (i = 0; i < m; i++) { 910 v = aa + 16 * ai[i]; 911 vi = aj + ai[i]; 912 nz = ai[i + 1] - ai[i]; 913 s[0] = b[i2]; 914 s[1] = b[i2 + 1]; 915 s[2] = b[i2 + 2]; 916 s[3] = b[i2 + 3]; 917 while (nz--) { 918 idx = 4 * (*vi++); 919 xw[0] = x[idx]; 920 xw[1] = x[1 + idx]; 921 xw[2] = x[2 + idx]; 922 xw[3] = x[3 + idx]; 923 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 924 v += 16; 925 } 926 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 927 x[i2] += xw[0]; 928 x[i2 + 1] += xw[1]; 929 x[i2 + 2] += xw[2]; 930 x[i2 + 3] += xw[3]; 931 idiag += 16; 932 i2 += 4; 933 } 934 break; 935 case 5: 936 for (i = 0; i < m; i++) { 937 v = aa + 25 * ai[i]; 938 vi = aj + ai[i]; 939 nz = ai[i + 1] - ai[i]; 940 s[0] = b[i2]; 941 s[1] = b[i2 + 1]; 942 s[2] = b[i2 + 2]; 943 s[3] = b[i2 + 3]; 944 s[4] = b[i2 + 4]; 945 while (nz--) { 946 idx = 5 * (*vi++); 947 xw[0] = x[idx]; 948 xw[1] = x[1 + idx]; 949 xw[2] = x[2 + idx]; 950 xw[3] = x[3 + idx]; 951 xw[4] = x[4 + idx]; 952 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 953 v += 25; 954 } 955 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 956 x[i2] += xw[0]; 957 x[i2 + 1] += xw[1]; 958 x[i2 + 2] += xw[2]; 959 x[i2 + 3] += xw[3]; 960 x[i2 + 4] += xw[4]; 961 idiag += 25; 962 i2 += 5; 963 } 964 break; 965 case 6: 966 for (i = 0; i < m; i++) { 967 v = aa + 36 * ai[i]; 968 vi = aj + ai[i]; 969 nz = ai[i + 1] - ai[i]; 970 s[0] = b[i2]; 971 s[1] = b[i2 + 1]; 972 s[2] = b[i2 + 2]; 973 s[3] = b[i2 + 3]; 974 s[4] = b[i2 + 4]; 975 s[5] = b[i2 + 5]; 976 while (nz--) { 977 idx = 6 * (*vi++); 978 xw[0] = x[idx]; 979 xw[1] = x[1 + idx]; 980 xw[2] = x[2 + idx]; 981 xw[3] = x[3 + idx]; 982 xw[4] = x[4 + idx]; 983 xw[5] = x[5 + idx]; 984 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 985 v += 36; 986 } 987 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 988 x[i2] += xw[0]; 989 x[i2 + 1] += xw[1]; 990 x[i2 + 2] += xw[2]; 991 x[i2 + 3] += xw[3]; 992 x[i2 + 4] += xw[4]; 993 x[i2 + 5] += xw[5]; 994 idiag += 36; 995 i2 += 6; 996 } 997 break; 998 case 7: 999 for (i = 0; i < m; i++) { 1000 v = aa + 49 * ai[i]; 1001 vi = aj + ai[i]; 1002 nz = ai[i + 1] - ai[i]; 1003 s[0] = b[i2]; 1004 s[1] = b[i2 + 1]; 1005 s[2] = b[i2 + 2]; 1006 s[3] = b[i2 + 3]; 1007 s[4] = b[i2 + 4]; 1008 s[5] = b[i2 + 5]; 1009 s[6] = b[i2 + 6]; 1010 while (nz--) { 1011 idx = 7 * (*vi++); 1012 xw[0] = x[idx]; 1013 xw[1] = x[1 + idx]; 1014 xw[2] = x[2 + idx]; 1015 xw[3] = x[3 + idx]; 1016 xw[4] = x[4 + idx]; 1017 xw[5] = x[5 + idx]; 1018 xw[6] = x[6 + idx]; 1019 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1020 v += 49; 1021 } 1022 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1023 x[i2] += xw[0]; 1024 x[i2 + 1] += xw[1]; 1025 x[i2 + 2] += xw[2]; 1026 x[i2 + 3] += xw[3]; 1027 x[i2 + 4] += xw[4]; 1028 x[i2 + 5] += xw[5]; 1029 x[i2 + 6] += xw[6]; 1030 idiag += 49; 1031 i2 += 7; 1032 } 1033 break; 1034 default: 1035 for (i = 0; i < m; i++) { 1036 v = aa + bs2 * ai[i]; 1037 vi = aj + ai[i]; 1038 nz = ai[i + 1] - ai[i]; 1039 1040 PetscCall(PetscArraycpy(w, b + i2, bs)); 1041 /* copy all rows of x that are needed into contiguous space */ 1042 workt = work; 1043 for (j = 0; j < nz; j++) { 1044 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1045 workt += bs; 1046 } 1047 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1048 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1049 1050 idiag += bs2; 1051 i2 += bs; 1052 } 1053 break; 1054 } 1055 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1056 } 1057 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1058 idiag = a->idiag + bs2 * (a->mbs - 1); 1059 i2 = bs * (m - 1); 1060 switch (bs) { 1061 case 1: 1062 for (i = m - 1; i >= 0; i--) { 1063 v = aa + ai[i]; 1064 vi = aj + ai[i]; 1065 nz = ai[i + 1] - ai[i]; 1066 s[0] = b[i2]; 1067 for (j = 0; j < nz; j++) { 1068 xw[0] = x[vi[j]]; 1069 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1070 } 1071 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1072 x[i2] += xw[0]; 1073 idiag -= 1; 1074 i2 -= 1; 1075 } 1076 break; 1077 case 2: 1078 for (i = m - 1; i >= 0; i--) { 1079 v = aa + 4 * ai[i]; 1080 vi = aj + ai[i]; 1081 nz = ai[i + 1] - ai[i]; 1082 s[0] = b[i2]; 1083 s[1] = b[i2 + 1]; 1084 for (j = 0; j < nz; j++) { 1085 idx = 2 * vi[j]; 1086 it = 4 * j; 1087 xw[0] = x[idx]; 1088 xw[1] = x[1 + idx]; 1089 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1090 } 1091 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1092 x[i2] += xw[0]; 1093 x[i2 + 1] += xw[1]; 1094 idiag -= 4; 1095 i2 -= 2; 1096 } 1097 break; 1098 case 3: 1099 for (i = m - 1; i >= 0; i--) { 1100 v = aa + 9 * ai[i]; 1101 vi = aj + ai[i]; 1102 nz = ai[i + 1] - ai[i]; 1103 s[0] = b[i2]; 1104 s[1] = b[i2 + 1]; 1105 s[2] = b[i2 + 2]; 1106 while (nz--) { 1107 idx = 3 * (*vi++); 1108 xw[0] = x[idx]; 1109 xw[1] = x[1 + idx]; 1110 xw[2] = x[2 + idx]; 1111 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1112 v += 9; 1113 } 1114 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1115 x[i2] += xw[0]; 1116 x[i2 + 1] += xw[1]; 1117 x[i2 + 2] += xw[2]; 1118 idiag -= 9; 1119 i2 -= 3; 1120 } 1121 break; 1122 case 4: 1123 for (i = m - 1; i >= 0; i--) { 1124 v = aa + 16 * ai[i]; 1125 vi = aj + ai[i]; 1126 nz = ai[i + 1] - ai[i]; 1127 s[0] = b[i2]; 1128 s[1] = b[i2 + 1]; 1129 s[2] = b[i2 + 2]; 1130 s[3] = b[i2 + 3]; 1131 while (nz--) { 1132 idx = 4 * (*vi++); 1133 xw[0] = x[idx]; 1134 xw[1] = x[1 + idx]; 1135 xw[2] = x[2 + idx]; 1136 xw[3] = x[3 + idx]; 1137 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1138 v += 16; 1139 } 1140 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1141 x[i2] += xw[0]; 1142 x[i2 + 1] += xw[1]; 1143 x[i2 + 2] += xw[2]; 1144 x[i2 + 3] += xw[3]; 1145 idiag -= 16; 1146 i2 -= 4; 1147 } 1148 break; 1149 case 5: 1150 for (i = m - 1; i >= 0; i--) { 1151 v = aa + 25 * ai[i]; 1152 vi = aj + ai[i]; 1153 nz = ai[i + 1] - ai[i]; 1154 s[0] = b[i2]; 1155 s[1] = b[i2 + 1]; 1156 s[2] = b[i2 + 2]; 1157 s[3] = b[i2 + 3]; 1158 s[4] = b[i2 + 4]; 1159 while (nz--) { 1160 idx = 5 * (*vi++); 1161 xw[0] = x[idx]; 1162 xw[1] = x[1 + idx]; 1163 xw[2] = x[2 + idx]; 1164 xw[3] = x[3 + idx]; 1165 xw[4] = x[4 + idx]; 1166 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1167 v += 25; 1168 } 1169 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1170 x[i2] += xw[0]; 1171 x[i2 + 1] += xw[1]; 1172 x[i2 + 2] += xw[2]; 1173 x[i2 + 3] += xw[3]; 1174 x[i2 + 4] += xw[4]; 1175 idiag -= 25; 1176 i2 -= 5; 1177 } 1178 break; 1179 case 6: 1180 for (i = m - 1; i >= 0; i--) { 1181 v = aa + 36 * ai[i]; 1182 vi = aj + ai[i]; 1183 nz = ai[i + 1] - ai[i]; 1184 s[0] = b[i2]; 1185 s[1] = b[i2 + 1]; 1186 s[2] = b[i2 + 2]; 1187 s[3] = b[i2 + 3]; 1188 s[4] = b[i2 + 4]; 1189 s[5] = b[i2 + 5]; 1190 while (nz--) { 1191 idx = 6 * (*vi++); 1192 xw[0] = x[idx]; 1193 xw[1] = x[1 + idx]; 1194 xw[2] = x[2 + idx]; 1195 xw[3] = x[3 + idx]; 1196 xw[4] = x[4 + idx]; 1197 xw[5] = x[5 + idx]; 1198 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1199 v += 36; 1200 } 1201 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1202 x[i2] += xw[0]; 1203 x[i2 + 1] += xw[1]; 1204 x[i2 + 2] += xw[2]; 1205 x[i2 + 3] += xw[3]; 1206 x[i2 + 4] += xw[4]; 1207 x[i2 + 5] += xw[5]; 1208 idiag -= 36; 1209 i2 -= 6; 1210 } 1211 break; 1212 case 7: 1213 for (i = m - 1; i >= 0; i--) { 1214 v = aa + 49 * ai[i]; 1215 vi = aj + ai[i]; 1216 nz = ai[i + 1] - ai[i]; 1217 s[0] = b[i2]; 1218 s[1] = b[i2 + 1]; 1219 s[2] = b[i2 + 2]; 1220 s[3] = b[i2 + 3]; 1221 s[4] = b[i2 + 4]; 1222 s[5] = b[i2 + 5]; 1223 s[6] = b[i2 + 6]; 1224 while (nz--) { 1225 idx = 7 * (*vi++); 1226 xw[0] = x[idx]; 1227 xw[1] = x[1 + idx]; 1228 xw[2] = x[2 + idx]; 1229 xw[3] = x[3 + idx]; 1230 xw[4] = x[4 + idx]; 1231 xw[5] = x[5 + idx]; 1232 xw[6] = x[6 + idx]; 1233 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1234 v += 49; 1235 } 1236 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1237 x[i2] += xw[0]; 1238 x[i2 + 1] += xw[1]; 1239 x[i2 + 2] += xw[2]; 1240 x[i2 + 3] += xw[3]; 1241 x[i2 + 4] += xw[4]; 1242 x[i2 + 5] += xw[5]; 1243 x[i2 + 6] += xw[6]; 1244 idiag -= 49; 1245 i2 -= 7; 1246 } 1247 break; 1248 default: 1249 for (i = m - 1; i >= 0; i--) { 1250 v = aa + bs2 * ai[i]; 1251 vi = aj + ai[i]; 1252 nz = ai[i + 1] - ai[i]; 1253 1254 PetscCall(PetscArraycpy(w, b + i2, bs)); 1255 /* copy all rows of x that are needed into contiguous space */ 1256 workt = work; 1257 for (j = 0; j < nz; j++) { 1258 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1259 workt += bs; 1260 } 1261 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1262 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1263 1264 idiag -= bs2; 1265 i2 -= bs; 1266 } 1267 break; 1268 } 1269 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1270 } 1271 } 1272 PetscCall(VecRestoreArray(xx, &x)); 1273 PetscCall(VecRestoreArrayRead(bb, &b)); 1274 PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 /* 1278 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1279 */ 1280 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1281 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1282 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1283 #define matsetvaluesblocked4_ matsetvaluesblocked4 1284 #endif 1285 1286 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1287 { 1288 Mat A = *AA; 1289 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1290 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1291 PetscInt *ai = a->i, *ailen = a->ilen; 1292 PetscInt *aj = a->j, stepval, lastcol = -1; 1293 const PetscScalar *value = v; 1294 MatScalar *ap, *aa = a->a, *bap; 1295 1296 PetscFunctionBegin; 1297 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1298 stepval = (n - 1) * 4; 1299 for (k = 0; k < m; k++) { /* loop over added rows */ 1300 row = im[k]; 1301 rp = aj + ai[row]; 1302 ap = aa + 16 * ai[row]; 1303 nrow = ailen[row]; 1304 low = 0; 1305 high = nrow; 1306 for (l = 0; l < n; l++) { /* loop over added columns */ 1307 col = in[l]; 1308 if (col <= lastcol) low = 0; 1309 else high = nrow; 1310 lastcol = col; 1311 value = v + k * (stepval + 4 + l) * 4; 1312 while (high - low > 7) { 1313 t = (low + high) / 2; 1314 if (rp[t] > col) high = t; 1315 else low = t; 1316 } 1317 for (i = low; i < high; i++) { 1318 if (rp[i] > col) break; 1319 if (rp[i] == col) { 1320 bap = ap + 16 * i; 1321 for (ii = 0; ii < 4; ii++, value += stepval) { 1322 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1323 } 1324 goto noinsert2; 1325 } 1326 } 1327 N = nrow++ - 1; 1328 high++; /* added new column index thus must search to one higher than before */ 1329 /* shift up all the later entries in this row */ 1330 for (ii = N; ii >= i; ii--) { 1331 rp[ii + 1] = rp[ii]; 1332 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1333 } 1334 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1335 rp[i] = col; 1336 bap = ap + 16 * i; 1337 for (ii = 0; ii < 4; ii++, value += stepval) { 1338 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1339 } 1340 noinsert2:; 1341 low = i; 1342 } 1343 ailen[row] = nrow; 1344 } 1345 PetscFunctionReturnVoid(); 1346 } 1347 1348 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1349 #define matsetvalues4_ MATSETVALUES4 1350 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1351 #define matsetvalues4_ matsetvalues4 1352 #endif 1353 1354 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1355 { 1356 Mat A = *AA; 1357 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1358 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1359 PetscInt *ai = a->i, *ailen = a->ilen; 1360 PetscInt *aj = a->j, brow, bcol; 1361 PetscInt ridx, cidx, lastcol = -1; 1362 MatScalar *ap, value, *aa = a->a, *bap; 1363 1364 PetscFunctionBegin; 1365 for (k = 0; k < m; k++) { /* loop over added rows */ 1366 row = im[k]; 1367 brow = row / 4; 1368 rp = aj + ai[brow]; 1369 ap = aa + 16 * ai[brow]; 1370 nrow = ailen[brow]; 1371 low = 0; 1372 high = nrow; 1373 for (l = 0; l < n; l++) { /* loop over added columns */ 1374 col = in[l]; 1375 bcol = col / 4; 1376 ridx = row % 4; 1377 cidx = col % 4; 1378 value = v[l + k * n]; 1379 if (col <= lastcol) low = 0; 1380 else high = nrow; 1381 lastcol = col; 1382 while (high - low > 7) { 1383 t = (low + high) / 2; 1384 if (rp[t] > bcol) high = t; 1385 else low = t; 1386 } 1387 for (i = low; i < high; i++) { 1388 if (rp[i] > bcol) break; 1389 if (rp[i] == bcol) { 1390 bap = ap + 16 * i + 4 * cidx + ridx; 1391 *bap += value; 1392 goto noinsert1; 1393 } 1394 } 1395 N = nrow++ - 1; 1396 high++; /* added new column thus must search to one higher than before */ 1397 /* shift up all the later entries in this row */ 1398 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1399 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1400 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1401 rp[i] = bcol; 1402 ap[16 * i + 4 * cidx + ridx] = value; 1403 noinsert1:; 1404 low = i; 1405 } 1406 ailen[brow] = nrow; 1407 } 1408 PetscFunctionReturnVoid(); 1409 } 1410 1411 /* 1412 Checks for missing diagonals 1413 */ 1414 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1415 { 1416 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1417 PetscInt *diag, *ii = a->i, i; 1418 1419 PetscFunctionBegin; 1420 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1421 *missing = PETSC_FALSE; 1422 if (A->rmap->n > 0 && !ii) { 1423 *missing = PETSC_TRUE; 1424 if (d) *d = 0; 1425 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1426 } else { 1427 PetscInt n; 1428 n = PetscMin(a->mbs, a->nbs); 1429 diag = a->diag; 1430 for (i = 0; i < n; i++) { 1431 if (diag[i] >= ii[i + 1]) { 1432 *missing = PETSC_TRUE; 1433 if (d) *d = i; 1434 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1435 break; 1436 } 1437 } 1438 } 1439 PetscFunctionReturn(PETSC_SUCCESS); 1440 } 1441 1442 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1443 { 1444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1445 PetscInt i, j, m = a->mbs; 1446 1447 PetscFunctionBegin; 1448 if (!a->diag) { 1449 PetscCall(PetscMalloc1(m, &a->diag)); 1450 a->free_diag = PETSC_TRUE; 1451 } 1452 for (i = 0; i < m; i++) { 1453 a->diag[i] = a->i[i + 1]; 1454 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1455 if (a->j[j] == i) { 1456 a->diag[i] = j; 1457 break; 1458 } 1459 } 1460 } 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1465 { 1466 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1467 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1468 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1469 1470 PetscFunctionBegin; 1471 *nn = n; 1472 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1473 if (symmetric) { 1474 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1475 nz = tia[n]; 1476 } else { 1477 tia = a->i; 1478 tja = a->j; 1479 } 1480 1481 if (!blockcompressed && bs > 1) { 1482 (*nn) *= bs; 1483 /* malloc & create the natural set of indices */ 1484 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1485 if (n) { 1486 (*ia)[0] = oshift; 1487 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1488 } 1489 1490 for (i = 1; i < n; i++) { 1491 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1492 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1493 } 1494 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1495 1496 if (inja) { 1497 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1498 cnt = 0; 1499 for (i = 0; i < n; i++) { 1500 for (j = 0; j < bs; j++) { 1501 for (k = tia[i]; k < tia[i + 1]; k++) { 1502 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1503 } 1504 } 1505 } 1506 } 1507 1508 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1509 PetscCall(PetscFree(tia)); 1510 PetscCall(PetscFree(tja)); 1511 } 1512 } else if (oshift == 1) { 1513 if (symmetric) { 1514 nz = tia[A->rmap->n / bs]; 1515 /* add 1 to i and j indices */ 1516 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1517 *ia = tia; 1518 if (ja) { 1519 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1520 *ja = tja; 1521 } 1522 } else { 1523 nz = a->i[A->rmap->n / bs]; 1524 /* malloc space and add 1 to i and j indices */ 1525 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1526 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1527 if (ja) { 1528 PetscCall(PetscMalloc1(nz, ja)); 1529 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1530 } 1531 } 1532 } else { 1533 *ia = tia; 1534 if (ja) *ja = tja; 1535 } 1536 PetscFunctionReturn(PETSC_SUCCESS); 1537 } 1538 1539 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1540 { 1541 PetscFunctionBegin; 1542 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1543 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1544 PetscCall(PetscFree(*ia)); 1545 if (ja) PetscCall(PetscFree(*ja)); 1546 } 1547 PetscFunctionReturn(PETSC_SUCCESS); 1548 } 1549 1550 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1551 { 1552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1553 1554 PetscFunctionBegin; 1555 if (A->hash_active) { 1556 PetscInt bs; 1557 A->ops[0] = a->cops; 1558 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1559 PetscCall(MatGetBlockSize(A, &bs)); 1560 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1561 PetscCall(PetscFree(a->dnz)); 1562 PetscCall(PetscFree(a->bdnz)); 1563 A->hash_active = PETSC_FALSE; 1564 } 1565 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1566 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1567 PetscCall(ISDestroy(&a->row)); 1568 PetscCall(ISDestroy(&a->col)); 1569 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1570 PetscCall(PetscFree(a->idiag)); 1571 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1572 PetscCall(PetscFree(a->solve_work)); 1573 PetscCall(PetscFree(a->mult_work)); 1574 PetscCall(PetscFree(a->sor_workt)); 1575 PetscCall(PetscFree(a->sor_work)); 1576 PetscCall(ISDestroy(&a->icol)); 1577 PetscCall(PetscFree(a->saved_values)); 1578 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1579 1580 PetscCall(MatDestroy(&a->sbaijMat)); 1581 PetscCall(MatDestroy(&a->parent)); 1582 PetscCall(PetscFree(A->data)); 1583 1584 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1585 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1586 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1596 #if defined(PETSC_HAVE_HYPRE) 1597 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1598 #endif 1599 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1600 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1601 PetscFunctionReturn(PETSC_SUCCESS); 1602 } 1603 1604 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1605 { 1606 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1607 1608 PetscFunctionBegin; 1609 switch (op) { 1610 case MAT_ROW_ORIENTED: 1611 a->roworiented = flg; 1612 break; 1613 case MAT_KEEP_NONZERO_PATTERN: 1614 a->keepnonzeropattern = flg; 1615 break; 1616 case MAT_NEW_NONZERO_LOCATIONS: 1617 a->nonew = (flg ? 0 : 1); 1618 break; 1619 case MAT_NEW_NONZERO_LOCATION_ERR: 1620 a->nonew = (flg ? -1 : 0); 1621 break; 1622 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1623 a->nonew = (flg ? -2 : 0); 1624 break; 1625 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1626 a->nounused = (flg ? -1 : 0); 1627 break; 1628 default: 1629 break; 1630 } 1631 PetscFunctionReturn(PETSC_SUCCESS); 1632 } 1633 1634 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1635 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1636 { 1637 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1638 MatScalar *aa_i; 1639 PetscScalar *v_i; 1640 1641 PetscFunctionBegin; 1642 bs = A->rmap->bs; 1643 bs2 = bs * bs; 1644 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1645 1646 bn = row / bs; /* Block number */ 1647 bp = row % bs; /* Block Position */ 1648 M = ai[bn + 1] - ai[bn]; 1649 *nz = bs * M; 1650 1651 if (v) { 1652 *v = NULL; 1653 if (*nz) { 1654 PetscCall(PetscMalloc1(*nz, v)); 1655 for (i = 0; i < M; i++) { /* for each block in the block row */ 1656 v_i = *v + i * bs; 1657 aa_i = aa + bs2 * (ai[bn] + i); 1658 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1659 } 1660 } 1661 } 1662 1663 if (idx) { 1664 *idx = NULL; 1665 if (*nz) { 1666 PetscCall(PetscMalloc1(*nz, idx)); 1667 for (i = 0; i < M; i++) { /* for each block in the block row */ 1668 idx_i = *idx + i * bs; 1669 itmp = bs * aj[ai[bn] + i]; 1670 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1671 } 1672 } 1673 } 1674 PetscFunctionReturn(PETSC_SUCCESS); 1675 } 1676 1677 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1680 1681 PetscFunctionBegin; 1682 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1683 PetscFunctionReturn(PETSC_SUCCESS); 1684 } 1685 1686 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1687 { 1688 PetscFunctionBegin; 1689 if (idx) PetscCall(PetscFree(*idx)); 1690 if (v) PetscCall(PetscFree(*v)); 1691 PetscFunctionReturn(PETSC_SUCCESS); 1692 } 1693 1694 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1695 { 1696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1697 Mat C; 1698 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1699 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1700 MatScalar *ata, *aa = a->a; 1701 1702 PetscFunctionBegin; 1703 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1704 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1705 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1706 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1707 1708 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1709 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1710 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1711 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1712 1713 at = (Mat_SeqBAIJ *)C->data; 1714 ati = at->i; 1715 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1716 } else { 1717 C = *B; 1718 at = (Mat_SeqBAIJ *)C->data; 1719 ati = at->i; 1720 } 1721 1722 atj = at->j; 1723 ata = at->a; 1724 1725 /* Copy ati into atfill so we have locations of the next free space in atj */ 1726 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1727 1728 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1729 for (i = 0; i < mbs; i++) { 1730 anzj = ai[i + 1] - ai[i]; 1731 for (j = 0; j < anzj; j++) { 1732 atj[atfill[*aj]] = i; 1733 for (kr = 0; kr < bs; kr++) { 1734 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1735 } 1736 atfill[*aj++] += 1; 1737 } 1738 } 1739 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1740 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1741 1742 /* Clean up temporary space and complete requests. */ 1743 PetscCall(PetscFree(atfill)); 1744 1745 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1746 PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs)); 1747 *B = C; 1748 } else { 1749 PetscCall(MatHeaderMerge(A, &C)); 1750 } 1751 PetscFunctionReturn(PETSC_SUCCESS); 1752 } 1753 1754 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1755 { 1756 Mat Btrans; 1757 1758 PetscFunctionBegin; 1759 *f = PETSC_FALSE; 1760 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1761 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1762 PetscCall(MatDestroy(&Btrans)); 1763 PetscFunctionReturn(PETSC_SUCCESS); 1764 } 1765 1766 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1767 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1768 { 1769 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1770 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1771 PetscInt *rowlens, *colidxs; 1772 PetscScalar *matvals; 1773 1774 PetscFunctionBegin; 1775 PetscCall(PetscViewerSetUp(viewer)); 1776 1777 M = mat->rmap->N; 1778 N = mat->cmap->N; 1779 m = mat->rmap->n; 1780 bs = mat->rmap->bs; 1781 nz = bs * bs * A->nz; 1782 1783 /* write matrix header */ 1784 header[0] = MAT_FILE_CLASSID; 1785 header[1] = M; 1786 header[2] = N; 1787 header[3] = nz; 1788 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1789 1790 /* store row lengths */ 1791 PetscCall(PetscMalloc1(m, &rowlens)); 1792 for (cnt = 0, i = 0; i < A->mbs; i++) 1793 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1794 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1795 PetscCall(PetscFree(rowlens)); 1796 1797 /* store column indices */ 1798 PetscCall(PetscMalloc1(nz, &colidxs)); 1799 for (cnt = 0, i = 0; i < A->mbs; i++) 1800 for (k = 0; k < bs; k++) 1801 for (j = A->i[i]; j < A->i[i + 1]; j++) 1802 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1803 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1804 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1805 PetscCall(PetscFree(colidxs)); 1806 1807 /* store nonzero values */ 1808 PetscCall(PetscMalloc1(nz, &matvals)); 1809 for (cnt = 0, i = 0; i < A->mbs; i++) 1810 for (k = 0; k < bs; k++) 1811 for (j = A->i[i]; j < A->i[i + 1]; j++) 1812 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1813 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1814 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1815 PetscCall(PetscFree(matvals)); 1816 1817 /* write block size option to the viewer's .info file */ 1818 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1819 PetscFunctionReturn(PETSC_SUCCESS); 1820 } 1821 1822 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1823 { 1824 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1825 PetscInt i, bs = A->rmap->bs, k; 1826 1827 PetscFunctionBegin; 1828 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1829 for (i = 0; i < a->mbs; i++) { 1830 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1831 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1832 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1833 } 1834 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1835 PetscFunctionReturn(PETSC_SUCCESS); 1836 } 1837 1838 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1839 { 1840 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1841 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1842 PetscViewerFormat format; 1843 1844 PetscFunctionBegin; 1845 if (A->structure_only) { 1846 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1847 PetscFunctionReturn(PETSC_SUCCESS); 1848 } 1849 1850 PetscCall(PetscViewerGetFormat(viewer, &format)); 1851 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1852 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1853 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1854 const char *matname; 1855 Mat aij; 1856 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1857 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1858 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1859 PetscCall(MatView(aij, viewer)); 1860 PetscCall(MatDestroy(&aij)); 1861 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1862 PetscFunctionReturn(PETSC_SUCCESS); 1863 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1864 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1865 for (i = 0; i < a->mbs; i++) { 1866 for (j = 0; j < bs; j++) { 1867 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1868 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1869 for (l = 0; l < bs; l++) { 1870 #if defined(PETSC_USE_COMPLEX) 1871 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1872 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1873 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1874 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1875 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1876 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1877 } 1878 #else 1879 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1880 #endif 1881 } 1882 } 1883 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1884 } 1885 } 1886 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1887 } else { 1888 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1889 for (i = 0; i < a->mbs; i++) { 1890 for (j = 0; j < bs; j++) { 1891 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1892 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1893 for (l = 0; l < bs; l++) { 1894 #if defined(PETSC_USE_COMPLEX) 1895 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1896 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1897 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1898 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1899 } else { 1900 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1901 } 1902 #else 1903 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1904 #endif 1905 } 1906 } 1907 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1908 } 1909 } 1910 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1911 } 1912 PetscCall(PetscViewerFlush(viewer)); 1913 PetscFunctionReturn(PETSC_SUCCESS); 1914 } 1915 1916 #include <petscdraw.h> 1917 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1918 { 1919 Mat A = (Mat)Aa; 1920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1921 PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 1922 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1923 MatScalar *aa; 1924 PetscViewer viewer; 1925 PetscViewerFormat format; 1926 int color; 1927 1928 PetscFunctionBegin; 1929 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1930 PetscCall(PetscViewerGetFormat(viewer, &format)); 1931 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1932 1933 /* loop over matrix elements drawing boxes */ 1934 1935 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1936 PetscDrawCollectiveBegin(draw); 1937 /* Blue for negative, Cyan for zero and Red for positive */ 1938 color = PETSC_DRAW_BLUE; 1939 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1940 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1941 y_l = A->rmap->N - row - 1.0; 1942 y_r = y_l + 1.0; 1943 x_l = a->j[j] * bs; 1944 x_r = x_l + 1.0; 1945 aa = a->a + j * bs2; 1946 for (k = 0; k < bs; k++) { 1947 for (l = 0; l < bs; l++) { 1948 if (PetscRealPart(*aa++) >= 0.) continue; 1949 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1950 } 1951 } 1952 } 1953 } 1954 color = PETSC_DRAW_CYAN; 1955 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1956 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1957 y_l = A->rmap->N - row - 1.0; 1958 y_r = y_l + 1.0; 1959 x_l = a->j[j] * bs; 1960 x_r = x_l + 1.0; 1961 aa = a->a + j * bs2; 1962 for (k = 0; k < bs; k++) { 1963 for (l = 0; l < bs; l++) { 1964 if (PetscRealPart(*aa++) != 0.) continue; 1965 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1966 } 1967 } 1968 } 1969 } 1970 color = PETSC_DRAW_RED; 1971 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1972 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1973 y_l = A->rmap->N - row - 1.0; 1974 y_r = y_l + 1.0; 1975 x_l = a->j[j] * bs; 1976 x_r = x_l + 1.0; 1977 aa = a->a + j * bs2; 1978 for (k = 0; k < bs; k++) { 1979 for (l = 0; l < bs; l++) { 1980 if (PetscRealPart(*aa++) <= 0.) continue; 1981 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1982 } 1983 } 1984 } 1985 } 1986 PetscDrawCollectiveEnd(draw); 1987 } else { 1988 /* use contour shading to indicate magnitude of values */ 1989 /* first determine max of all nonzero values */ 1990 PetscReal minv = 0.0, maxv = 0.0; 1991 PetscDraw popup; 1992 1993 for (i = 0; i < a->nz * a->bs2; i++) { 1994 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1995 } 1996 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1997 PetscCall(PetscDrawGetPopup(draw, &popup)); 1998 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1999 2000 PetscDrawCollectiveBegin(draw); 2001 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2002 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2003 y_l = A->rmap->N - row - 1.0; 2004 y_r = y_l + 1.0; 2005 x_l = a->j[j] * bs; 2006 x_r = x_l + 1.0; 2007 aa = a->a + j * bs2; 2008 for (k = 0; k < bs; k++) { 2009 for (l = 0; l < bs; l++) { 2010 MatScalar v = *aa++; 2011 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2012 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2013 } 2014 } 2015 } 2016 } 2017 PetscDrawCollectiveEnd(draw); 2018 } 2019 PetscFunctionReturn(PETSC_SUCCESS); 2020 } 2021 2022 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2023 { 2024 PetscReal xl, yl, xr, yr, w, h; 2025 PetscDraw draw; 2026 PetscBool isnull; 2027 2028 PetscFunctionBegin; 2029 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2030 PetscCall(PetscDrawIsNull(draw, &isnull)); 2031 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2032 2033 xr = A->cmap->n; 2034 yr = A->rmap->N; 2035 h = yr / 10.0; 2036 w = xr / 10.0; 2037 xr += w; 2038 yr += h; 2039 xl = -w; 2040 yl = -h; 2041 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2042 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2043 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2044 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2045 PetscCall(PetscDrawSave(draw)); 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2050 { 2051 PetscBool isascii, isbinary, isdraw; 2052 2053 PetscFunctionBegin; 2054 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2055 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2056 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2057 if (isascii) { 2058 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2059 } else if (isbinary) { 2060 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2061 } else if (isdraw) { 2062 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2063 } else { 2064 Mat B; 2065 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2066 PetscCall(MatView(B, viewer)); 2067 PetscCall(MatDestroy(&B)); 2068 } 2069 PetscFunctionReturn(PETSC_SUCCESS); 2070 } 2071 2072 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2073 { 2074 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2075 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2076 PetscInt *ai = a->i, *ailen = a->ilen; 2077 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2078 MatScalar *ap, *aa = a->a; 2079 2080 PetscFunctionBegin; 2081 for (k = 0; k < m; k++) { /* loop over rows */ 2082 row = im[k]; 2083 brow = row / bs; 2084 if (row < 0) { 2085 v += n; 2086 continue; 2087 } /* negative row */ 2088 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2089 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2090 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2091 nrow = ailen[brow]; 2092 for (l = 0; l < n; l++) { /* loop over columns */ 2093 if (in[l] < 0) { 2094 v++; 2095 continue; 2096 } /* negative column */ 2097 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2098 col = in[l]; 2099 bcol = col / bs; 2100 cidx = col % bs; 2101 ridx = row % bs; 2102 high = nrow; 2103 low = 0; /* assume unsorted */ 2104 while (high - low > 5) { 2105 t = (low + high) / 2; 2106 if (rp[t] > bcol) high = t; 2107 else low = t; 2108 } 2109 for (i = low; i < high; i++) { 2110 if (rp[i] > bcol) break; 2111 if (rp[i] == bcol) { 2112 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2113 goto finished; 2114 } 2115 } 2116 *v++ = 0.0; 2117 finished:; 2118 } 2119 } 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 2123 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2124 { 2125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2126 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2127 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2128 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2129 PetscBool roworiented = a->roworiented; 2130 const PetscScalar *value = v; 2131 MatScalar *ap = NULL, *aa = a->a, *bap; 2132 2133 PetscFunctionBegin; 2134 if (roworiented) { 2135 stepval = (n - 1) * bs; 2136 } else { 2137 stepval = (m - 1) * bs; 2138 } 2139 for (k = 0; k < m; k++) { /* loop over added rows */ 2140 row = im[k]; 2141 if (row < 0) continue; 2142 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2143 rp = aj + ai[row]; 2144 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2145 rmax = imax[row]; 2146 nrow = ailen[row]; 2147 low = 0; 2148 high = nrow; 2149 for (l = 0; l < n; l++) { /* loop over added columns */ 2150 if (in[l] < 0) continue; 2151 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2152 col = in[l]; 2153 if (!A->structure_only) { 2154 if (roworiented) { 2155 value = v + (k * (stepval + bs) + l) * bs; 2156 } else { 2157 value = v + (l * (stepval + bs) + k) * bs; 2158 } 2159 } 2160 if (col <= lastcol) low = 0; 2161 else high = nrow; 2162 lastcol = col; 2163 while (high - low > 7) { 2164 t = (low + high) / 2; 2165 if (rp[t] > col) high = t; 2166 else low = t; 2167 } 2168 for (i = low; i < high; i++) { 2169 if (rp[i] > col) break; 2170 if (rp[i] == col) { 2171 if (A->structure_only) goto noinsert2; 2172 bap = ap + bs2 * i; 2173 if (roworiented) { 2174 if (is == ADD_VALUES) { 2175 for (ii = 0; ii < bs; ii++, value += stepval) { 2176 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2177 } 2178 } else { 2179 for (ii = 0; ii < bs; ii++, value += stepval) { 2180 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2181 } 2182 } 2183 } else { 2184 if (is == ADD_VALUES) { 2185 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2186 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2187 bap += bs; 2188 } 2189 } else { 2190 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2191 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2192 bap += bs; 2193 } 2194 } 2195 } 2196 goto noinsert2; 2197 } 2198 } 2199 if (nonew == 1) goto noinsert2; 2200 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2201 if (A->structure_only) { 2202 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2203 } else { 2204 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2205 } 2206 N = nrow++ - 1; 2207 high++; 2208 /* shift up all the later entries in this row */ 2209 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2210 rp[i] = col; 2211 if (!A->structure_only) { 2212 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2213 bap = ap + bs2 * i; 2214 if (roworiented) { 2215 for (ii = 0; ii < bs; ii++, value += stepval) { 2216 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2217 } 2218 } else { 2219 for (ii = 0; ii < bs; ii++, value += stepval) { 2220 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2221 } 2222 } 2223 } 2224 noinsert2:; 2225 low = i; 2226 } 2227 ailen[row] = nrow; 2228 } 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2233 { 2234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2235 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2236 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2237 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2238 MatScalar *aa = a->a, *ap; 2239 PetscReal ratio = 0.6; 2240 2241 PetscFunctionBegin; 2242 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2243 2244 if (m) rmax = ailen[0]; 2245 for (i = 1; i < mbs; i++) { 2246 /* move each row back by the amount of empty slots (fshift) before it*/ 2247 fshift += imax[i - 1] - ailen[i - 1]; 2248 rmax = PetscMax(rmax, ailen[i]); 2249 if (fshift) { 2250 ip = aj + ai[i]; 2251 ap = aa + bs2 * ai[i]; 2252 N = ailen[i]; 2253 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2254 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2255 } 2256 ai[i] = ai[i - 1] + ailen[i - 1]; 2257 } 2258 if (mbs) { 2259 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2260 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2261 } 2262 2263 /* reset ilen and imax for each row */ 2264 a->nonzerorowcnt = 0; 2265 if (A->structure_only) { 2266 PetscCall(PetscFree2(a->imax, a->ilen)); 2267 } else { /* !A->structure_only */ 2268 for (i = 0; i < mbs; i++) { 2269 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2270 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2271 } 2272 } 2273 a->nz = ai[mbs]; 2274 2275 /* diagonals may have moved, so kill the diagonal pointers */ 2276 a->idiagvalid = PETSC_FALSE; 2277 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2278 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2279 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2280 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2281 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2282 2283 A->info.mallocs += a->reallocs; 2284 a->reallocs = 0; 2285 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2286 a->rmax = rmax; 2287 2288 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2289 PetscFunctionReturn(PETSC_SUCCESS); 2290 } 2291 2292 /* 2293 This function returns an array of flags which indicate the locations of contiguous 2294 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2295 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2296 Assume: sizes should be long enough to hold all the values. 2297 */ 2298 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2299 { 2300 PetscInt j = 0; 2301 2302 PetscFunctionBegin; 2303 for (PetscInt i = 0; i < n; j++) { 2304 PetscInt row = idx[i]; 2305 if (row % bs != 0) { /* Not the beginning of a block */ 2306 sizes[j] = 1; 2307 i++; 2308 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2309 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2310 i++; 2311 } else { /* Beginning of the block, so check if the complete block exists */ 2312 PetscBool flg = PETSC_TRUE; 2313 for (PetscInt k = 1; k < bs; k++) { 2314 if (row + k != idx[i + k]) { /* break in the block */ 2315 flg = PETSC_FALSE; 2316 break; 2317 } 2318 } 2319 if (flg) { /* No break in the bs */ 2320 sizes[j] = bs; 2321 i += bs; 2322 } else { 2323 sizes[j] = 1; 2324 i++; 2325 } 2326 } 2327 } 2328 *bs_max = j; 2329 PetscFunctionReturn(PETSC_SUCCESS); 2330 } 2331 2332 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2333 { 2334 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2335 PetscInt i, j, k, count, *rows; 2336 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2337 PetscScalar zero = 0.0; 2338 MatScalar *aa; 2339 const PetscScalar *xx; 2340 PetscScalar *bb; 2341 2342 PetscFunctionBegin; 2343 /* fix right-hand side if needed */ 2344 if (x && b) { 2345 PetscCall(VecGetArrayRead(x, &xx)); 2346 PetscCall(VecGetArray(b, &bb)); 2347 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2348 PetscCall(VecRestoreArrayRead(x, &xx)); 2349 PetscCall(VecRestoreArray(b, &bb)); 2350 } 2351 2352 /* Make a copy of the IS and sort it */ 2353 /* allocate memory for rows,sizes */ 2354 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2355 2356 /* copy IS values to rows, and sort them */ 2357 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2358 PetscCall(PetscSortInt(is_n, rows)); 2359 2360 if (baij->keepnonzeropattern) { 2361 for (i = 0; i < is_n; i++) sizes[i] = 1; 2362 bs_max = is_n; 2363 } else { 2364 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2365 A->nonzerostate++; 2366 } 2367 2368 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2369 row = rows[j]; 2370 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2371 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2372 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2373 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2374 if (diag != (PetscScalar)0.0) { 2375 if (baij->ilen[row / bs] > 0) { 2376 baij->ilen[row / bs] = 1; 2377 baij->j[baij->i[row / bs]] = row / bs; 2378 2379 PetscCall(PetscArrayzero(aa, count * bs)); 2380 } 2381 /* Now insert all the diagonal values for this bs */ 2382 for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2383 } else { /* (diag == 0.0) */ 2384 baij->ilen[row / bs] = 0; 2385 } /* end (diag == 0.0) */ 2386 } else { /* (sizes[i] != bs) */ 2387 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2388 for (k = 0; k < count; k++) { 2389 aa[0] = zero; 2390 aa += bs; 2391 } 2392 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2393 } 2394 } 2395 2396 PetscCall(PetscFree2(rows, sizes)); 2397 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2398 PetscFunctionReturn(PETSC_SUCCESS); 2399 } 2400 2401 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2402 { 2403 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2404 PetscInt i, j, k, count; 2405 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2406 PetscScalar zero = 0.0; 2407 MatScalar *aa; 2408 const PetscScalar *xx; 2409 PetscScalar *bb; 2410 PetscBool *zeroed, vecs = PETSC_FALSE; 2411 2412 PetscFunctionBegin; 2413 /* fix right-hand side if needed */ 2414 if (x && b) { 2415 PetscCall(VecGetArrayRead(x, &xx)); 2416 PetscCall(VecGetArray(b, &bb)); 2417 vecs = PETSC_TRUE; 2418 } 2419 2420 /* zero the columns */ 2421 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2422 for (i = 0; i < is_n; i++) { 2423 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2424 zeroed[is_idx[i]] = PETSC_TRUE; 2425 } 2426 for (i = 0; i < A->rmap->N; i++) { 2427 if (!zeroed[i]) { 2428 row = i / bs; 2429 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2430 for (k = 0; k < bs; k++) { 2431 col = bs * baij->j[j] + k; 2432 if (zeroed[col]) { 2433 aa = baij->a + j * bs2 + (i % bs) + bs * k; 2434 if (vecs) bb[i] -= aa[0] * xx[col]; 2435 aa[0] = 0.0; 2436 } 2437 } 2438 } 2439 } else if (vecs) bb[i] = diag * xx[i]; 2440 } 2441 PetscCall(PetscFree(zeroed)); 2442 if (vecs) { 2443 PetscCall(VecRestoreArrayRead(x, &xx)); 2444 PetscCall(VecRestoreArray(b, &bb)); 2445 } 2446 2447 /* zero the rows */ 2448 for (i = 0; i < is_n; i++) { 2449 row = is_idx[i]; 2450 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2451 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2452 for (k = 0; k < count; k++) { 2453 aa[0] = zero; 2454 aa += bs; 2455 } 2456 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2457 } 2458 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2459 PetscFunctionReturn(PETSC_SUCCESS); 2460 } 2461 2462 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2463 { 2464 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2465 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2466 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2467 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2468 PetscInt ridx, cidx, bs2 = a->bs2; 2469 PetscBool roworiented = a->roworiented; 2470 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2471 2472 PetscFunctionBegin; 2473 for (k = 0; k < m; k++) { /* loop over added rows */ 2474 row = im[k]; 2475 brow = row / bs; 2476 if (row < 0) continue; 2477 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2478 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2479 if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2480 rmax = imax[brow]; 2481 nrow = ailen[brow]; 2482 low = 0; 2483 high = nrow; 2484 for (l = 0; l < n; l++) { /* loop over added columns */ 2485 if (in[l] < 0) continue; 2486 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2487 col = in[l]; 2488 bcol = col / bs; 2489 ridx = row % bs; 2490 cidx = col % bs; 2491 if (!A->structure_only) { 2492 if (roworiented) { 2493 value = v[l + k * n]; 2494 } else { 2495 value = v[k + l * m]; 2496 } 2497 } 2498 if (col <= lastcol) low = 0; 2499 else high = nrow; 2500 lastcol = col; 2501 while (high - low > 7) { 2502 t = (low + high) / 2; 2503 if (rp[t] > bcol) high = t; 2504 else low = t; 2505 } 2506 for (i = low; i < high; i++) { 2507 if (rp[i] > bcol) break; 2508 if (rp[i] == bcol) { 2509 bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2510 if (!A->structure_only) { 2511 if (is == ADD_VALUES) *bap += value; 2512 else *bap = value; 2513 } 2514 goto noinsert1; 2515 } 2516 } 2517 if (nonew == 1) goto noinsert1; 2518 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2519 if (A->structure_only) { 2520 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2521 } else { 2522 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2523 } 2524 N = nrow++ - 1; 2525 high++; 2526 /* shift up all the later entries in this row */ 2527 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2528 rp[i] = bcol; 2529 if (!A->structure_only) { 2530 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2531 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2532 ap[bs2 * i + bs * cidx + ridx] = value; 2533 } 2534 a->nz++; 2535 noinsert1:; 2536 low = i; 2537 } 2538 ailen[brow] = nrow; 2539 } 2540 PetscFunctionReturn(PETSC_SUCCESS); 2541 } 2542 2543 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2544 { 2545 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2546 Mat outA; 2547 PetscBool row_identity, col_identity; 2548 2549 PetscFunctionBegin; 2550 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2551 PetscCall(ISIdentity(row, &row_identity)); 2552 PetscCall(ISIdentity(col, &col_identity)); 2553 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2554 2555 outA = inA; 2556 inA->factortype = MAT_FACTOR_LU; 2557 PetscCall(PetscFree(inA->solvertype)); 2558 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2559 2560 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2561 2562 PetscCall(PetscObjectReference((PetscObject)row)); 2563 PetscCall(ISDestroy(&a->row)); 2564 a->row = row; 2565 PetscCall(PetscObjectReference((PetscObject)col)); 2566 PetscCall(ISDestroy(&a->col)); 2567 a->col = col; 2568 2569 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2570 PetscCall(ISDestroy(&a->icol)); 2571 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2572 2573 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2574 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2575 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2576 PetscFunctionReturn(PETSC_SUCCESS); 2577 } 2578 2579 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2580 { 2581 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2582 2583 PetscFunctionBegin; 2584 baij->nz = baij->maxnz; 2585 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2586 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2587 PetscFunctionReturn(PETSC_SUCCESS); 2588 } 2589 2590 /*@ 2591 MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 2592 2593 Input Parameters: 2594 + mat - the `MATSEQBAIJ` matrix 2595 - indices - the block column indices 2596 2597 Level: advanced 2598 2599 Notes: 2600 This can be called if you have precomputed the nonzero structure of the 2601 matrix and want to provide it to the matrix object to improve the performance 2602 of the `MatSetValues()` operation. 2603 2604 You MUST have set the correct numbers of nonzeros per row in the call to 2605 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2606 2607 MUST be called before any calls to `MatSetValues()` 2608 2609 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2610 @*/ 2611 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2612 { 2613 PetscFunctionBegin; 2614 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2615 PetscAssertPointer(indices, 2); 2616 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 2617 PetscFunctionReturn(PETSC_SUCCESS); 2618 } 2619 2620 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2621 { 2622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2623 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2624 PetscReal atmp; 2625 PetscScalar *x, zero = 0.0; 2626 MatScalar *aa; 2627 PetscInt ncols, brow, krow, kcol; 2628 2629 PetscFunctionBegin; 2630 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2631 bs = A->rmap->bs; 2632 aa = a->a; 2633 ai = a->i; 2634 aj = a->j; 2635 mbs = a->mbs; 2636 2637 PetscCall(VecSet(v, zero)); 2638 PetscCall(VecGetArray(v, &x)); 2639 PetscCall(VecGetLocalSize(v, &n)); 2640 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2641 for (i = 0; i < mbs; i++) { 2642 ncols = ai[1] - ai[0]; 2643 ai++; 2644 brow = bs * i; 2645 for (j = 0; j < ncols; j++) { 2646 for (kcol = 0; kcol < bs; kcol++) { 2647 for (krow = 0; krow < bs; krow++) { 2648 atmp = PetscAbsScalar(*aa); 2649 aa++; 2650 row = brow + krow; /* row index */ 2651 if (PetscAbsScalar(x[row]) < atmp) { 2652 x[row] = atmp; 2653 if (idx) idx[row] = bs * (*aj) + kcol; 2654 } 2655 } 2656 } 2657 aj++; 2658 } 2659 } 2660 PetscCall(VecRestoreArray(v, &x)); 2661 PetscFunctionReturn(PETSC_SUCCESS); 2662 } 2663 2664 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2665 { 2666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2667 PetscInt i, j, n, row, bs, *ai, mbs; 2668 PetscReal atmp; 2669 PetscScalar *x, zero = 0.0; 2670 MatScalar *aa; 2671 PetscInt ncols, brow, krow, kcol; 2672 2673 PetscFunctionBegin; 2674 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2675 bs = A->rmap->bs; 2676 aa = a->a; 2677 ai = a->i; 2678 mbs = a->mbs; 2679 2680 PetscCall(VecSet(v, zero)); 2681 PetscCall(VecGetArrayWrite(v, &x)); 2682 PetscCall(VecGetLocalSize(v, &n)); 2683 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2684 for (i = 0; i < mbs; i++) { 2685 ncols = ai[1] - ai[0]; 2686 ai++; 2687 brow = bs * i; 2688 for (j = 0; j < ncols; j++) { 2689 for (kcol = 0; kcol < bs; kcol++) { 2690 for (krow = 0; krow < bs; krow++) { 2691 atmp = PetscAbsScalar(*aa); 2692 aa++; 2693 row = brow + krow; /* row index */ 2694 x[row] += atmp; 2695 } 2696 } 2697 } 2698 } 2699 PetscCall(VecRestoreArrayWrite(v, &x)); 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2704 { 2705 PetscFunctionBegin; 2706 /* If the two matrices have the same copy implementation, use fast copy. */ 2707 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2709 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2710 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2711 2712 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2713 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2714 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2715 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2716 } else { 2717 PetscCall(MatCopy_Basic(A, B, str)); 2718 } 2719 PetscFunctionReturn(PETSC_SUCCESS); 2720 } 2721 2722 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2723 { 2724 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2725 2726 PetscFunctionBegin; 2727 *array = a->a; 2728 PetscFunctionReturn(PETSC_SUCCESS); 2729 } 2730 2731 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2732 { 2733 PetscFunctionBegin; 2734 *array = NULL; 2735 PetscFunctionReturn(PETSC_SUCCESS); 2736 } 2737 2738 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2739 { 2740 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2741 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2742 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2743 2744 PetscFunctionBegin; 2745 /* Set the number of nonzeros in the new matrix */ 2746 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2747 PetscFunctionReturn(PETSC_SUCCESS); 2748 } 2749 2750 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2751 { 2752 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2753 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2754 PetscBLASInt one = 1; 2755 2756 PetscFunctionBegin; 2757 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2758 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2759 if (e) { 2760 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2761 if (e) { 2762 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2763 if (e) str = SAME_NONZERO_PATTERN; 2764 } 2765 } 2766 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2767 } 2768 if (str == SAME_NONZERO_PATTERN) { 2769 PetscScalar alpha = a; 2770 PetscBLASInt bnz; 2771 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2772 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2773 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2774 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2775 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2776 } else { 2777 Mat B; 2778 PetscInt *nnz; 2779 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2780 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2781 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2782 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2783 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2784 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2785 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2786 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2787 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2788 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2789 PetscCall(MatHeaderMerge(Y, &B)); 2790 PetscCall(PetscFree(nnz)); 2791 } 2792 PetscFunctionReturn(PETSC_SUCCESS); 2793 } 2794 2795 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2796 { 2797 #if PetscDefined(USE_COMPLEX) 2798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2799 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2800 MatScalar *aa = a->a; 2801 2802 PetscFunctionBegin; 2803 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2804 PetscFunctionReturn(PETSC_SUCCESS); 2805 #else 2806 (void)A; 2807 return PETSC_SUCCESS; 2808 #endif 2809 } 2810 2811 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2812 { 2813 #if PetscDefined(USE_COMPLEX) 2814 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2815 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2816 MatScalar *aa = a->a; 2817 2818 PetscFunctionBegin; 2819 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2820 PetscFunctionReturn(PETSC_SUCCESS); 2821 #else 2822 (void)A; 2823 return PETSC_SUCCESS; 2824 #endif 2825 } 2826 2827 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2828 { 2829 #if PetscDefined(USE_COMPLEX) 2830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2831 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2832 MatScalar *aa = a->a; 2833 2834 PetscFunctionBegin; 2835 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2836 PetscFunctionReturn(PETSC_SUCCESS); 2837 #else 2838 (void)A; 2839 return PETSC_SUCCESS; 2840 #endif 2841 } 2842 2843 /* 2844 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2845 */ 2846 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2847 { 2848 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2849 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2850 PetscInt nz = a->i[m], row, *jj, mr, col; 2851 2852 PetscFunctionBegin; 2853 *nn = n; 2854 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2855 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2856 PetscCall(PetscCalloc1(n, &collengths)); 2857 PetscCall(PetscMalloc1(n + 1, &cia)); 2858 PetscCall(PetscMalloc1(nz, &cja)); 2859 jj = a->j; 2860 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2861 cia[0] = oshift; 2862 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2863 PetscCall(PetscArrayzero(collengths, n)); 2864 jj = a->j; 2865 for (row = 0; row < m; row++) { 2866 mr = a->i[row + 1] - a->i[row]; 2867 for (i = 0; i < mr; i++) { 2868 col = *jj++; 2869 2870 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2871 } 2872 } 2873 PetscCall(PetscFree(collengths)); 2874 *ia = cia; 2875 *ja = cja; 2876 PetscFunctionReturn(PETSC_SUCCESS); 2877 } 2878 2879 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2880 { 2881 PetscFunctionBegin; 2882 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2883 PetscCall(PetscFree(*ia)); 2884 PetscCall(PetscFree(*ja)); 2885 PetscFunctionReturn(PETSC_SUCCESS); 2886 } 2887 2888 /* 2889 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2890 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2891 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2892 */ 2893 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2894 { 2895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2896 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2897 PetscInt nz = a->i[m], row, *jj, mr, col; 2898 PetscInt *cspidx; 2899 2900 PetscFunctionBegin; 2901 *nn = n; 2902 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2903 2904 PetscCall(PetscCalloc1(n, &collengths)); 2905 PetscCall(PetscMalloc1(n + 1, &cia)); 2906 PetscCall(PetscMalloc1(nz, &cja)); 2907 PetscCall(PetscMalloc1(nz, &cspidx)); 2908 jj = a->j; 2909 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2910 cia[0] = oshift; 2911 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2912 PetscCall(PetscArrayzero(collengths, n)); 2913 jj = a->j; 2914 for (row = 0; row < m; row++) { 2915 mr = a->i[row + 1] - a->i[row]; 2916 for (i = 0; i < mr; i++) { 2917 col = *jj++; 2918 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2919 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2920 } 2921 } 2922 PetscCall(PetscFree(collengths)); 2923 *ia = cia; 2924 *ja = cja; 2925 *spidx = cspidx; 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2930 { 2931 PetscFunctionBegin; 2932 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2933 PetscCall(PetscFree(*spidx)); 2934 PetscFunctionReturn(PETSC_SUCCESS); 2935 } 2936 2937 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2938 { 2939 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2940 2941 PetscFunctionBegin; 2942 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2943 PetscCall(MatShift_Basic(Y, a)); 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2948 { 2949 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2950 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2951 PetscInt m = A->rmap->N, *ailen = a->ilen; 2952 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2953 MatScalar *aa = a->a, *ap; 2954 PetscBool zero; 2955 2956 PetscFunctionBegin; 2957 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2958 if (m) rmax = ailen[0]; 2959 for (i = 1; i <= mbs; i++) { 2960 for (k = ai[i - 1]; k < ai[i]; k++) { 2961 zero = PETSC_TRUE; 2962 ap = aa + bs2 * k; 2963 for (j = 0; j < bs2 && zero; j++) { 2964 if (ap[j] != 0.0) zero = PETSC_FALSE; 2965 } 2966 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2967 else { 2968 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2969 aj[k - fshift] = aj[k]; 2970 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2971 } 2972 } 2973 ai[i - 1] -= fshift_prev; 2974 fshift_prev = fshift; 2975 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2976 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2977 rmax = PetscMax(rmax, ailen[i - 1]); 2978 } 2979 if (fshift) { 2980 if (mbs) { 2981 ai[mbs] -= fshift; 2982 a->nz = ai[mbs]; 2983 } 2984 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 2985 A->nonzerostate++; 2986 A->info.nz_unneeded += (PetscReal)fshift; 2987 a->rmax = rmax; 2988 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2989 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2990 } 2991 PetscFunctionReturn(PETSC_SUCCESS); 2992 } 2993 2994 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2995 MatGetRow_SeqBAIJ, 2996 MatRestoreRow_SeqBAIJ, 2997 MatMult_SeqBAIJ_N, 2998 /* 4*/ MatMultAdd_SeqBAIJ_N, 2999 MatMultTranspose_SeqBAIJ, 3000 MatMultTransposeAdd_SeqBAIJ, 3001 NULL, 3002 NULL, 3003 NULL, 3004 /* 10*/ NULL, 3005 MatLUFactor_SeqBAIJ, 3006 NULL, 3007 NULL, 3008 MatTranspose_SeqBAIJ, 3009 /* 15*/ MatGetInfo_SeqBAIJ, 3010 MatEqual_SeqBAIJ, 3011 MatGetDiagonal_SeqBAIJ, 3012 MatDiagonalScale_SeqBAIJ, 3013 MatNorm_SeqBAIJ, 3014 /* 20*/ NULL, 3015 MatAssemblyEnd_SeqBAIJ, 3016 MatSetOption_SeqBAIJ, 3017 MatZeroEntries_SeqBAIJ, 3018 /* 24*/ MatZeroRows_SeqBAIJ, 3019 NULL, 3020 NULL, 3021 NULL, 3022 NULL, 3023 /* 29*/ MatSetUp_Seq_Hash, 3024 NULL, 3025 NULL, 3026 NULL, 3027 NULL, 3028 /* 34*/ MatDuplicate_SeqBAIJ, 3029 NULL, 3030 NULL, 3031 MatILUFactor_SeqBAIJ, 3032 NULL, 3033 /* 39*/ MatAXPY_SeqBAIJ, 3034 MatCreateSubMatrices_SeqBAIJ, 3035 MatIncreaseOverlap_SeqBAIJ, 3036 MatGetValues_SeqBAIJ, 3037 MatCopy_SeqBAIJ, 3038 /* 44*/ NULL, 3039 MatScale_SeqBAIJ, 3040 MatShift_SeqBAIJ, 3041 NULL, 3042 MatZeroRowsColumns_SeqBAIJ, 3043 /* 49*/ NULL, 3044 MatGetRowIJ_SeqBAIJ, 3045 MatRestoreRowIJ_SeqBAIJ, 3046 MatGetColumnIJ_SeqBAIJ, 3047 MatRestoreColumnIJ_SeqBAIJ, 3048 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3049 NULL, 3050 NULL, 3051 NULL, 3052 MatSetValuesBlocked_SeqBAIJ, 3053 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3054 MatDestroy_SeqBAIJ, 3055 MatView_SeqBAIJ, 3056 NULL, 3057 NULL, 3058 /* 64*/ NULL, 3059 NULL, 3060 NULL, 3061 NULL, 3062 MatGetRowMaxAbs_SeqBAIJ, 3063 /* 69*/ NULL, 3064 MatConvert_Basic, 3065 NULL, 3066 MatFDColoringApply_BAIJ, 3067 NULL, 3068 /* 74*/ NULL, 3069 NULL, 3070 NULL, 3071 NULL, 3072 MatLoad_SeqBAIJ, 3073 /* 79*/ NULL, 3074 NULL, 3075 NULL, 3076 NULL, 3077 NULL, 3078 /* 84*/ NULL, 3079 NULL, 3080 NULL, 3081 NULL, 3082 NULL, 3083 /* 89*/ NULL, 3084 NULL, 3085 NULL, 3086 NULL, 3087 MatConjugate_SeqBAIJ, 3088 /* 94*/ NULL, 3089 NULL, 3090 MatRealPart_SeqBAIJ, 3091 MatImaginaryPart_SeqBAIJ, 3092 NULL, 3093 /* 99*/ NULL, 3094 NULL, 3095 NULL, 3096 NULL, 3097 NULL, 3098 /*104*/ MatMissingDiagonal_SeqBAIJ, 3099 NULL, 3100 NULL, 3101 NULL, 3102 NULL, 3103 /*109*/ NULL, 3104 NULL, 3105 NULL, 3106 MatMultHermitianTranspose_SeqBAIJ, 3107 MatMultHermitianTransposeAdd_SeqBAIJ, 3108 /*114*/ NULL, 3109 NULL, 3110 MatGetColumnReductions_SeqBAIJ, 3111 MatInvertBlockDiagonal_SeqBAIJ, 3112 NULL, 3113 /*119*/ NULL, 3114 NULL, 3115 NULL, 3116 NULL, 3117 NULL, 3118 /*124*/ NULL, 3119 NULL, 3120 NULL, 3121 MatSetBlockSizes_Default, 3122 NULL, 3123 /*129*/ MatFDColoringSetUp_SeqXAIJ, 3124 NULL, 3125 MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3126 MatDestroySubMatrices_SeqBAIJ, 3127 NULL, 3128 /*134*/ NULL, 3129 NULL, 3130 NULL, 3131 MatEliminateZeros_SeqBAIJ, 3132 MatGetRowSumAbs_SeqBAIJ, 3133 /*139*/ NULL, 3134 NULL, 3135 NULL, 3136 MatCopyHashToXAIJ_Seq_Hash, 3137 NULL, 3138 NULL}; 3139 3140 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3141 { 3142 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3143 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3144 3145 PetscFunctionBegin; 3146 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3147 3148 /* allocate space for values if not already there */ 3149 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3150 3151 /* copy values over */ 3152 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3153 PetscFunctionReturn(PETSC_SUCCESS); 3154 } 3155 3156 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3157 { 3158 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3159 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3160 3161 PetscFunctionBegin; 3162 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3163 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3164 3165 /* copy values over */ 3166 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3167 PetscFunctionReturn(PETSC_SUCCESS); 3168 } 3169 3170 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3171 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3172 3173 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3174 { 3175 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3176 PetscInt i, mbs, nbs, bs2; 3177 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3178 3179 PetscFunctionBegin; 3180 if (B->hash_active) { 3181 PetscInt bs; 3182 B->ops[0] = b->cops; 3183 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3184 PetscCall(MatGetBlockSize(B, &bs)); 3185 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3186 PetscCall(PetscFree(b->dnz)); 3187 PetscCall(PetscFree(b->bdnz)); 3188 B->hash_active = PETSC_FALSE; 3189 } 3190 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3191 if (nz == MAT_SKIP_ALLOCATION) { 3192 skipallocation = PETSC_TRUE; 3193 nz = 0; 3194 } 3195 3196 PetscCall(MatSetBlockSize(B, bs)); 3197 PetscCall(PetscLayoutSetUp(B->rmap)); 3198 PetscCall(PetscLayoutSetUp(B->cmap)); 3199 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3200 3201 B->preallocated = PETSC_TRUE; 3202 3203 mbs = B->rmap->n / bs; 3204 nbs = B->cmap->n / bs; 3205 bs2 = bs * bs; 3206 3207 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3208 3209 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3210 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3211 if (nnz) { 3212 for (i = 0; i < mbs; i++) { 3213 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3214 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3215 } 3216 } 3217 3218 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3219 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3220 PetscOptionsEnd(); 3221 3222 if (!flg) { 3223 switch (bs) { 3224 case 1: 3225 B->ops->mult = MatMult_SeqBAIJ_1; 3226 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3227 break; 3228 case 2: 3229 B->ops->mult = MatMult_SeqBAIJ_2; 3230 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3231 break; 3232 case 3: 3233 B->ops->mult = MatMult_SeqBAIJ_3; 3234 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3235 break; 3236 case 4: 3237 B->ops->mult = MatMult_SeqBAIJ_4; 3238 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3239 break; 3240 case 5: 3241 B->ops->mult = MatMult_SeqBAIJ_5; 3242 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3243 break; 3244 case 6: 3245 B->ops->mult = MatMult_SeqBAIJ_6; 3246 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3247 break; 3248 case 7: 3249 B->ops->mult = MatMult_SeqBAIJ_7; 3250 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3251 break; 3252 case 9: { 3253 PetscInt version = 1; 3254 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3255 switch (version) { 3256 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3257 case 1: 3258 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3259 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3260 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3261 break; 3262 #endif 3263 default: 3264 B->ops->mult = MatMult_SeqBAIJ_N; 3265 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3266 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3267 break; 3268 } 3269 break; 3270 } 3271 case 11: 3272 B->ops->mult = MatMult_SeqBAIJ_11; 3273 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3274 break; 3275 case 12: { 3276 PetscInt version = 1; 3277 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3278 switch (version) { 3279 case 1: 3280 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3281 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3282 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3283 break; 3284 case 2: 3285 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3286 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3287 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3288 break; 3289 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3290 case 3: 3291 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3292 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3293 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3294 break; 3295 #endif 3296 default: 3297 B->ops->mult = MatMult_SeqBAIJ_N; 3298 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3299 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3300 break; 3301 } 3302 break; 3303 } 3304 case 15: { 3305 PetscInt version = 1; 3306 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3307 switch (version) { 3308 case 1: 3309 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3310 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3311 break; 3312 case 2: 3313 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3314 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3315 break; 3316 case 3: 3317 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3318 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3319 break; 3320 case 4: 3321 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3322 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3323 break; 3324 default: 3325 B->ops->mult = MatMult_SeqBAIJ_N; 3326 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3327 break; 3328 } 3329 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3330 break; 3331 } 3332 default: 3333 B->ops->mult = MatMult_SeqBAIJ_N; 3334 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3335 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3336 break; 3337 } 3338 } 3339 B->ops->sor = MatSOR_SeqBAIJ; 3340 b->mbs = mbs; 3341 b->nbs = nbs; 3342 if (!skipallocation) { 3343 if (!b->imax) { 3344 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3345 3346 b->free_imax_ilen = PETSC_TRUE; 3347 } 3348 /* b->ilen will count nonzeros in each block row so far. */ 3349 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3350 if (!nnz) { 3351 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3352 else if (nz < 0) nz = 1; 3353 nz = PetscMin(nz, nbs); 3354 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3355 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3356 } else { 3357 PetscInt64 nz64 = 0; 3358 for (i = 0; i < mbs; i++) { 3359 b->imax[i] = nnz[i]; 3360 nz64 += nnz[i]; 3361 } 3362 PetscCall(PetscIntCast(nz64, &nz)); 3363 } 3364 3365 /* allocate the matrix space */ 3366 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3367 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 3368 PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3369 if (B->structure_only) { 3370 b->free_a = PETSC_FALSE; 3371 } else { 3372 PetscInt nzbs2 = 0; 3373 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3374 PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 3375 b->free_a = PETSC_TRUE; 3376 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3377 } 3378 b->free_ij = PETSC_TRUE; 3379 PetscCall(PetscArrayzero(b->j, nz)); 3380 3381 b->i[0] = 0; 3382 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3383 } else { 3384 b->free_a = PETSC_FALSE; 3385 b->free_ij = PETSC_FALSE; 3386 } 3387 3388 b->bs2 = bs2; 3389 b->mbs = mbs; 3390 b->nz = 0; 3391 b->maxnz = nz; 3392 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3393 B->was_assembled = PETSC_FALSE; 3394 B->assembled = PETSC_FALSE; 3395 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } 3398 3399 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3400 { 3401 PetscInt i, m, nz, nz_max = 0, *nnz; 3402 PetscScalar *values = NULL; 3403 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3404 3405 PetscFunctionBegin; 3406 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3407 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3408 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3409 PetscCall(PetscLayoutSetUp(B->rmap)); 3410 PetscCall(PetscLayoutSetUp(B->cmap)); 3411 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3412 m = B->rmap->n / bs; 3413 3414 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3415 PetscCall(PetscMalloc1(m + 1, &nnz)); 3416 for (i = 0; i < m; i++) { 3417 nz = ii[i + 1] - ii[i]; 3418 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3419 nz_max = PetscMax(nz_max, nz); 3420 nnz[i] = nz; 3421 } 3422 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3423 PetscCall(PetscFree(nnz)); 3424 3425 values = (PetscScalar *)V; 3426 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3427 for (i = 0; i < m; i++) { 3428 PetscInt ncols = ii[i + 1] - ii[i]; 3429 const PetscInt *icols = jj + ii[i]; 3430 if (bs == 1 || !roworiented) { 3431 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3432 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3433 } else { 3434 PetscInt j; 3435 for (j = 0; j < ncols; j++) { 3436 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3437 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3438 } 3439 } 3440 } 3441 if (!V) PetscCall(PetscFree(values)); 3442 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3443 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3444 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3445 PetscFunctionReturn(PETSC_SUCCESS); 3446 } 3447 3448 /*@C 3449 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3450 3451 Not Collective 3452 3453 Input Parameter: 3454 . A - a `MATSEQBAIJ` matrix 3455 3456 Output Parameter: 3457 . array - pointer to the data 3458 3459 Level: intermediate 3460 3461 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3462 @*/ 3463 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3464 { 3465 PetscFunctionBegin; 3466 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3467 PetscFunctionReturn(PETSC_SUCCESS); 3468 } 3469 3470 /*@C 3471 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3472 3473 Not Collective 3474 3475 Input Parameters: 3476 + A - a `MATSEQBAIJ` matrix 3477 - array - pointer to the data 3478 3479 Level: intermediate 3480 3481 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3482 @*/ 3483 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3484 { 3485 PetscFunctionBegin; 3486 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3487 PetscFunctionReturn(PETSC_SUCCESS); 3488 } 3489 3490 /*MC 3491 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3492 block sparse compressed row format. 3493 3494 Options Database Keys: 3495 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3496 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3497 3498 Level: beginner 3499 3500 Notes: 3501 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3502 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3503 3504 Run with `-info` to see what version of the matrix-vector product is being used 3505 3506 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3507 M*/ 3508 3509 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3510 3511 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3512 { 3513 PetscMPIInt size; 3514 Mat_SeqBAIJ *b; 3515 3516 PetscFunctionBegin; 3517 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3518 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3519 3520 PetscCall(PetscNew(&b)); 3521 B->data = (void *)b; 3522 B->ops[0] = MatOps_Values; 3523 3524 b->row = NULL; 3525 b->col = NULL; 3526 b->icol = NULL; 3527 b->reallocs = 0; 3528 b->saved_values = NULL; 3529 3530 b->roworiented = PETSC_TRUE; 3531 b->nonew = 0; 3532 b->diag = NULL; 3533 B->spptr = NULL; 3534 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3535 b->keepnonzeropattern = PETSC_FALSE; 3536 3537 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3538 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3539 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3547 #if defined(PETSC_HAVE_HYPRE) 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3549 #endif 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3551 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3552 PetscFunctionReturn(PETSC_SUCCESS); 3553 } 3554 3555 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3556 { 3557 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3558 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3559 3560 PetscFunctionBegin; 3561 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3562 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3563 3564 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3565 c->imax = a->imax; 3566 c->ilen = a->ilen; 3567 c->free_imax_ilen = PETSC_FALSE; 3568 } else { 3569 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3570 for (i = 0; i < mbs; i++) { 3571 c->imax[i] = a->imax[i]; 3572 c->ilen[i] = a->ilen[i]; 3573 } 3574 c->free_imax_ilen = PETSC_TRUE; 3575 } 3576 3577 /* allocate the matrix space */ 3578 if (mallocmatspace) { 3579 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3580 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3581 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3582 c->free_a = PETSC_TRUE; 3583 c->i = a->i; 3584 c->j = a->j; 3585 c->free_ij = PETSC_FALSE; 3586 c->parent = A; 3587 C->preallocated = PETSC_TRUE; 3588 C->assembled = PETSC_TRUE; 3589 3590 PetscCall(PetscObjectReference((PetscObject)A)); 3591 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3592 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3593 } else { 3594 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3595 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 3596 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3597 c->free_a = PETSC_TRUE; 3598 c->free_ij = PETSC_TRUE; 3599 3600 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3601 if (mbs > 0) { 3602 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3603 if (cpvalues == MAT_COPY_VALUES) { 3604 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3605 } else { 3606 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3607 } 3608 } 3609 C->preallocated = PETSC_TRUE; 3610 C->assembled = PETSC_TRUE; 3611 } 3612 } 3613 3614 c->roworiented = a->roworiented; 3615 c->nonew = a->nonew; 3616 3617 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3618 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3619 3620 c->bs2 = a->bs2; 3621 c->mbs = a->mbs; 3622 c->nbs = a->nbs; 3623 3624 if (a->diag) { 3625 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3626 c->diag = a->diag; 3627 c->free_diag = PETSC_FALSE; 3628 } else { 3629 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3630 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3631 c->free_diag = PETSC_TRUE; 3632 } 3633 } else c->diag = NULL; 3634 3635 c->nz = a->nz; 3636 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3637 c->solve_work = NULL; 3638 c->mult_work = NULL; 3639 c->sor_workt = NULL; 3640 c->sor_work = NULL; 3641 3642 c->compressedrow.use = a->compressedrow.use; 3643 c->compressedrow.nrows = a->compressedrow.nrows; 3644 if (a->compressedrow.use) { 3645 i = a->compressedrow.nrows; 3646 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3647 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3648 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3649 } else { 3650 c->compressedrow.use = PETSC_FALSE; 3651 c->compressedrow.i = NULL; 3652 c->compressedrow.rindex = NULL; 3653 } 3654 c->nonzerorowcnt = a->nonzerorowcnt; 3655 C->nonzerostate = A->nonzerostate; 3656 3657 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3658 PetscFunctionReturn(PETSC_SUCCESS); 3659 } 3660 3661 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3662 { 3663 PetscFunctionBegin; 3664 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3665 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3666 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3667 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3668 PetscFunctionReturn(PETSC_SUCCESS); 3669 } 3670 3671 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3672 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3673 { 3674 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3675 PetscInt *rowidxs, *colidxs; 3676 PetscScalar *matvals; 3677 3678 PetscFunctionBegin; 3679 PetscCall(PetscViewerSetUp(viewer)); 3680 3681 /* read matrix header */ 3682 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3683 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3684 M = header[1]; 3685 N = header[2]; 3686 nz = header[3]; 3687 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3688 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3689 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3690 3691 /* set block sizes from the viewer's .info file */ 3692 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3693 /* set local and global sizes if not set already */ 3694 if (mat->rmap->n < 0) mat->rmap->n = M; 3695 if (mat->cmap->n < 0) mat->cmap->n = N; 3696 if (mat->rmap->N < 0) mat->rmap->N = M; 3697 if (mat->cmap->N < 0) mat->cmap->N = N; 3698 PetscCall(PetscLayoutSetUp(mat->rmap)); 3699 PetscCall(PetscLayoutSetUp(mat->cmap)); 3700 3701 /* check if the matrix sizes are correct */ 3702 PetscCall(MatGetSize(mat, &rows, &cols)); 3703 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3704 PetscCall(MatGetBlockSize(mat, &bs)); 3705 PetscCall(MatGetLocalSize(mat, &m, &n)); 3706 mbs = m / bs; 3707 nbs = n / bs; 3708 3709 /* read in row lengths, column indices and nonzero values */ 3710 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3711 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3712 rowidxs[0] = 0; 3713 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3714 sum = rowidxs[m]; 3715 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3716 3717 /* read in column indices and nonzero values */ 3718 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3719 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3720 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3721 3722 { /* preallocate matrix storage */ 3723 PetscBT bt; /* helper bit set to count nonzeros */ 3724 PetscInt *nnz; 3725 PetscBool sbaij; 3726 3727 PetscCall(PetscBTCreate(nbs, &bt)); 3728 PetscCall(PetscCalloc1(mbs, &nnz)); 3729 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3730 for (i = 0; i < mbs; i++) { 3731 PetscCall(PetscBTMemzero(nbs, bt)); 3732 for (k = 0; k < bs; k++) { 3733 PetscInt row = bs * i + k; 3734 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3735 PetscInt col = colidxs[j]; 3736 if (!sbaij || col >= row) 3737 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3738 } 3739 } 3740 } 3741 PetscCall(PetscBTDestroy(&bt)); 3742 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3743 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3744 PetscCall(PetscFree(nnz)); 3745 } 3746 3747 /* store matrix values */ 3748 for (i = 0; i < m; i++) { 3749 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3750 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3751 } 3752 3753 PetscCall(PetscFree(rowidxs)); 3754 PetscCall(PetscFree2(colidxs, matvals)); 3755 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3756 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3757 PetscFunctionReturn(PETSC_SUCCESS); 3758 } 3759 3760 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3761 { 3762 PetscBool isbinary; 3763 3764 PetscFunctionBegin; 3765 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3766 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3767 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3768 PetscFunctionReturn(PETSC_SUCCESS); 3769 } 3770 3771 /*@ 3772 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3773 compressed row) format. For good matrix assembly performance the 3774 user should preallocate the matrix storage by setting the parameter `nz` 3775 (or the array `nnz`). 3776 3777 Collective 3778 3779 Input Parameters: 3780 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3781 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3782 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3783 . m - number of rows 3784 . n - number of columns 3785 . nz - number of nonzero blocks per block row (same for all rows) 3786 - nnz - array containing the number of nonzero blocks in the various block rows 3787 (possibly different for each block row) or `NULL` 3788 3789 Output Parameter: 3790 . A - the matrix 3791 3792 Options Database Keys: 3793 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3794 - -mat_block_size - size of the blocks to use 3795 3796 Level: intermediate 3797 3798 Notes: 3799 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3800 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3801 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3802 3803 The number of rows and columns must be divisible by blocksize. 3804 3805 If the `nnz` parameter is given then the `nz` parameter is ignored 3806 3807 A nonzero block is any block that as 1 or more nonzeros in it 3808 3809 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3810 storage. That is, the stored row and column indices can begin at 3811 either one (as in Fortran) or zero. 3812 3813 Specify the preallocated storage with either `nz` or `nnz` (not both). 3814 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3815 allocation. See [Sparse Matrices](sec_matsparse) for details. 3816 matrices. 3817 3818 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3819 @*/ 3820 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3821 { 3822 PetscFunctionBegin; 3823 PetscCall(MatCreate(comm, A)); 3824 PetscCall(MatSetSizes(*A, m, n, m, n)); 3825 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3826 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3827 PetscFunctionReturn(PETSC_SUCCESS); 3828 } 3829 3830 /*@ 3831 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3832 per row in the matrix. For good matrix assembly performance the 3833 user should preallocate the matrix storage by setting the parameter `nz` 3834 (or the array `nnz`). 3835 3836 Collective 3837 3838 Input Parameters: 3839 + B - the matrix 3840 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3841 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3842 . nz - number of block nonzeros per block row (same for all rows) 3843 - nnz - array containing the number of block nonzeros in the various block rows 3844 (possibly different for each block row) or `NULL` 3845 3846 Options Database Keys: 3847 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3848 - -mat_block_size - size of the blocks to use 3849 3850 Level: intermediate 3851 3852 Notes: 3853 If the `nnz` parameter is given then the `nz` parameter is ignored 3854 3855 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3856 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3857 You can also run with the option `-info` and look for messages with the string 3858 malloc in them to see if additional memory allocation was needed. 3859 3860 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3861 storage. That is, the stored row and column indices can begin at 3862 either one (as in Fortran) or zero. 3863 3864 Specify the preallocated storage with either `nz` or `nnz` (not both). 3865 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3866 allocation. See [Sparse Matrices](sec_matsparse) for details. 3867 3868 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3869 @*/ 3870 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3871 { 3872 PetscFunctionBegin; 3873 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3874 PetscValidType(B, 1); 3875 PetscValidLogicalCollectiveInt(B, bs, 2); 3876 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3877 PetscFunctionReturn(PETSC_SUCCESS); 3878 } 3879 3880 /*@C 3881 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3882 3883 Collective 3884 3885 Input Parameters: 3886 + B - the matrix 3887 . bs - the blocksize 3888 . i - the indices into `j` for the start of each local row (indices start with zero) 3889 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 3890 - v - optional values in the matrix, use `NULL` if not provided 3891 3892 Level: advanced 3893 3894 Notes: 3895 The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3896 3897 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3898 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3899 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3900 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3901 block column and the second index is over columns within a block. 3902 3903 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3904 3905 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3906 @*/ 3907 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3908 { 3909 PetscFunctionBegin; 3910 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3911 PetscValidType(B, 1); 3912 PetscValidLogicalCollectiveInt(B, bs, 2); 3913 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3914 PetscFunctionReturn(PETSC_SUCCESS); 3915 } 3916 3917 /*@ 3918 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3919 3920 Collective 3921 3922 Input Parameters: 3923 + comm - must be an MPI communicator of size 1 3924 . bs - size of block 3925 . m - number of rows 3926 . n - number of columns 3927 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3928 . j - column indices 3929 - a - matrix values 3930 3931 Output Parameter: 3932 . mat - the matrix 3933 3934 Level: advanced 3935 3936 Notes: 3937 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3938 once the matrix is destroyed 3939 3940 You cannot set new nonzero locations into this matrix, that will generate an error. 3941 3942 The `i` and `j` indices are 0 based 3943 3944 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3945 3946 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3947 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3948 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3949 with column-major ordering within blocks. 3950 3951 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3952 @*/ 3953 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3954 { 3955 Mat_SeqBAIJ *baij; 3956 3957 PetscFunctionBegin; 3958 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3959 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3960 3961 PetscCall(MatCreate(comm, mat)); 3962 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3963 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3964 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3965 baij = (Mat_SeqBAIJ *)(*mat)->data; 3966 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3967 3968 baij->i = i; 3969 baij->j = j; 3970 baij->a = a; 3971 3972 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3973 baij->free_a = PETSC_FALSE; 3974 baij->free_ij = PETSC_FALSE; 3975 baij->free_imax_ilen = PETSC_TRUE; 3976 3977 for (PetscInt ii = 0; ii < m; ii++) { 3978 const PetscInt row_len = i[ii + 1] - i[ii]; 3979 3980 baij->ilen[ii] = baij->imax[ii] = row_len; 3981 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3982 } 3983 if (PetscDefined(USE_DEBUG)) { 3984 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3985 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3986 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3987 } 3988 } 3989 3990 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3991 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3992 PetscFunctionReturn(PETSC_SUCCESS); 3993 } 3994 3995 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3996 { 3997 PetscFunctionBegin; 3998 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3999 PetscFunctionReturn(PETSC_SUCCESS); 4000 } 4001