1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petsc/private/kernels/blockmatmult.h> 9 10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 11 #define TYPE BAIJ 12 #define TYPE_BS 13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 14 #undef TYPE_BS 15 #define TYPE_BS _BS 16 #define TYPE_BS_ON 17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 18 #undef TYPE_BS 19 #include "../src/mat/impls/aij/seq/seqhashmat.h" 20 #undef TYPE 21 #undef TYPE_BS_ON 22 23 #if defined(PETSC_HAVE_HYPRE) 24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 25 #endif 26 27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29 #endif 30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31 32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 33 { 34 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 35 PetscInt m, n, ib, jb, bs = A->rmap->bs; 36 MatScalar *a_val = a_aij->a; 37 38 PetscFunctionBegin; 39 PetscCall(MatGetSize(A, &m, &n)); 40 PetscCall(PetscArrayzero(reductions, n)); 41 if (type == NORM_2) { 42 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 43 for (jb = 0; jb < bs; jb++) { 44 for (ib = 0; ib < bs; ib++) { 45 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 46 a_val++; 47 } 48 } 49 } 50 } else if (type == NORM_1) { 51 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 52 for (jb = 0; jb < bs; jb++) { 53 for (ib = 0; ib < bs; ib++) { 54 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 55 a_val++; 56 } 57 } 58 } 59 } else if (type == NORM_INFINITY) { 60 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 61 for (jb = 0; jb < bs; jb++) { 62 for (ib = 0; ib < bs; ib++) { 63 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 64 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 65 a_val++; 66 } 67 } 68 } 69 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 70 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 71 for (jb = 0; jb < bs; jb++) { 72 for (ib = 0; ib < bs; ib++) { 73 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 74 a_val++; 75 } 76 } 77 } 78 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 80 for (jb = 0; jb < bs; jb++) { 81 for (ib = 0; ib < bs; ib++) { 82 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 83 a_val++; 84 } 85 } 86 } 87 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 88 if (type == NORM_2) { 89 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 90 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 91 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 92 } 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 97 { 98 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 99 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 100 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 101 PetscReal shift = 0.0; 102 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 103 104 PetscFunctionBegin; 105 allowzeropivot = PetscNot(A->erroriffailure); 106 107 if (a->idiagvalid) { 108 if (values) *values = a->idiag; 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 112 diag_offset = a->diag; 113 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 114 diag = a->idiag; 115 if (values) *values = a->idiag; 116 /* factor and invert each block */ 117 switch (bs) { 118 case 1: 119 for (i = 0; i < mbs; i++) { 120 odiag = v + 1 * diag_offset[i]; 121 diag[0] = odiag[0]; 122 123 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 124 if (allowzeropivot) { 125 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 127 A->factorerror_zeropivot_row = i; 128 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 129 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 130 } 131 132 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 133 diag += 1; 134 } 135 break; 136 case 2: 137 for (i = 0; i < mbs; i++) { 138 odiag = v + 4 * diag_offset[i]; 139 diag[0] = odiag[0]; 140 diag[1] = odiag[1]; 141 diag[2] = odiag[2]; 142 diag[3] = odiag[3]; 143 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 4; 146 } 147 break; 148 case 3: 149 for (i = 0; i < mbs; i++) { 150 odiag = v + 9 * diag_offset[i]; 151 diag[0] = odiag[0]; 152 diag[1] = odiag[1]; 153 diag[2] = odiag[2]; 154 diag[3] = odiag[3]; 155 diag[4] = odiag[4]; 156 diag[5] = odiag[5]; 157 diag[6] = odiag[6]; 158 diag[7] = odiag[7]; 159 diag[8] = odiag[8]; 160 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 161 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 162 diag += 9; 163 } 164 break; 165 case 4: 166 for (i = 0; i < mbs; i++) { 167 odiag = v + 16 * diag_offset[i]; 168 PetscCall(PetscArraycpy(diag, odiag, 16)); 169 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 170 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 171 diag += 16; 172 } 173 break; 174 case 5: 175 for (i = 0; i < mbs; i++) { 176 odiag = v + 25 * diag_offset[i]; 177 PetscCall(PetscArraycpy(diag, odiag, 25)); 178 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 179 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 180 diag += 25; 181 } 182 break; 183 case 6: 184 for (i = 0; i < mbs; i++) { 185 odiag = v + 36 * diag_offset[i]; 186 PetscCall(PetscArraycpy(diag, odiag, 36)); 187 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 188 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 189 diag += 36; 190 } 191 break; 192 case 7: 193 for (i = 0; i < mbs; i++) { 194 odiag = v + 49 * diag_offset[i]; 195 PetscCall(PetscArraycpy(diag, odiag, 49)); 196 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 197 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 198 diag += 49; 199 } 200 break; 201 default: 202 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 203 for (i = 0; i < mbs; i++) { 204 odiag = v + bs2 * diag_offset[i]; 205 PetscCall(PetscArraycpy(diag, odiag, bs2)); 206 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 207 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 208 diag += bs2; 209 } 210 PetscCall(PetscFree2(v_work, v_pivots)); 211 } 212 a->idiagvalid = PETSC_TRUE; 213 PetscFunctionReturn(PETSC_SUCCESS); 214 } 215 216 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 219 PetscScalar *x, *work, *w, *workt, *t; 220 const MatScalar *v, *aa = a->a, *idiag; 221 const PetscScalar *b, *xb; 222 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 223 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 224 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 225 226 PetscFunctionBegin; 227 its = its * lits; 228 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 229 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 230 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 231 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 232 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 233 234 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 235 236 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 237 diag = a->diag; 238 idiag = a->idiag; 239 k = PetscMax(A->rmap->n, A->cmap->n); 240 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 241 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 242 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 243 work = a->mult_work; 244 t = a->sor_workt; 245 w = a->sor_work; 246 247 PetscCall(VecGetArray(xx, &x)); 248 PetscCall(VecGetArrayRead(bb, &b)); 249 250 if (flag & SOR_ZERO_INITIAL_GUESS) { 251 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 252 switch (bs) { 253 case 1: 254 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 255 t[0] = b[0]; 256 i2 = 1; 257 idiag += 1; 258 for (i = 1; i < m; i++) { 259 v = aa + ai[i]; 260 vi = aj + ai[i]; 261 nz = diag[i] - ai[i]; 262 s[0] = b[i2]; 263 for (j = 0; j < nz; j++) { 264 xw[0] = x[vi[j]]; 265 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 266 } 267 t[i2] = s[0]; 268 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 269 x[i2] = xw[0]; 270 idiag += 1; 271 i2 += 1; 272 } 273 break; 274 case 2: 275 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 276 t[0] = b[0]; 277 t[1] = b[1]; 278 i2 = 2; 279 idiag += 4; 280 for (i = 1; i < m; i++) { 281 v = aa + 4 * ai[i]; 282 vi = aj + ai[i]; 283 nz = diag[i] - ai[i]; 284 s[0] = b[i2]; 285 s[1] = b[i2 + 1]; 286 for (j = 0; j < nz; j++) { 287 idx = 2 * vi[j]; 288 it = 4 * j; 289 xw[0] = x[idx]; 290 xw[1] = x[1 + idx]; 291 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 292 } 293 t[i2] = s[0]; 294 t[i2 + 1] = s[1]; 295 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 296 x[i2] = xw[0]; 297 x[i2 + 1] = xw[1]; 298 idiag += 4; 299 i2 += 2; 300 } 301 break; 302 case 3: 303 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 304 t[0] = b[0]; 305 t[1] = b[1]; 306 t[2] = b[2]; 307 i2 = 3; 308 idiag += 9; 309 for (i = 1; i < m; i++) { 310 v = aa + 9 * ai[i]; 311 vi = aj + ai[i]; 312 nz = diag[i] - ai[i]; 313 s[0] = b[i2]; 314 s[1] = b[i2 + 1]; 315 s[2] = b[i2 + 2]; 316 while (nz--) { 317 idx = 3 * (*vi++); 318 xw[0] = x[idx]; 319 xw[1] = x[1 + idx]; 320 xw[2] = x[2 + idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 322 v += 9; 323 } 324 t[i2] = s[0]; 325 t[i2 + 1] = s[1]; 326 t[i2 + 2] = s[2]; 327 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 328 x[i2] = xw[0]; 329 x[i2 + 1] = xw[1]; 330 x[i2 + 2] = xw[2]; 331 idiag += 9; 332 i2 += 3; 333 } 334 break; 335 case 4: 336 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 337 t[0] = b[0]; 338 t[1] = b[1]; 339 t[2] = b[2]; 340 t[3] = b[3]; 341 i2 = 4; 342 idiag += 16; 343 for (i = 1; i < m; i++) { 344 v = aa + 16 * ai[i]; 345 vi = aj + ai[i]; 346 nz = diag[i] - ai[i]; 347 s[0] = b[i2]; 348 s[1] = b[i2 + 1]; 349 s[2] = b[i2 + 2]; 350 s[3] = b[i2 + 3]; 351 while (nz--) { 352 idx = 4 * (*vi++); 353 xw[0] = x[idx]; 354 xw[1] = x[1 + idx]; 355 xw[2] = x[2 + idx]; 356 xw[3] = x[3 + idx]; 357 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 358 v += 16; 359 } 360 t[i2] = s[0]; 361 t[i2 + 1] = s[1]; 362 t[i2 + 2] = s[2]; 363 t[i2 + 3] = s[3]; 364 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 365 x[i2] = xw[0]; 366 x[i2 + 1] = xw[1]; 367 x[i2 + 2] = xw[2]; 368 x[i2 + 3] = xw[3]; 369 idiag += 16; 370 i2 += 4; 371 } 372 break; 373 case 5: 374 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 375 t[0] = b[0]; 376 t[1] = b[1]; 377 t[2] = b[2]; 378 t[3] = b[3]; 379 t[4] = b[4]; 380 i2 = 5; 381 idiag += 25; 382 for (i = 1; i < m; i++) { 383 v = aa + 25 * ai[i]; 384 vi = aj + ai[i]; 385 nz = diag[i] - ai[i]; 386 s[0] = b[i2]; 387 s[1] = b[i2 + 1]; 388 s[2] = b[i2 + 2]; 389 s[3] = b[i2 + 3]; 390 s[4] = b[i2 + 4]; 391 while (nz--) { 392 idx = 5 * (*vi++); 393 xw[0] = x[idx]; 394 xw[1] = x[1 + idx]; 395 xw[2] = x[2 + idx]; 396 xw[3] = x[3 + idx]; 397 xw[4] = x[4 + idx]; 398 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 399 v += 25; 400 } 401 t[i2] = s[0]; 402 t[i2 + 1] = s[1]; 403 t[i2 + 2] = s[2]; 404 t[i2 + 3] = s[3]; 405 t[i2 + 4] = s[4]; 406 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 407 x[i2] = xw[0]; 408 x[i2 + 1] = xw[1]; 409 x[i2 + 2] = xw[2]; 410 x[i2 + 3] = xw[3]; 411 x[i2 + 4] = xw[4]; 412 idiag += 25; 413 i2 += 5; 414 } 415 break; 416 case 6: 417 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 418 t[0] = b[0]; 419 t[1] = b[1]; 420 t[2] = b[2]; 421 t[3] = b[3]; 422 t[4] = b[4]; 423 t[5] = b[5]; 424 i2 = 6; 425 idiag += 36; 426 for (i = 1; i < m; i++) { 427 v = aa + 36 * ai[i]; 428 vi = aj + ai[i]; 429 nz = diag[i] - ai[i]; 430 s[0] = b[i2]; 431 s[1] = b[i2 + 1]; 432 s[2] = b[i2 + 2]; 433 s[3] = b[i2 + 3]; 434 s[4] = b[i2 + 4]; 435 s[5] = b[i2 + 5]; 436 while (nz--) { 437 idx = 6 * (*vi++); 438 xw[0] = x[idx]; 439 xw[1] = x[1 + idx]; 440 xw[2] = x[2 + idx]; 441 xw[3] = x[3 + idx]; 442 xw[4] = x[4 + idx]; 443 xw[5] = x[5 + idx]; 444 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 445 v += 36; 446 } 447 t[i2] = s[0]; 448 t[i2 + 1] = s[1]; 449 t[i2 + 2] = s[2]; 450 t[i2 + 3] = s[3]; 451 t[i2 + 4] = s[4]; 452 t[i2 + 5] = s[5]; 453 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 454 x[i2] = xw[0]; 455 x[i2 + 1] = xw[1]; 456 x[i2 + 2] = xw[2]; 457 x[i2 + 3] = xw[3]; 458 x[i2 + 4] = xw[4]; 459 x[i2 + 5] = xw[5]; 460 idiag += 36; 461 i2 += 6; 462 } 463 break; 464 case 7: 465 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 466 t[0] = b[0]; 467 t[1] = b[1]; 468 t[2] = b[2]; 469 t[3] = b[3]; 470 t[4] = b[4]; 471 t[5] = b[5]; 472 t[6] = b[6]; 473 i2 = 7; 474 idiag += 49; 475 for (i = 1; i < m; i++) { 476 v = aa + 49 * ai[i]; 477 vi = aj + ai[i]; 478 nz = diag[i] - ai[i]; 479 s[0] = b[i2]; 480 s[1] = b[i2 + 1]; 481 s[2] = b[i2 + 2]; 482 s[3] = b[i2 + 3]; 483 s[4] = b[i2 + 4]; 484 s[5] = b[i2 + 5]; 485 s[6] = b[i2 + 6]; 486 while (nz--) { 487 idx = 7 * (*vi++); 488 xw[0] = x[idx]; 489 xw[1] = x[1 + idx]; 490 xw[2] = x[2 + idx]; 491 xw[3] = x[3 + idx]; 492 xw[4] = x[4 + idx]; 493 xw[5] = x[5 + idx]; 494 xw[6] = x[6 + idx]; 495 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 496 v += 49; 497 } 498 t[i2] = s[0]; 499 t[i2 + 1] = s[1]; 500 t[i2 + 2] = s[2]; 501 t[i2 + 3] = s[3]; 502 t[i2 + 4] = s[4]; 503 t[i2 + 5] = s[5]; 504 t[i2 + 6] = s[6]; 505 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 506 x[i2] = xw[0]; 507 x[i2 + 1] = xw[1]; 508 x[i2 + 2] = xw[2]; 509 x[i2 + 3] = xw[3]; 510 x[i2 + 4] = xw[4]; 511 x[i2 + 5] = xw[5]; 512 x[i2 + 6] = xw[6]; 513 idiag += 49; 514 i2 += 7; 515 } 516 break; 517 default: 518 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 519 PetscCall(PetscArraycpy(t, b, bs)); 520 i2 = bs; 521 idiag += bs2; 522 for (i = 1; i < m; i++) { 523 v = aa + bs2 * ai[i]; 524 vi = aj + ai[i]; 525 nz = diag[i] - ai[i]; 526 527 PetscCall(PetscArraycpy(w, b + i2, bs)); 528 /* copy all rows of x that are needed into contiguous space */ 529 workt = work; 530 for (j = 0; j < nz; j++) { 531 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 532 workt += bs; 533 } 534 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 535 PetscCall(PetscArraycpy(t + i2, w, bs)); 536 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 537 538 idiag += bs2; 539 i2 += bs; 540 } 541 break; 542 } 543 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 544 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 545 xb = t; 546 } else xb = b; 547 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 548 idiag = a->idiag + bs2 * (a->mbs - 1); 549 i2 = bs * (m - 1); 550 switch (bs) { 551 case 1: 552 s[0] = xb[i2]; 553 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 554 x[i2] = xw[0]; 555 i2 -= 1; 556 for (i = m - 2; i >= 0; i--) { 557 v = aa + (diag[i] + 1); 558 vi = aj + diag[i] + 1; 559 nz = ai[i + 1] - diag[i] - 1; 560 s[0] = xb[i2]; 561 for (j = 0; j < nz; j++) { 562 xw[0] = x[vi[j]]; 563 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 564 } 565 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 566 x[i2] = xw[0]; 567 idiag -= 1; 568 i2 -= 1; 569 } 570 break; 571 case 2: 572 s[0] = xb[i2]; 573 s[1] = xb[i2 + 1]; 574 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 575 x[i2] = xw[0]; 576 x[i2 + 1] = xw[1]; 577 i2 -= 2; 578 idiag -= 4; 579 for (i = m - 2; i >= 0; i--) { 580 v = aa + 4 * (diag[i] + 1); 581 vi = aj + diag[i] + 1; 582 nz = ai[i + 1] - diag[i] - 1; 583 s[0] = xb[i2]; 584 s[1] = xb[i2 + 1]; 585 for (j = 0; j < nz; j++) { 586 idx = 2 * vi[j]; 587 it = 4 * j; 588 xw[0] = x[idx]; 589 xw[1] = x[1 + idx]; 590 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 591 } 592 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 593 x[i2] = xw[0]; 594 x[i2 + 1] = xw[1]; 595 idiag -= 4; 596 i2 -= 2; 597 } 598 break; 599 case 3: 600 s[0] = xb[i2]; 601 s[1] = xb[i2 + 1]; 602 s[2] = xb[i2 + 2]; 603 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 604 x[i2] = xw[0]; 605 x[i2 + 1] = xw[1]; 606 x[i2 + 2] = xw[2]; 607 i2 -= 3; 608 idiag -= 9; 609 for (i = m - 2; i >= 0; i--) { 610 v = aa + 9 * (diag[i] + 1); 611 vi = aj + diag[i] + 1; 612 nz = ai[i + 1] - diag[i] - 1; 613 s[0] = xb[i2]; 614 s[1] = xb[i2 + 1]; 615 s[2] = xb[i2 + 2]; 616 while (nz--) { 617 idx = 3 * (*vi++); 618 xw[0] = x[idx]; 619 xw[1] = x[1 + idx]; 620 xw[2] = x[2 + idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 625 x[i2] = xw[0]; 626 x[i2 + 1] = xw[1]; 627 x[i2 + 2] = xw[2]; 628 idiag -= 9; 629 i2 -= 3; 630 } 631 break; 632 case 4: 633 s[0] = xb[i2]; 634 s[1] = xb[i2 + 1]; 635 s[2] = xb[i2 + 2]; 636 s[3] = xb[i2 + 3]; 637 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 638 x[i2] = xw[0]; 639 x[i2 + 1] = xw[1]; 640 x[i2 + 2] = xw[2]; 641 x[i2 + 3] = xw[3]; 642 i2 -= 4; 643 idiag -= 16; 644 for (i = m - 2; i >= 0; i--) { 645 v = aa + 16 * (diag[i] + 1); 646 vi = aj + diag[i] + 1; 647 nz = ai[i + 1] - diag[i] - 1; 648 s[0] = xb[i2]; 649 s[1] = xb[i2 + 1]; 650 s[2] = xb[i2 + 2]; 651 s[3] = xb[i2 + 3]; 652 while (nz--) { 653 idx = 4 * (*vi++); 654 xw[0] = x[idx]; 655 xw[1] = x[1 + idx]; 656 xw[2] = x[2 + idx]; 657 xw[3] = x[3 + idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 659 v += 16; 660 } 661 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 662 x[i2] = xw[0]; 663 x[i2 + 1] = xw[1]; 664 x[i2 + 2] = xw[2]; 665 x[i2 + 3] = xw[3]; 666 idiag -= 16; 667 i2 -= 4; 668 } 669 break; 670 case 5: 671 s[0] = xb[i2]; 672 s[1] = xb[i2 + 1]; 673 s[2] = xb[i2 + 2]; 674 s[3] = xb[i2 + 3]; 675 s[4] = xb[i2 + 4]; 676 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 677 x[i2] = xw[0]; 678 x[i2 + 1] = xw[1]; 679 x[i2 + 2] = xw[2]; 680 x[i2 + 3] = xw[3]; 681 x[i2 + 4] = xw[4]; 682 i2 -= 5; 683 idiag -= 25; 684 for (i = m - 2; i >= 0; i--) { 685 v = aa + 25 * (diag[i] + 1); 686 vi = aj + diag[i] + 1; 687 nz = ai[i + 1] - diag[i] - 1; 688 s[0] = xb[i2]; 689 s[1] = xb[i2 + 1]; 690 s[2] = xb[i2 + 2]; 691 s[3] = xb[i2 + 3]; 692 s[4] = xb[i2 + 4]; 693 while (nz--) { 694 idx = 5 * (*vi++); 695 xw[0] = x[idx]; 696 xw[1] = x[1 + idx]; 697 xw[2] = x[2 + idx]; 698 xw[3] = x[3 + idx]; 699 xw[4] = x[4 + idx]; 700 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 701 v += 25; 702 } 703 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 704 x[i2] = xw[0]; 705 x[i2 + 1] = xw[1]; 706 x[i2 + 2] = xw[2]; 707 x[i2 + 3] = xw[3]; 708 x[i2 + 4] = xw[4]; 709 idiag -= 25; 710 i2 -= 5; 711 } 712 break; 713 case 6: 714 s[0] = xb[i2]; 715 s[1] = xb[i2 + 1]; 716 s[2] = xb[i2 + 2]; 717 s[3] = xb[i2 + 3]; 718 s[4] = xb[i2 + 4]; 719 s[5] = xb[i2 + 5]; 720 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 721 x[i2] = xw[0]; 722 x[i2 + 1] = xw[1]; 723 x[i2 + 2] = xw[2]; 724 x[i2 + 3] = xw[3]; 725 x[i2 + 4] = xw[4]; 726 x[i2 + 5] = xw[5]; 727 i2 -= 6; 728 idiag -= 36; 729 for (i = m - 2; i >= 0; i--) { 730 v = aa + 36 * (diag[i] + 1); 731 vi = aj + diag[i] + 1; 732 nz = ai[i + 1] - diag[i] - 1; 733 s[0] = xb[i2]; 734 s[1] = xb[i2 + 1]; 735 s[2] = xb[i2 + 2]; 736 s[3] = xb[i2 + 3]; 737 s[4] = xb[i2 + 4]; 738 s[5] = xb[i2 + 5]; 739 while (nz--) { 740 idx = 6 * (*vi++); 741 xw[0] = x[idx]; 742 xw[1] = x[1 + idx]; 743 xw[2] = x[2 + idx]; 744 xw[3] = x[3 + idx]; 745 xw[4] = x[4 + idx]; 746 xw[5] = x[5 + idx]; 747 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 748 v += 36; 749 } 750 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 751 x[i2] = xw[0]; 752 x[i2 + 1] = xw[1]; 753 x[i2 + 2] = xw[2]; 754 x[i2 + 3] = xw[3]; 755 x[i2 + 4] = xw[4]; 756 x[i2 + 5] = xw[5]; 757 idiag -= 36; 758 i2 -= 6; 759 } 760 break; 761 case 7: 762 s[0] = xb[i2]; 763 s[1] = xb[i2 + 1]; 764 s[2] = xb[i2 + 2]; 765 s[3] = xb[i2 + 3]; 766 s[4] = xb[i2 + 4]; 767 s[5] = xb[i2 + 5]; 768 s[6] = xb[i2 + 6]; 769 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 770 x[i2] = xw[0]; 771 x[i2 + 1] = xw[1]; 772 x[i2 + 2] = xw[2]; 773 x[i2 + 3] = xw[3]; 774 x[i2 + 4] = xw[4]; 775 x[i2 + 5] = xw[5]; 776 x[i2 + 6] = xw[6]; 777 i2 -= 7; 778 idiag -= 49; 779 for (i = m - 2; i >= 0; i--) { 780 v = aa + 49 * (diag[i] + 1); 781 vi = aj + diag[i] + 1; 782 nz = ai[i + 1] - diag[i] - 1; 783 s[0] = xb[i2]; 784 s[1] = xb[i2 + 1]; 785 s[2] = xb[i2 + 2]; 786 s[3] = xb[i2 + 3]; 787 s[4] = xb[i2 + 4]; 788 s[5] = xb[i2 + 5]; 789 s[6] = xb[i2 + 6]; 790 while (nz--) { 791 idx = 7 * (*vi++); 792 xw[0] = x[idx]; 793 xw[1] = x[1 + idx]; 794 xw[2] = x[2 + idx]; 795 xw[3] = x[3 + idx]; 796 xw[4] = x[4 + idx]; 797 xw[5] = x[5 + idx]; 798 xw[6] = x[6 + idx]; 799 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 800 v += 49; 801 } 802 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 803 x[i2] = xw[0]; 804 x[i2 + 1] = xw[1]; 805 x[i2 + 2] = xw[2]; 806 x[i2 + 3] = xw[3]; 807 x[i2 + 4] = xw[4]; 808 x[i2 + 5] = xw[5]; 809 x[i2 + 6] = xw[6]; 810 idiag -= 49; 811 i2 -= 7; 812 } 813 break; 814 default: 815 PetscCall(PetscArraycpy(w, xb + i2, bs)); 816 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 817 i2 -= bs; 818 idiag -= bs2; 819 for (i = m - 2; i >= 0; i--) { 820 v = aa + bs2 * (diag[i] + 1); 821 vi = aj + diag[i] + 1; 822 nz = ai[i + 1] - diag[i] - 1; 823 824 PetscCall(PetscArraycpy(w, xb + i2, bs)); 825 /* copy all rows of x that are needed into contiguous space */ 826 workt = work; 827 for (j = 0; j < nz; j++) { 828 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 829 workt += bs; 830 } 831 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 832 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 833 834 idiag -= bs2; 835 i2 -= bs; 836 } 837 break; 838 } 839 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 840 } 841 its--; 842 } 843 while (its--) { 844 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 845 idiag = a->idiag; 846 i2 = 0; 847 switch (bs) { 848 case 1: 849 for (i = 0; i < m; i++) { 850 v = aa + ai[i]; 851 vi = aj + ai[i]; 852 nz = ai[i + 1] - ai[i]; 853 s[0] = b[i2]; 854 for (j = 0; j < nz; j++) { 855 xw[0] = x[vi[j]]; 856 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 857 } 858 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 859 x[i2] += xw[0]; 860 idiag += 1; 861 i2 += 1; 862 } 863 break; 864 case 2: 865 for (i = 0; i < m; i++) { 866 v = aa + 4 * ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i + 1] - ai[i]; 869 s[0] = b[i2]; 870 s[1] = b[i2 + 1]; 871 for (j = 0; j < nz; j++) { 872 idx = 2 * vi[j]; 873 it = 4 * j; 874 xw[0] = x[idx]; 875 xw[1] = x[1 + idx]; 876 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 877 } 878 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 879 x[i2] += xw[0]; 880 x[i2 + 1] += xw[1]; 881 idiag += 4; 882 i2 += 2; 883 } 884 break; 885 case 3: 886 for (i = 0; i < m; i++) { 887 v = aa + 9 * ai[i]; 888 vi = aj + ai[i]; 889 nz = ai[i + 1] - ai[i]; 890 s[0] = b[i2]; 891 s[1] = b[i2 + 1]; 892 s[2] = b[i2 + 2]; 893 while (nz--) { 894 idx = 3 * (*vi++); 895 xw[0] = x[idx]; 896 xw[1] = x[1 + idx]; 897 xw[2] = x[2 + idx]; 898 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 899 v += 9; 900 } 901 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 902 x[i2] += xw[0]; 903 x[i2 + 1] += xw[1]; 904 x[i2 + 2] += xw[2]; 905 idiag += 9; 906 i2 += 3; 907 } 908 break; 909 case 4: 910 for (i = 0; i < m; i++) { 911 v = aa + 16 * ai[i]; 912 vi = aj + ai[i]; 913 nz = ai[i + 1] - ai[i]; 914 s[0] = b[i2]; 915 s[1] = b[i2 + 1]; 916 s[2] = b[i2 + 2]; 917 s[3] = b[i2 + 3]; 918 while (nz--) { 919 idx = 4 * (*vi++); 920 xw[0] = x[idx]; 921 xw[1] = x[1 + idx]; 922 xw[2] = x[2 + idx]; 923 xw[3] = x[3 + idx]; 924 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 925 v += 16; 926 } 927 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 928 x[i2] += xw[0]; 929 x[i2 + 1] += xw[1]; 930 x[i2 + 2] += xw[2]; 931 x[i2 + 3] += xw[3]; 932 idiag += 16; 933 i2 += 4; 934 } 935 break; 936 case 5: 937 for (i = 0; i < m; i++) { 938 v = aa + 25 * ai[i]; 939 vi = aj + ai[i]; 940 nz = ai[i + 1] - ai[i]; 941 s[0] = b[i2]; 942 s[1] = b[i2 + 1]; 943 s[2] = b[i2 + 2]; 944 s[3] = b[i2 + 3]; 945 s[4] = b[i2 + 4]; 946 while (nz--) { 947 idx = 5 * (*vi++); 948 xw[0] = x[idx]; 949 xw[1] = x[1 + idx]; 950 xw[2] = x[2 + idx]; 951 xw[3] = x[3 + idx]; 952 xw[4] = x[4 + idx]; 953 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 954 v += 25; 955 } 956 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 957 x[i2] += xw[0]; 958 x[i2 + 1] += xw[1]; 959 x[i2 + 2] += xw[2]; 960 x[i2 + 3] += xw[3]; 961 x[i2 + 4] += xw[4]; 962 idiag += 25; 963 i2 += 5; 964 } 965 break; 966 case 6: 967 for (i = 0; i < m; i++) { 968 v = aa + 36 * ai[i]; 969 vi = aj + ai[i]; 970 nz = ai[i + 1] - ai[i]; 971 s[0] = b[i2]; 972 s[1] = b[i2 + 1]; 973 s[2] = b[i2 + 2]; 974 s[3] = b[i2 + 3]; 975 s[4] = b[i2 + 4]; 976 s[5] = b[i2 + 5]; 977 while (nz--) { 978 idx = 6 * (*vi++); 979 xw[0] = x[idx]; 980 xw[1] = x[1 + idx]; 981 xw[2] = x[2 + idx]; 982 xw[3] = x[3 + idx]; 983 xw[4] = x[4 + idx]; 984 xw[5] = x[5 + idx]; 985 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 986 v += 36; 987 } 988 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 989 x[i2] += xw[0]; 990 x[i2 + 1] += xw[1]; 991 x[i2 + 2] += xw[2]; 992 x[i2 + 3] += xw[3]; 993 x[i2 + 4] += xw[4]; 994 x[i2 + 5] += xw[5]; 995 idiag += 36; 996 i2 += 6; 997 } 998 break; 999 case 7: 1000 for (i = 0; i < m; i++) { 1001 v = aa + 49 * ai[i]; 1002 vi = aj + ai[i]; 1003 nz = ai[i + 1] - ai[i]; 1004 s[0] = b[i2]; 1005 s[1] = b[i2 + 1]; 1006 s[2] = b[i2 + 2]; 1007 s[3] = b[i2 + 3]; 1008 s[4] = b[i2 + 4]; 1009 s[5] = b[i2 + 5]; 1010 s[6] = b[i2 + 6]; 1011 while (nz--) { 1012 idx = 7 * (*vi++); 1013 xw[0] = x[idx]; 1014 xw[1] = x[1 + idx]; 1015 xw[2] = x[2 + idx]; 1016 xw[3] = x[3 + idx]; 1017 xw[4] = x[4 + idx]; 1018 xw[5] = x[5 + idx]; 1019 xw[6] = x[6 + idx]; 1020 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1021 v += 49; 1022 } 1023 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1024 x[i2] += xw[0]; 1025 x[i2 + 1] += xw[1]; 1026 x[i2 + 2] += xw[2]; 1027 x[i2 + 3] += xw[3]; 1028 x[i2 + 4] += xw[4]; 1029 x[i2 + 5] += xw[5]; 1030 x[i2 + 6] += xw[6]; 1031 idiag += 49; 1032 i2 += 7; 1033 } 1034 break; 1035 default: 1036 for (i = 0; i < m; i++) { 1037 v = aa + bs2 * ai[i]; 1038 vi = aj + ai[i]; 1039 nz = ai[i + 1] - ai[i]; 1040 1041 PetscCall(PetscArraycpy(w, b + i2, bs)); 1042 /* copy all rows of x that are needed into contiguous space */ 1043 workt = work; 1044 for (j = 0; j < nz; j++) { 1045 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1046 workt += bs; 1047 } 1048 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1049 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1050 1051 idiag += bs2; 1052 i2 += bs; 1053 } 1054 break; 1055 } 1056 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1057 } 1058 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1059 idiag = a->idiag + bs2 * (a->mbs - 1); 1060 i2 = bs * (m - 1); 1061 switch (bs) { 1062 case 1: 1063 for (i = m - 1; i >= 0; i--) { 1064 v = aa + ai[i]; 1065 vi = aj + ai[i]; 1066 nz = ai[i + 1] - ai[i]; 1067 s[0] = b[i2]; 1068 for (j = 0; j < nz; j++) { 1069 xw[0] = x[vi[j]]; 1070 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1071 } 1072 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1073 x[i2] += xw[0]; 1074 idiag -= 1; 1075 i2 -= 1; 1076 } 1077 break; 1078 case 2: 1079 for (i = m - 1; i >= 0; i--) { 1080 v = aa + 4 * ai[i]; 1081 vi = aj + ai[i]; 1082 nz = ai[i + 1] - ai[i]; 1083 s[0] = b[i2]; 1084 s[1] = b[i2 + 1]; 1085 for (j = 0; j < nz; j++) { 1086 idx = 2 * vi[j]; 1087 it = 4 * j; 1088 xw[0] = x[idx]; 1089 xw[1] = x[1 + idx]; 1090 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1091 } 1092 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1093 x[i2] += xw[0]; 1094 x[i2 + 1] += xw[1]; 1095 idiag -= 4; 1096 i2 -= 2; 1097 } 1098 break; 1099 case 3: 1100 for (i = m - 1; i >= 0; i--) { 1101 v = aa + 9 * ai[i]; 1102 vi = aj + ai[i]; 1103 nz = ai[i + 1] - ai[i]; 1104 s[0] = b[i2]; 1105 s[1] = b[i2 + 1]; 1106 s[2] = b[i2 + 2]; 1107 while (nz--) { 1108 idx = 3 * (*vi++); 1109 xw[0] = x[idx]; 1110 xw[1] = x[1 + idx]; 1111 xw[2] = x[2 + idx]; 1112 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1113 v += 9; 1114 } 1115 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1116 x[i2] += xw[0]; 1117 x[i2 + 1] += xw[1]; 1118 x[i2 + 2] += xw[2]; 1119 idiag -= 9; 1120 i2 -= 3; 1121 } 1122 break; 1123 case 4: 1124 for (i = m - 1; i >= 0; i--) { 1125 v = aa + 16 * ai[i]; 1126 vi = aj + ai[i]; 1127 nz = ai[i + 1] - ai[i]; 1128 s[0] = b[i2]; 1129 s[1] = b[i2 + 1]; 1130 s[2] = b[i2 + 2]; 1131 s[3] = b[i2 + 3]; 1132 while (nz--) { 1133 idx = 4 * (*vi++); 1134 xw[0] = x[idx]; 1135 xw[1] = x[1 + idx]; 1136 xw[2] = x[2 + idx]; 1137 xw[3] = x[3 + idx]; 1138 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1139 v += 16; 1140 } 1141 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1142 x[i2] += xw[0]; 1143 x[i2 + 1] += xw[1]; 1144 x[i2 + 2] += xw[2]; 1145 x[i2 + 3] += xw[3]; 1146 idiag -= 16; 1147 i2 -= 4; 1148 } 1149 break; 1150 case 5: 1151 for (i = m - 1; i >= 0; i--) { 1152 v = aa + 25 * ai[i]; 1153 vi = aj + ai[i]; 1154 nz = ai[i + 1] - ai[i]; 1155 s[0] = b[i2]; 1156 s[1] = b[i2 + 1]; 1157 s[2] = b[i2 + 2]; 1158 s[3] = b[i2 + 3]; 1159 s[4] = b[i2 + 4]; 1160 while (nz--) { 1161 idx = 5 * (*vi++); 1162 xw[0] = x[idx]; 1163 xw[1] = x[1 + idx]; 1164 xw[2] = x[2 + idx]; 1165 xw[3] = x[3 + idx]; 1166 xw[4] = x[4 + idx]; 1167 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1168 v += 25; 1169 } 1170 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1171 x[i2] += xw[0]; 1172 x[i2 + 1] += xw[1]; 1173 x[i2 + 2] += xw[2]; 1174 x[i2 + 3] += xw[3]; 1175 x[i2 + 4] += xw[4]; 1176 idiag -= 25; 1177 i2 -= 5; 1178 } 1179 break; 1180 case 6: 1181 for (i = m - 1; i >= 0; i--) { 1182 v = aa + 36 * ai[i]; 1183 vi = aj + ai[i]; 1184 nz = ai[i + 1] - ai[i]; 1185 s[0] = b[i2]; 1186 s[1] = b[i2 + 1]; 1187 s[2] = b[i2 + 2]; 1188 s[3] = b[i2 + 3]; 1189 s[4] = b[i2 + 4]; 1190 s[5] = b[i2 + 5]; 1191 while (nz--) { 1192 idx = 6 * (*vi++); 1193 xw[0] = x[idx]; 1194 xw[1] = x[1 + idx]; 1195 xw[2] = x[2 + idx]; 1196 xw[3] = x[3 + idx]; 1197 xw[4] = x[4 + idx]; 1198 xw[5] = x[5 + idx]; 1199 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1200 v += 36; 1201 } 1202 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1203 x[i2] += xw[0]; 1204 x[i2 + 1] += xw[1]; 1205 x[i2 + 2] += xw[2]; 1206 x[i2 + 3] += xw[3]; 1207 x[i2 + 4] += xw[4]; 1208 x[i2 + 5] += xw[5]; 1209 idiag -= 36; 1210 i2 -= 6; 1211 } 1212 break; 1213 case 7: 1214 for (i = m - 1; i >= 0; i--) { 1215 v = aa + 49 * ai[i]; 1216 vi = aj + ai[i]; 1217 nz = ai[i + 1] - ai[i]; 1218 s[0] = b[i2]; 1219 s[1] = b[i2 + 1]; 1220 s[2] = b[i2 + 2]; 1221 s[3] = b[i2 + 3]; 1222 s[4] = b[i2 + 4]; 1223 s[5] = b[i2 + 5]; 1224 s[6] = b[i2 + 6]; 1225 while (nz--) { 1226 idx = 7 * (*vi++); 1227 xw[0] = x[idx]; 1228 xw[1] = x[1 + idx]; 1229 xw[2] = x[2 + idx]; 1230 xw[3] = x[3 + idx]; 1231 xw[4] = x[4 + idx]; 1232 xw[5] = x[5 + idx]; 1233 xw[6] = x[6 + idx]; 1234 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1235 v += 49; 1236 } 1237 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1238 x[i2] += xw[0]; 1239 x[i2 + 1] += xw[1]; 1240 x[i2 + 2] += xw[2]; 1241 x[i2 + 3] += xw[3]; 1242 x[i2 + 4] += xw[4]; 1243 x[i2 + 5] += xw[5]; 1244 x[i2 + 6] += xw[6]; 1245 idiag -= 49; 1246 i2 -= 7; 1247 } 1248 break; 1249 default: 1250 for (i = m - 1; i >= 0; i--) { 1251 v = aa + bs2 * ai[i]; 1252 vi = aj + ai[i]; 1253 nz = ai[i + 1] - ai[i]; 1254 1255 PetscCall(PetscArraycpy(w, b + i2, bs)); 1256 /* copy all rows of x that are needed into contiguous space */ 1257 workt = work; 1258 for (j = 0; j < nz; j++) { 1259 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1260 workt += bs; 1261 } 1262 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1263 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1264 1265 idiag -= bs2; 1266 i2 -= bs; 1267 } 1268 break; 1269 } 1270 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1271 } 1272 } 1273 PetscCall(VecRestoreArray(xx, &x)); 1274 PetscCall(VecRestoreArrayRead(bb, &b)); 1275 PetscFunctionReturn(PETSC_SUCCESS); 1276 } 1277 1278 /* 1279 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1280 */ 1281 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1282 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1283 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1284 #define matsetvaluesblocked4_ matsetvaluesblocked4 1285 #endif 1286 1287 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1288 { 1289 Mat A = *AA; 1290 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1291 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1292 PetscInt *ai = a->i, *ailen = a->ilen; 1293 PetscInt *aj = a->j, stepval, lastcol = -1; 1294 const PetscScalar *value = v; 1295 MatScalar *ap, *aa = a->a, *bap; 1296 1297 PetscFunctionBegin; 1298 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1299 stepval = (n - 1) * 4; 1300 for (k = 0; k < m; k++) { /* loop over added rows */ 1301 row = im[k]; 1302 rp = aj + ai[row]; 1303 ap = aa + 16 * ai[row]; 1304 nrow = ailen[row]; 1305 low = 0; 1306 high = nrow; 1307 for (l = 0; l < n; l++) { /* loop over added columns */ 1308 col = in[l]; 1309 if (col <= lastcol) low = 0; 1310 else high = nrow; 1311 lastcol = col; 1312 value = v + k * (stepval + 4 + l) * 4; 1313 while (high - low > 7) { 1314 t = (low + high) / 2; 1315 if (rp[t] > col) high = t; 1316 else low = t; 1317 } 1318 for (i = low; i < high; i++) { 1319 if (rp[i] > col) break; 1320 if (rp[i] == col) { 1321 bap = ap + 16 * i; 1322 for (ii = 0; ii < 4; ii++, value += stepval) { 1323 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1324 } 1325 goto noinsert2; 1326 } 1327 } 1328 N = nrow++ - 1; 1329 high++; /* added new column index thus must search to one higher than before */ 1330 /* shift up all the later entries in this row */ 1331 for (ii = N; ii >= i; ii--) { 1332 rp[ii + 1] = rp[ii]; 1333 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1334 } 1335 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1336 rp[i] = col; 1337 bap = ap + 16 * i; 1338 for (ii = 0; ii < 4; ii++, value += stepval) { 1339 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1340 } 1341 noinsert2:; 1342 low = i; 1343 } 1344 ailen[row] = nrow; 1345 } 1346 PetscFunctionReturnVoid(); 1347 } 1348 1349 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1350 #define matsetvalues4_ MATSETVALUES4 1351 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1352 #define matsetvalues4_ matsetvalues4 1353 #endif 1354 1355 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1356 { 1357 Mat A = *AA; 1358 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1359 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1360 PetscInt *ai = a->i, *ailen = a->ilen; 1361 PetscInt *aj = a->j, brow, bcol; 1362 PetscInt ridx, cidx, lastcol = -1; 1363 MatScalar *ap, value, *aa = a->a, *bap; 1364 1365 PetscFunctionBegin; 1366 for (k = 0; k < m; k++) { /* loop over added rows */ 1367 row = im[k]; 1368 brow = row / 4; 1369 rp = aj + ai[brow]; 1370 ap = aa + 16 * ai[brow]; 1371 nrow = ailen[brow]; 1372 low = 0; 1373 high = nrow; 1374 for (l = 0; l < n; l++) { /* loop over added columns */ 1375 col = in[l]; 1376 bcol = col / 4; 1377 ridx = row % 4; 1378 cidx = col % 4; 1379 value = v[l + k * n]; 1380 if (col <= lastcol) low = 0; 1381 else high = nrow; 1382 lastcol = col; 1383 while (high - low > 7) { 1384 t = (low + high) / 2; 1385 if (rp[t] > bcol) high = t; 1386 else low = t; 1387 } 1388 for (i = low; i < high; i++) { 1389 if (rp[i] > bcol) break; 1390 if (rp[i] == bcol) { 1391 bap = ap + 16 * i + 4 * cidx + ridx; 1392 *bap += value; 1393 goto noinsert1; 1394 } 1395 } 1396 N = nrow++ - 1; 1397 high++; /* added new column thus must search to one higher than before */ 1398 /* shift up all the later entries in this row */ 1399 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1400 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1401 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1402 rp[i] = bcol; 1403 ap[16 * i + 4 * cidx + ridx] = value; 1404 noinsert1:; 1405 low = i; 1406 } 1407 ailen[brow] = nrow; 1408 } 1409 PetscFunctionReturnVoid(); 1410 } 1411 1412 /* 1413 Checks for missing diagonals 1414 */ 1415 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1416 { 1417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1418 PetscInt *diag, *ii = a->i, i; 1419 1420 PetscFunctionBegin; 1421 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1422 *missing = PETSC_FALSE; 1423 if (A->rmap->n > 0 && !ii) { 1424 *missing = PETSC_TRUE; 1425 if (d) *d = 0; 1426 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1427 } else { 1428 PetscInt n; 1429 n = PetscMin(a->mbs, a->nbs); 1430 diag = a->diag; 1431 for (i = 0; i < n; i++) { 1432 if (diag[i] >= ii[i + 1]) { 1433 *missing = PETSC_TRUE; 1434 if (d) *d = i; 1435 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1436 break; 1437 } 1438 } 1439 } 1440 PetscFunctionReturn(PETSC_SUCCESS); 1441 } 1442 1443 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1444 { 1445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1446 PetscInt i, j, m = a->mbs; 1447 1448 PetscFunctionBegin; 1449 if (!a->diag) { 1450 PetscCall(PetscMalloc1(m, &a->diag)); 1451 a->free_diag = PETSC_TRUE; 1452 } 1453 for (i = 0; i < m; i++) { 1454 a->diag[i] = a->i[i + 1]; 1455 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1456 if (a->j[j] == i) { 1457 a->diag[i] = j; 1458 break; 1459 } 1460 } 1461 } 1462 PetscFunctionReturn(PETSC_SUCCESS); 1463 } 1464 1465 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1466 { 1467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1468 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1469 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1470 1471 PetscFunctionBegin; 1472 *nn = n; 1473 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1474 if (symmetric) { 1475 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1476 nz = tia[n]; 1477 } else { 1478 tia = a->i; 1479 tja = a->j; 1480 } 1481 1482 if (!blockcompressed && bs > 1) { 1483 (*nn) *= bs; 1484 /* malloc & create the natural set of indices */ 1485 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1486 if (n) { 1487 (*ia)[0] = oshift; 1488 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1489 } 1490 1491 for (i = 1; i < n; i++) { 1492 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1493 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1494 } 1495 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1496 1497 if (inja) { 1498 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1499 cnt = 0; 1500 for (i = 0; i < n; i++) { 1501 for (j = 0; j < bs; j++) { 1502 for (k = tia[i]; k < tia[i + 1]; k++) { 1503 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1504 } 1505 } 1506 } 1507 } 1508 1509 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1510 PetscCall(PetscFree(tia)); 1511 PetscCall(PetscFree(tja)); 1512 } 1513 } else if (oshift == 1) { 1514 if (symmetric) { 1515 nz = tia[A->rmap->n / bs]; 1516 /* add 1 to i and j indices */ 1517 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1518 *ia = tia; 1519 if (ja) { 1520 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1521 *ja = tja; 1522 } 1523 } else { 1524 nz = a->i[A->rmap->n / bs]; 1525 /* malloc space and add 1 to i and j indices */ 1526 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1527 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1528 if (ja) { 1529 PetscCall(PetscMalloc1(nz, ja)); 1530 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1531 } 1532 } 1533 } else { 1534 *ia = tia; 1535 if (ja) *ja = tja; 1536 } 1537 PetscFunctionReturn(PETSC_SUCCESS); 1538 } 1539 1540 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1541 { 1542 PetscFunctionBegin; 1543 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1544 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1545 PetscCall(PetscFree(*ia)); 1546 if (ja) PetscCall(PetscFree(*ja)); 1547 } 1548 PetscFunctionReturn(PETSC_SUCCESS); 1549 } 1550 1551 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1552 { 1553 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1554 1555 PetscFunctionBegin; 1556 if (A->hash_active) { 1557 PetscInt bs; 1558 A->ops[0] = a->cops; 1559 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1560 PetscCall(MatGetBlockSize(A, &bs)); 1561 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1562 PetscCall(PetscFree(a->dnz)); 1563 PetscCall(PetscFree(a->bdnz)); 1564 A->hash_active = PETSC_FALSE; 1565 } 1566 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1567 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1568 PetscCall(ISDestroy(&a->row)); 1569 PetscCall(ISDestroy(&a->col)); 1570 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1571 PetscCall(PetscFree(a->idiag)); 1572 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1573 PetscCall(PetscFree(a->solve_work)); 1574 PetscCall(PetscFree(a->mult_work)); 1575 PetscCall(PetscFree(a->sor_workt)); 1576 PetscCall(PetscFree(a->sor_work)); 1577 PetscCall(ISDestroy(&a->icol)); 1578 PetscCall(PetscFree(a->saved_values)); 1579 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1580 1581 PetscCall(MatDestroy(&a->sbaijMat)); 1582 PetscCall(MatDestroy(&a->parent)); 1583 PetscCall(PetscFree(A->data)); 1584 1585 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1586 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1596 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1597 #if defined(PETSC_HAVE_HYPRE) 1598 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1599 #endif 1600 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1601 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1602 PetscFunctionReturn(PETSC_SUCCESS); 1603 } 1604 1605 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1606 { 1607 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1608 1609 PetscFunctionBegin; 1610 switch (op) { 1611 case MAT_ROW_ORIENTED: 1612 a->roworiented = flg; 1613 break; 1614 case MAT_KEEP_NONZERO_PATTERN: 1615 a->keepnonzeropattern = flg; 1616 break; 1617 case MAT_NEW_NONZERO_LOCATIONS: 1618 a->nonew = (flg ? 0 : 1); 1619 break; 1620 case MAT_NEW_NONZERO_LOCATION_ERR: 1621 a->nonew = (flg ? -1 : 0); 1622 break; 1623 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1624 a->nonew = (flg ? -2 : 0); 1625 break; 1626 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1627 a->nounused = (flg ? -1 : 0); 1628 break; 1629 default: 1630 break; 1631 } 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1636 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1637 { 1638 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1639 MatScalar *aa_i; 1640 PetscScalar *v_i; 1641 1642 PetscFunctionBegin; 1643 bs = A->rmap->bs; 1644 bs2 = bs * bs; 1645 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1646 1647 bn = row / bs; /* Block number */ 1648 bp = row % bs; /* Block Position */ 1649 M = ai[bn + 1] - ai[bn]; 1650 *nz = bs * M; 1651 1652 if (v) { 1653 *v = NULL; 1654 if (*nz) { 1655 PetscCall(PetscMalloc1(*nz, v)); 1656 for (i = 0; i < M; i++) { /* for each block in the block row */ 1657 v_i = *v + i * bs; 1658 aa_i = aa + bs2 * (ai[bn] + i); 1659 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1660 } 1661 } 1662 } 1663 1664 if (idx) { 1665 *idx = NULL; 1666 if (*nz) { 1667 PetscCall(PetscMalloc1(*nz, idx)); 1668 for (i = 0; i < M; i++) { /* for each block in the block row */ 1669 idx_i = *idx + i * bs; 1670 itmp = bs * aj[ai[bn] + i]; 1671 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1672 } 1673 } 1674 } 1675 PetscFunctionReturn(PETSC_SUCCESS); 1676 } 1677 1678 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1679 { 1680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1681 1682 PetscFunctionBegin; 1683 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1684 PetscFunctionReturn(PETSC_SUCCESS); 1685 } 1686 1687 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1688 { 1689 PetscFunctionBegin; 1690 if (idx) PetscCall(PetscFree(*idx)); 1691 if (v) PetscCall(PetscFree(*v)); 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1696 { 1697 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1698 Mat C; 1699 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1700 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1701 MatScalar *ata, *aa = a->a; 1702 1703 PetscFunctionBegin; 1704 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1705 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1706 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1707 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1708 1709 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1710 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1711 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1712 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1713 1714 at = (Mat_SeqBAIJ *)C->data; 1715 ati = at->i; 1716 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1717 } else { 1718 C = *B; 1719 at = (Mat_SeqBAIJ *)C->data; 1720 ati = at->i; 1721 } 1722 1723 atj = at->j; 1724 ata = at->a; 1725 1726 /* Copy ati into atfill so we have locations of the next free space in atj */ 1727 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1728 1729 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1730 for (i = 0; i < mbs; i++) { 1731 anzj = ai[i + 1] - ai[i]; 1732 for (j = 0; j < anzj; j++) { 1733 atj[atfill[*aj]] = i; 1734 for (kr = 0; kr < bs; kr++) { 1735 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1736 } 1737 atfill[*aj++] += 1; 1738 } 1739 } 1740 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1741 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1742 1743 /* Clean up temporary space and complete requests. */ 1744 PetscCall(PetscFree(atfill)); 1745 1746 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1747 PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs)); 1748 *B = C; 1749 } else { 1750 PetscCall(MatHeaderMerge(A, &C)); 1751 } 1752 PetscFunctionReturn(PETSC_SUCCESS); 1753 } 1754 1755 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1756 { 1757 Mat Btrans; 1758 1759 PetscFunctionBegin; 1760 *f = PETSC_FALSE; 1761 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1762 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1763 PetscCall(MatDestroy(&Btrans)); 1764 PetscFunctionReturn(PETSC_SUCCESS); 1765 } 1766 1767 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1768 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1769 { 1770 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1771 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1772 PetscInt *rowlens, *colidxs; 1773 PetscScalar *matvals; 1774 1775 PetscFunctionBegin; 1776 PetscCall(PetscViewerSetUp(viewer)); 1777 1778 M = mat->rmap->N; 1779 N = mat->cmap->N; 1780 m = mat->rmap->n; 1781 bs = mat->rmap->bs; 1782 nz = bs * bs * A->nz; 1783 1784 /* write matrix header */ 1785 header[0] = MAT_FILE_CLASSID; 1786 header[1] = M; 1787 header[2] = N; 1788 header[3] = nz; 1789 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1790 1791 /* store row lengths */ 1792 PetscCall(PetscMalloc1(m, &rowlens)); 1793 for (cnt = 0, i = 0; i < A->mbs; i++) 1794 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1795 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1796 PetscCall(PetscFree(rowlens)); 1797 1798 /* store column indices */ 1799 PetscCall(PetscMalloc1(nz, &colidxs)); 1800 for (cnt = 0, i = 0; i < A->mbs; i++) 1801 for (k = 0; k < bs; k++) 1802 for (j = A->i[i]; j < A->i[i + 1]; j++) 1803 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1804 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1805 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1806 PetscCall(PetscFree(colidxs)); 1807 1808 /* store nonzero values */ 1809 PetscCall(PetscMalloc1(nz, &matvals)); 1810 for (cnt = 0, i = 0; i < A->mbs; i++) 1811 for (k = 0; k < bs; k++) 1812 for (j = A->i[i]; j < A->i[i + 1]; j++) 1813 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1814 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1815 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1816 PetscCall(PetscFree(matvals)); 1817 1818 /* write block size option to the viewer's .info file */ 1819 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1820 PetscFunctionReturn(PETSC_SUCCESS); 1821 } 1822 1823 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1824 { 1825 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1826 PetscInt i, bs = A->rmap->bs, k; 1827 1828 PetscFunctionBegin; 1829 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1830 for (i = 0; i < a->mbs; i++) { 1831 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1832 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1833 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1834 } 1835 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1836 PetscFunctionReturn(PETSC_SUCCESS); 1837 } 1838 1839 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1840 { 1841 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1842 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1843 PetscViewerFormat format; 1844 1845 PetscFunctionBegin; 1846 if (A->structure_only) { 1847 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1848 PetscFunctionReturn(PETSC_SUCCESS); 1849 } 1850 1851 PetscCall(PetscViewerGetFormat(viewer, &format)); 1852 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1853 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1854 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1855 const char *matname; 1856 Mat aij; 1857 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1858 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1859 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1860 PetscCall(MatView(aij, viewer)); 1861 PetscCall(MatDestroy(&aij)); 1862 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1863 PetscFunctionReturn(PETSC_SUCCESS); 1864 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1865 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1866 for (i = 0; i < a->mbs; i++) { 1867 for (j = 0; j < bs; j++) { 1868 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1869 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1870 for (l = 0; l < bs; l++) { 1871 #if defined(PETSC_USE_COMPLEX) 1872 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1873 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1874 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1875 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1876 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1877 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1878 } 1879 #else 1880 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1881 #endif 1882 } 1883 } 1884 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1885 } 1886 } 1887 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1888 } else { 1889 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1890 for (i = 0; i < a->mbs; i++) { 1891 for (j = 0; j < bs; j++) { 1892 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1893 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1894 for (l = 0; l < bs; l++) { 1895 #if defined(PETSC_USE_COMPLEX) 1896 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1897 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1898 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1899 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1900 } else { 1901 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1902 } 1903 #else 1904 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1905 #endif 1906 } 1907 } 1908 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1909 } 1910 } 1911 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1912 } 1913 PetscCall(PetscViewerFlush(viewer)); 1914 PetscFunctionReturn(PETSC_SUCCESS); 1915 } 1916 1917 #include <petscdraw.h> 1918 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1919 { 1920 Mat A = (Mat)Aa; 1921 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1922 PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 1923 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1924 MatScalar *aa; 1925 PetscViewer viewer; 1926 PetscViewerFormat format; 1927 int color; 1928 1929 PetscFunctionBegin; 1930 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1931 PetscCall(PetscViewerGetFormat(viewer, &format)); 1932 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1933 1934 /* loop over matrix elements drawing boxes */ 1935 1936 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1937 PetscDrawCollectiveBegin(draw); 1938 /* Blue for negative, Cyan for zero and Red for positive */ 1939 color = PETSC_DRAW_BLUE; 1940 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1941 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1942 y_l = A->rmap->N - row - 1.0; 1943 y_r = y_l + 1.0; 1944 x_l = a->j[j] * bs; 1945 x_r = x_l + 1.0; 1946 aa = a->a + j * bs2; 1947 for (k = 0; k < bs; k++) { 1948 for (l = 0; l < bs; l++) { 1949 if (PetscRealPart(*aa++) >= 0.) continue; 1950 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1951 } 1952 } 1953 } 1954 } 1955 color = PETSC_DRAW_CYAN; 1956 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1957 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1958 y_l = A->rmap->N - row - 1.0; 1959 y_r = y_l + 1.0; 1960 x_l = a->j[j] * bs; 1961 x_r = x_l + 1.0; 1962 aa = a->a + j * bs2; 1963 for (k = 0; k < bs; k++) { 1964 for (l = 0; l < bs; l++) { 1965 if (PetscRealPart(*aa++) != 0.) continue; 1966 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1967 } 1968 } 1969 } 1970 } 1971 color = PETSC_DRAW_RED; 1972 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1973 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1974 y_l = A->rmap->N - row - 1.0; 1975 y_r = y_l + 1.0; 1976 x_l = a->j[j] * bs; 1977 x_r = x_l + 1.0; 1978 aa = a->a + j * bs2; 1979 for (k = 0; k < bs; k++) { 1980 for (l = 0; l < bs; l++) { 1981 if (PetscRealPart(*aa++) <= 0.) continue; 1982 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1983 } 1984 } 1985 } 1986 } 1987 PetscDrawCollectiveEnd(draw); 1988 } else { 1989 /* use contour shading to indicate magnitude of values */ 1990 /* first determine max of all nonzero values */ 1991 PetscReal minv = 0.0, maxv = 0.0; 1992 PetscDraw popup; 1993 1994 for (i = 0; i < a->nz * a->bs2; i++) { 1995 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1996 } 1997 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1998 PetscCall(PetscDrawGetPopup(draw, &popup)); 1999 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2000 2001 PetscDrawCollectiveBegin(draw); 2002 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2003 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2004 y_l = A->rmap->N - row - 1.0; 2005 y_r = y_l + 1.0; 2006 x_l = a->j[j] * bs; 2007 x_r = x_l + 1.0; 2008 aa = a->a + j * bs2; 2009 for (k = 0; k < bs; k++) { 2010 for (l = 0; l < bs; l++) { 2011 MatScalar v = *aa++; 2012 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2013 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2014 } 2015 } 2016 } 2017 } 2018 PetscDrawCollectiveEnd(draw); 2019 } 2020 PetscFunctionReturn(PETSC_SUCCESS); 2021 } 2022 2023 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2024 { 2025 PetscReal xl, yl, xr, yr, w, h; 2026 PetscDraw draw; 2027 PetscBool isnull; 2028 2029 PetscFunctionBegin; 2030 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2031 PetscCall(PetscDrawIsNull(draw, &isnull)); 2032 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2033 2034 xr = A->cmap->n; 2035 yr = A->rmap->N; 2036 h = yr / 10.0; 2037 w = xr / 10.0; 2038 xr += w; 2039 yr += h; 2040 xl = -w; 2041 yl = -h; 2042 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2043 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2044 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2045 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2046 PetscCall(PetscDrawSave(draw)); 2047 PetscFunctionReturn(PETSC_SUCCESS); 2048 } 2049 2050 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2051 { 2052 PetscBool iascii, isbinary, isdraw; 2053 2054 PetscFunctionBegin; 2055 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2056 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2057 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2058 if (iascii) { 2059 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2060 } else if (isbinary) { 2061 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2062 } else if (isdraw) { 2063 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2064 } else { 2065 Mat B; 2066 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2067 PetscCall(MatView(B, viewer)); 2068 PetscCall(MatDestroy(&B)); 2069 } 2070 PetscFunctionReturn(PETSC_SUCCESS); 2071 } 2072 2073 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2074 { 2075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2076 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2077 PetscInt *ai = a->i, *ailen = a->ilen; 2078 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2079 MatScalar *ap, *aa = a->a; 2080 2081 PetscFunctionBegin; 2082 for (k = 0; k < m; k++) { /* loop over rows */ 2083 row = im[k]; 2084 brow = row / bs; 2085 if (row < 0) { 2086 v += n; 2087 continue; 2088 } /* negative row */ 2089 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2090 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2091 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2092 nrow = ailen[brow]; 2093 for (l = 0; l < n; l++) { /* loop over columns */ 2094 if (in[l] < 0) { 2095 v++; 2096 continue; 2097 } /* negative column */ 2098 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2099 col = in[l]; 2100 bcol = col / bs; 2101 cidx = col % bs; 2102 ridx = row % bs; 2103 high = nrow; 2104 low = 0; /* assume unsorted */ 2105 while (high - low > 5) { 2106 t = (low + high) / 2; 2107 if (rp[t] > bcol) high = t; 2108 else low = t; 2109 } 2110 for (i = low; i < high; i++) { 2111 if (rp[i] > bcol) break; 2112 if (rp[i] == bcol) { 2113 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2114 goto finished; 2115 } 2116 } 2117 *v++ = 0.0; 2118 finished:; 2119 } 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2125 { 2126 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2127 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2128 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2129 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2130 PetscBool roworiented = a->roworiented; 2131 const PetscScalar *value = v; 2132 MatScalar *ap = NULL, *aa = a->a, *bap; 2133 2134 PetscFunctionBegin; 2135 if (roworiented) { 2136 stepval = (n - 1) * bs; 2137 } else { 2138 stepval = (m - 1) * bs; 2139 } 2140 for (k = 0; k < m; k++) { /* loop over added rows */ 2141 row = im[k]; 2142 if (row < 0) continue; 2143 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2144 rp = aj + ai[row]; 2145 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2146 rmax = imax[row]; 2147 nrow = ailen[row]; 2148 low = 0; 2149 high = nrow; 2150 for (l = 0; l < n; l++) { /* loop over added columns */ 2151 if (in[l] < 0) continue; 2152 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2153 col = in[l]; 2154 if (!A->structure_only) { 2155 if (roworiented) { 2156 value = v + (k * (stepval + bs) + l) * bs; 2157 } else { 2158 value = v + (l * (stepval + bs) + k) * bs; 2159 } 2160 } 2161 if (col <= lastcol) low = 0; 2162 else high = nrow; 2163 lastcol = col; 2164 while (high - low > 7) { 2165 t = (low + high) / 2; 2166 if (rp[t] > col) high = t; 2167 else low = t; 2168 } 2169 for (i = low; i < high; i++) { 2170 if (rp[i] > col) break; 2171 if (rp[i] == col) { 2172 if (A->structure_only) goto noinsert2; 2173 bap = ap + bs2 * i; 2174 if (roworiented) { 2175 if (is == ADD_VALUES) { 2176 for (ii = 0; ii < bs; ii++, value += stepval) { 2177 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2178 } 2179 } else { 2180 for (ii = 0; ii < bs; ii++, value += stepval) { 2181 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2182 } 2183 } 2184 } else { 2185 if (is == ADD_VALUES) { 2186 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2187 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2188 bap += bs; 2189 } 2190 } else { 2191 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2192 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2193 bap += bs; 2194 } 2195 } 2196 } 2197 goto noinsert2; 2198 } 2199 } 2200 if (nonew == 1) goto noinsert2; 2201 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2202 if (A->structure_only) { 2203 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2204 } else { 2205 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2206 } 2207 N = nrow++ - 1; 2208 high++; 2209 /* shift up all the later entries in this row */ 2210 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2211 rp[i] = col; 2212 if (!A->structure_only) { 2213 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2214 bap = ap + bs2 * i; 2215 if (roworiented) { 2216 for (ii = 0; ii < bs; ii++, value += stepval) { 2217 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2218 } 2219 } else { 2220 for (ii = 0; ii < bs; ii++, value += stepval) { 2221 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2222 } 2223 } 2224 } 2225 noinsert2:; 2226 low = i; 2227 } 2228 ailen[row] = nrow; 2229 } 2230 PetscFunctionReturn(PETSC_SUCCESS); 2231 } 2232 2233 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2234 { 2235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2236 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2237 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2238 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2239 MatScalar *aa = a->a, *ap; 2240 PetscReal ratio = 0.6; 2241 2242 PetscFunctionBegin; 2243 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2244 2245 if (m) rmax = ailen[0]; 2246 for (i = 1; i < mbs; i++) { 2247 /* move each row back by the amount of empty slots (fshift) before it*/ 2248 fshift += imax[i - 1] - ailen[i - 1]; 2249 rmax = PetscMax(rmax, ailen[i]); 2250 if (fshift) { 2251 ip = aj + ai[i]; 2252 ap = aa + bs2 * ai[i]; 2253 N = ailen[i]; 2254 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2255 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2256 } 2257 ai[i] = ai[i - 1] + ailen[i - 1]; 2258 } 2259 if (mbs) { 2260 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2261 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2262 } 2263 2264 /* reset ilen and imax for each row */ 2265 a->nonzerorowcnt = 0; 2266 if (A->structure_only) { 2267 PetscCall(PetscFree2(a->imax, a->ilen)); 2268 } else { /* !A->structure_only */ 2269 for (i = 0; i < mbs; i++) { 2270 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2271 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2272 } 2273 } 2274 a->nz = ai[mbs]; 2275 2276 /* diagonals may have moved, so kill the diagonal pointers */ 2277 a->idiagvalid = PETSC_FALSE; 2278 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2279 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2280 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2281 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2282 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2283 2284 A->info.mallocs += a->reallocs; 2285 a->reallocs = 0; 2286 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2287 a->rmax = rmax; 2288 2289 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2290 PetscFunctionReturn(PETSC_SUCCESS); 2291 } 2292 2293 /* 2294 This function returns an array of flags which indicate the locations of contiguous 2295 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2296 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2297 Assume: sizes should be long enough to hold all the values. 2298 */ 2299 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2300 { 2301 PetscInt j = 0; 2302 2303 PetscFunctionBegin; 2304 for (PetscInt i = 0; i < n; j++) { 2305 PetscInt row = idx[i]; 2306 if (row % bs != 0) { /* Not the beginning of a block */ 2307 sizes[j] = 1; 2308 i++; 2309 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2310 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2311 i++; 2312 } else { /* Beginning of the block, so check if the complete block exists */ 2313 PetscBool flg = PETSC_TRUE; 2314 for (PetscInt k = 1; k < bs; k++) { 2315 if (row + k != idx[i + k]) { /* break in the block */ 2316 flg = PETSC_FALSE; 2317 break; 2318 } 2319 } 2320 if (flg) { /* No break in the bs */ 2321 sizes[j] = bs; 2322 i += bs; 2323 } else { 2324 sizes[j] = 1; 2325 i++; 2326 } 2327 } 2328 } 2329 *bs_max = j; 2330 PetscFunctionReturn(PETSC_SUCCESS); 2331 } 2332 2333 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2334 { 2335 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2336 PetscInt i, j, k, count, *rows; 2337 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2338 PetscScalar zero = 0.0; 2339 MatScalar *aa; 2340 const PetscScalar *xx; 2341 PetscScalar *bb; 2342 2343 PetscFunctionBegin; 2344 /* fix right-hand side if needed */ 2345 if (x && b) { 2346 PetscCall(VecGetArrayRead(x, &xx)); 2347 PetscCall(VecGetArray(b, &bb)); 2348 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2349 PetscCall(VecRestoreArrayRead(x, &xx)); 2350 PetscCall(VecRestoreArray(b, &bb)); 2351 } 2352 2353 /* Make a copy of the IS and sort it */ 2354 /* allocate memory for rows,sizes */ 2355 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2356 2357 /* copy IS values to rows, and sort them */ 2358 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2359 PetscCall(PetscSortInt(is_n, rows)); 2360 2361 if (baij->keepnonzeropattern) { 2362 for (i = 0; i < is_n; i++) sizes[i] = 1; 2363 bs_max = is_n; 2364 } else { 2365 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2366 A->nonzerostate++; 2367 } 2368 2369 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2370 row = rows[j]; 2371 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2372 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2373 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2374 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2375 if (diag != (PetscScalar)0.0) { 2376 if (baij->ilen[row / bs] > 0) { 2377 baij->ilen[row / bs] = 1; 2378 baij->j[baij->i[row / bs]] = row / bs; 2379 2380 PetscCall(PetscArrayzero(aa, count * bs)); 2381 } 2382 /* Now insert all the diagonal values for this bs */ 2383 for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2384 } else { /* (diag == 0.0) */ 2385 baij->ilen[row / bs] = 0; 2386 } /* end (diag == 0.0) */ 2387 } else { /* (sizes[i] != bs) */ 2388 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2389 for (k = 0; k < count; k++) { 2390 aa[0] = zero; 2391 aa += bs; 2392 } 2393 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2394 } 2395 } 2396 2397 PetscCall(PetscFree2(rows, sizes)); 2398 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2399 PetscFunctionReturn(PETSC_SUCCESS); 2400 } 2401 2402 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2403 { 2404 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2405 PetscInt i, j, k, count; 2406 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2407 PetscScalar zero = 0.0; 2408 MatScalar *aa; 2409 const PetscScalar *xx; 2410 PetscScalar *bb; 2411 PetscBool *zeroed, vecs = PETSC_FALSE; 2412 2413 PetscFunctionBegin; 2414 /* fix right-hand side if needed */ 2415 if (x && b) { 2416 PetscCall(VecGetArrayRead(x, &xx)); 2417 PetscCall(VecGetArray(b, &bb)); 2418 vecs = PETSC_TRUE; 2419 } 2420 2421 /* zero the columns */ 2422 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2423 for (i = 0; i < is_n; i++) { 2424 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2425 zeroed[is_idx[i]] = PETSC_TRUE; 2426 } 2427 for (i = 0; i < A->rmap->N; i++) { 2428 if (!zeroed[i]) { 2429 row = i / bs; 2430 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2431 for (k = 0; k < bs; k++) { 2432 col = bs * baij->j[j] + k; 2433 if (zeroed[col]) { 2434 aa = baij->a + j * bs2 + (i % bs) + bs * k; 2435 if (vecs) bb[i] -= aa[0] * xx[col]; 2436 aa[0] = 0.0; 2437 } 2438 } 2439 } 2440 } else if (vecs) bb[i] = diag * xx[i]; 2441 } 2442 PetscCall(PetscFree(zeroed)); 2443 if (vecs) { 2444 PetscCall(VecRestoreArrayRead(x, &xx)); 2445 PetscCall(VecRestoreArray(b, &bb)); 2446 } 2447 2448 /* zero the rows */ 2449 for (i = 0; i < is_n; i++) { 2450 row = is_idx[i]; 2451 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2452 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2453 for (k = 0; k < count; k++) { 2454 aa[0] = zero; 2455 aa += bs; 2456 } 2457 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2458 } 2459 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2460 PetscFunctionReturn(PETSC_SUCCESS); 2461 } 2462 2463 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2464 { 2465 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2466 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2467 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2468 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2469 PetscInt ridx, cidx, bs2 = a->bs2; 2470 PetscBool roworiented = a->roworiented; 2471 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2472 2473 PetscFunctionBegin; 2474 for (k = 0; k < m; k++) { /* loop over added rows */ 2475 row = im[k]; 2476 brow = row / bs; 2477 if (row < 0) continue; 2478 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2479 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2480 if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2481 rmax = imax[brow]; 2482 nrow = ailen[brow]; 2483 low = 0; 2484 high = nrow; 2485 for (l = 0; l < n; l++) { /* loop over added columns */ 2486 if (in[l] < 0) continue; 2487 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2488 col = in[l]; 2489 bcol = col / bs; 2490 ridx = row % bs; 2491 cidx = col % bs; 2492 if (!A->structure_only) { 2493 if (roworiented) { 2494 value = v[l + k * n]; 2495 } else { 2496 value = v[k + l * m]; 2497 } 2498 } 2499 if (col <= lastcol) low = 0; 2500 else high = nrow; 2501 lastcol = col; 2502 while (high - low > 7) { 2503 t = (low + high) / 2; 2504 if (rp[t] > bcol) high = t; 2505 else low = t; 2506 } 2507 for (i = low; i < high; i++) { 2508 if (rp[i] > bcol) break; 2509 if (rp[i] == bcol) { 2510 bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2511 if (!A->structure_only) { 2512 if (is == ADD_VALUES) *bap += value; 2513 else *bap = value; 2514 } 2515 goto noinsert1; 2516 } 2517 } 2518 if (nonew == 1) goto noinsert1; 2519 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2520 if (A->structure_only) { 2521 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2522 } else { 2523 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2524 } 2525 N = nrow++ - 1; 2526 high++; 2527 /* shift up all the later entries in this row */ 2528 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2529 rp[i] = bcol; 2530 if (!A->structure_only) { 2531 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2532 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2533 ap[bs2 * i + bs * cidx + ridx] = value; 2534 } 2535 a->nz++; 2536 noinsert1:; 2537 low = i; 2538 } 2539 ailen[brow] = nrow; 2540 } 2541 PetscFunctionReturn(PETSC_SUCCESS); 2542 } 2543 2544 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2545 { 2546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2547 Mat outA; 2548 PetscBool row_identity, col_identity; 2549 2550 PetscFunctionBegin; 2551 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2552 PetscCall(ISIdentity(row, &row_identity)); 2553 PetscCall(ISIdentity(col, &col_identity)); 2554 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2555 2556 outA = inA; 2557 inA->factortype = MAT_FACTOR_LU; 2558 PetscCall(PetscFree(inA->solvertype)); 2559 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2560 2561 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2562 2563 PetscCall(PetscObjectReference((PetscObject)row)); 2564 PetscCall(ISDestroy(&a->row)); 2565 a->row = row; 2566 PetscCall(PetscObjectReference((PetscObject)col)); 2567 PetscCall(ISDestroy(&a->col)); 2568 a->col = col; 2569 2570 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2571 PetscCall(ISDestroy(&a->icol)); 2572 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2573 2574 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2575 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2576 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2577 PetscFunctionReturn(PETSC_SUCCESS); 2578 } 2579 2580 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2581 { 2582 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2583 2584 PetscFunctionBegin; 2585 baij->nz = baij->maxnz; 2586 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2587 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2588 PetscFunctionReturn(PETSC_SUCCESS); 2589 } 2590 2591 /*@ 2592 MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 2593 2594 Input Parameters: 2595 + mat - the `MATSEQBAIJ` matrix 2596 - indices - the block column indices 2597 2598 Level: advanced 2599 2600 Notes: 2601 This can be called if you have precomputed the nonzero structure of the 2602 matrix and want to provide it to the matrix object to improve the performance 2603 of the `MatSetValues()` operation. 2604 2605 You MUST have set the correct numbers of nonzeros per row in the call to 2606 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2607 2608 MUST be called before any calls to `MatSetValues()` 2609 2610 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2611 @*/ 2612 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2613 { 2614 PetscFunctionBegin; 2615 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2616 PetscAssertPointer(indices, 2); 2617 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 2618 PetscFunctionReturn(PETSC_SUCCESS); 2619 } 2620 2621 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2622 { 2623 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2624 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2625 PetscReal atmp; 2626 PetscScalar *x, zero = 0.0; 2627 MatScalar *aa; 2628 PetscInt ncols, brow, krow, kcol; 2629 2630 PetscFunctionBegin; 2631 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2632 bs = A->rmap->bs; 2633 aa = a->a; 2634 ai = a->i; 2635 aj = a->j; 2636 mbs = a->mbs; 2637 2638 PetscCall(VecSet(v, zero)); 2639 PetscCall(VecGetArray(v, &x)); 2640 PetscCall(VecGetLocalSize(v, &n)); 2641 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2642 for (i = 0; i < mbs; i++) { 2643 ncols = ai[1] - ai[0]; 2644 ai++; 2645 brow = bs * i; 2646 for (j = 0; j < ncols; j++) { 2647 for (kcol = 0; kcol < bs; kcol++) { 2648 for (krow = 0; krow < bs; krow++) { 2649 atmp = PetscAbsScalar(*aa); 2650 aa++; 2651 row = brow + krow; /* row index */ 2652 if (PetscAbsScalar(x[row]) < atmp) { 2653 x[row] = atmp; 2654 if (idx) idx[row] = bs * (*aj) + kcol; 2655 } 2656 } 2657 } 2658 aj++; 2659 } 2660 } 2661 PetscCall(VecRestoreArray(v, &x)); 2662 PetscFunctionReturn(PETSC_SUCCESS); 2663 } 2664 2665 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2666 { 2667 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2668 PetscInt i, j, n, row, bs, *ai, mbs; 2669 PetscReal atmp; 2670 PetscScalar *x, zero = 0.0; 2671 MatScalar *aa; 2672 PetscInt ncols, brow, krow, kcol; 2673 2674 PetscFunctionBegin; 2675 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2676 bs = A->rmap->bs; 2677 aa = a->a; 2678 ai = a->i; 2679 mbs = a->mbs; 2680 2681 PetscCall(VecSet(v, zero)); 2682 PetscCall(VecGetArrayWrite(v, &x)); 2683 PetscCall(VecGetLocalSize(v, &n)); 2684 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2685 for (i = 0; i < mbs; i++) { 2686 ncols = ai[1] - ai[0]; 2687 ai++; 2688 brow = bs * i; 2689 for (j = 0; j < ncols; j++) { 2690 for (kcol = 0; kcol < bs; kcol++) { 2691 for (krow = 0; krow < bs; krow++) { 2692 atmp = PetscAbsScalar(*aa); 2693 aa++; 2694 row = brow + krow; /* row index */ 2695 x[row] += atmp; 2696 } 2697 } 2698 } 2699 } 2700 PetscCall(VecRestoreArrayWrite(v, &x)); 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2705 { 2706 PetscFunctionBegin; 2707 /* If the two matrices have the same copy implementation, use fast copy. */ 2708 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2709 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2710 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2711 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2712 2713 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2714 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2715 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2716 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2717 } else { 2718 PetscCall(MatCopy_Basic(A, B, str)); 2719 } 2720 PetscFunctionReturn(PETSC_SUCCESS); 2721 } 2722 2723 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2724 { 2725 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2726 2727 PetscFunctionBegin; 2728 *array = a->a; 2729 PetscFunctionReturn(PETSC_SUCCESS); 2730 } 2731 2732 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2733 { 2734 PetscFunctionBegin; 2735 *array = NULL; 2736 PetscFunctionReturn(PETSC_SUCCESS); 2737 } 2738 2739 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2740 { 2741 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2742 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2743 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2744 2745 PetscFunctionBegin; 2746 /* Set the number of nonzeros in the new matrix */ 2747 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2748 PetscFunctionReturn(PETSC_SUCCESS); 2749 } 2750 2751 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2752 { 2753 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2754 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2755 PetscBLASInt one = 1; 2756 2757 PetscFunctionBegin; 2758 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2759 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2760 if (e) { 2761 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2762 if (e) { 2763 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2764 if (e) str = SAME_NONZERO_PATTERN; 2765 } 2766 } 2767 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2768 } 2769 if (str == SAME_NONZERO_PATTERN) { 2770 PetscScalar alpha = a; 2771 PetscBLASInt bnz; 2772 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2773 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2774 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2775 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2776 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2777 } else { 2778 Mat B; 2779 PetscInt *nnz; 2780 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2781 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2782 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2783 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2784 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2785 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2786 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2787 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2788 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2789 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2790 PetscCall(MatHeaderMerge(Y, &B)); 2791 PetscCall(PetscFree(nnz)); 2792 } 2793 PetscFunctionReturn(PETSC_SUCCESS); 2794 } 2795 2796 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2797 { 2798 #if PetscDefined(USE_COMPLEX) 2799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2800 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2801 MatScalar *aa = a->a; 2802 2803 PetscFunctionBegin; 2804 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2805 PetscFunctionReturn(PETSC_SUCCESS); 2806 #else 2807 (void)A; 2808 return PETSC_SUCCESS; 2809 #endif 2810 } 2811 2812 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2813 { 2814 #if PetscDefined(USE_COMPLEX) 2815 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2816 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2817 MatScalar *aa = a->a; 2818 2819 PetscFunctionBegin; 2820 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2821 PetscFunctionReturn(PETSC_SUCCESS); 2822 #else 2823 (void)A; 2824 return PETSC_SUCCESS; 2825 #endif 2826 } 2827 2828 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2829 { 2830 #if PetscDefined(USE_COMPLEX) 2831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2832 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2833 MatScalar *aa = a->a; 2834 2835 PetscFunctionBegin; 2836 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2837 PetscFunctionReturn(PETSC_SUCCESS); 2838 #else 2839 (void)A; 2840 return PETSC_SUCCESS; 2841 #endif 2842 } 2843 2844 /* 2845 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2846 */ 2847 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2848 { 2849 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2850 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2851 PetscInt nz = a->i[m], row, *jj, mr, col; 2852 2853 PetscFunctionBegin; 2854 *nn = n; 2855 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2856 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2857 PetscCall(PetscCalloc1(n, &collengths)); 2858 PetscCall(PetscMalloc1(n + 1, &cia)); 2859 PetscCall(PetscMalloc1(nz, &cja)); 2860 jj = a->j; 2861 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2862 cia[0] = oshift; 2863 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2864 PetscCall(PetscArrayzero(collengths, n)); 2865 jj = a->j; 2866 for (row = 0; row < m; row++) { 2867 mr = a->i[row + 1] - a->i[row]; 2868 for (i = 0; i < mr; i++) { 2869 col = *jj++; 2870 2871 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2872 } 2873 } 2874 PetscCall(PetscFree(collengths)); 2875 *ia = cia; 2876 *ja = cja; 2877 PetscFunctionReturn(PETSC_SUCCESS); 2878 } 2879 2880 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2881 { 2882 PetscFunctionBegin; 2883 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2884 PetscCall(PetscFree(*ia)); 2885 PetscCall(PetscFree(*ja)); 2886 PetscFunctionReturn(PETSC_SUCCESS); 2887 } 2888 2889 /* 2890 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2891 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2892 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2893 */ 2894 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2895 { 2896 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2897 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2898 PetscInt nz = a->i[m], row, *jj, mr, col; 2899 PetscInt *cspidx; 2900 2901 PetscFunctionBegin; 2902 *nn = n; 2903 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2904 2905 PetscCall(PetscCalloc1(n, &collengths)); 2906 PetscCall(PetscMalloc1(n + 1, &cia)); 2907 PetscCall(PetscMalloc1(nz, &cja)); 2908 PetscCall(PetscMalloc1(nz, &cspidx)); 2909 jj = a->j; 2910 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2911 cia[0] = oshift; 2912 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2913 PetscCall(PetscArrayzero(collengths, n)); 2914 jj = a->j; 2915 for (row = 0; row < m; row++) { 2916 mr = a->i[row + 1] - a->i[row]; 2917 for (i = 0; i < mr; i++) { 2918 col = *jj++; 2919 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2920 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2921 } 2922 } 2923 PetscCall(PetscFree(collengths)); 2924 *ia = cia; 2925 *ja = cja; 2926 *spidx = cspidx; 2927 PetscFunctionReturn(PETSC_SUCCESS); 2928 } 2929 2930 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2931 { 2932 PetscFunctionBegin; 2933 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2934 PetscCall(PetscFree(*spidx)); 2935 PetscFunctionReturn(PETSC_SUCCESS); 2936 } 2937 2938 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2939 { 2940 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2941 2942 PetscFunctionBegin; 2943 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2944 PetscCall(MatShift_Basic(Y, a)); 2945 PetscFunctionReturn(PETSC_SUCCESS); 2946 } 2947 2948 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2949 { 2950 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2951 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2952 PetscInt m = A->rmap->N, *ailen = a->ilen; 2953 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2954 MatScalar *aa = a->a, *ap; 2955 PetscBool zero; 2956 2957 PetscFunctionBegin; 2958 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2959 if (m) rmax = ailen[0]; 2960 for (i = 1; i <= mbs; i++) { 2961 for (k = ai[i - 1]; k < ai[i]; k++) { 2962 zero = PETSC_TRUE; 2963 ap = aa + bs2 * k; 2964 for (j = 0; j < bs2 && zero; j++) { 2965 if (ap[j] != 0.0) zero = PETSC_FALSE; 2966 } 2967 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2968 else { 2969 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2970 aj[k - fshift] = aj[k]; 2971 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2972 } 2973 } 2974 ai[i - 1] -= fshift_prev; 2975 fshift_prev = fshift; 2976 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2977 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2978 rmax = PetscMax(rmax, ailen[i - 1]); 2979 } 2980 if (fshift) { 2981 if (mbs) { 2982 ai[mbs] -= fshift; 2983 a->nz = ai[mbs]; 2984 } 2985 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 2986 A->nonzerostate++; 2987 A->info.nz_unneeded += (PetscReal)fshift; 2988 a->rmax = rmax; 2989 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2990 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2991 } 2992 PetscFunctionReturn(PETSC_SUCCESS); 2993 } 2994 2995 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2996 MatGetRow_SeqBAIJ, 2997 MatRestoreRow_SeqBAIJ, 2998 MatMult_SeqBAIJ_N, 2999 /* 4*/ MatMultAdd_SeqBAIJ_N, 3000 MatMultTranspose_SeqBAIJ, 3001 MatMultTransposeAdd_SeqBAIJ, 3002 NULL, 3003 NULL, 3004 NULL, 3005 /* 10*/ NULL, 3006 MatLUFactor_SeqBAIJ, 3007 NULL, 3008 NULL, 3009 MatTranspose_SeqBAIJ, 3010 /* 15*/ MatGetInfo_SeqBAIJ, 3011 MatEqual_SeqBAIJ, 3012 MatGetDiagonal_SeqBAIJ, 3013 MatDiagonalScale_SeqBAIJ, 3014 MatNorm_SeqBAIJ, 3015 /* 20*/ NULL, 3016 MatAssemblyEnd_SeqBAIJ, 3017 MatSetOption_SeqBAIJ, 3018 MatZeroEntries_SeqBAIJ, 3019 /* 24*/ MatZeroRows_SeqBAIJ, 3020 NULL, 3021 NULL, 3022 NULL, 3023 NULL, 3024 /* 29*/ MatSetUp_Seq_Hash, 3025 NULL, 3026 NULL, 3027 NULL, 3028 NULL, 3029 /* 34*/ MatDuplicate_SeqBAIJ, 3030 NULL, 3031 NULL, 3032 MatILUFactor_SeqBAIJ, 3033 NULL, 3034 /* 39*/ MatAXPY_SeqBAIJ, 3035 MatCreateSubMatrices_SeqBAIJ, 3036 MatIncreaseOverlap_SeqBAIJ, 3037 MatGetValues_SeqBAIJ, 3038 MatCopy_SeqBAIJ, 3039 /* 44*/ NULL, 3040 MatScale_SeqBAIJ, 3041 MatShift_SeqBAIJ, 3042 NULL, 3043 MatZeroRowsColumns_SeqBAIJ, 3044 /* 49*/ NULL, 3045 MatGetRowIJ_SeqBAIJ, 3046 MatRestoreRowIJ_SeqBAIJ, 3047 MatGetColumnIJ_SeqBAIJ, 3048 MatRestoreColumnIJ_SeqBAIJ, 3049 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3050 NULL, 3051 NULL, 3052 NULL, 3053 MatSetValuesBlocked_SeqBAIJ, 3054 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3055 MatDestroy_SeqBAIJ, 3056 MatView_SeqBAIJ, 3057 NULL, 3058 NULL, 3059 /* 64*/ NULL, 3060 NULL, 3061 NULL, 3062 NULL, 3063 NULL, 3064 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3065 NULL, 3066 MatConvert_Basic, 3067 NULL, 3068 NULL, 3069 /* 74*/ NULL, 3070 MatFDColoringApply_BAIJ, 3071 NULL, 3072 NULL, 3073 NULL, 3074 /* 79*/ NULL, 3075 NULL, 3076 NULL, 3077 NULL, 3078 MatLoad_SeqBAIJ, 3079 /* 84*/ NULL, 3080 NULL, 3081 NULL, 3082 NULL, 3083 NULL, 3084 /* 89*/ NULL, 3085 NULL, 3086 NULL, 3087 NULL, 3088 NULL, 3089 /* 94*/ NULL, 3090 NULL, 3091 NULL, 3092 NULL, 3093 NULL, 3094 /* 99*/ NULL, 3095 NULL, 3096 NULL, 3097 MatConjugate_SeqBAIJ, 3098 NULL, 3099 /*104*/ NULL, 3100 MatRealPart_SeqBAIJ, 3101 MatImaginaryPart_SeqBAIJ, 3102 NULL, 3103 NULL, 3104 /*109*/ NULL, 3105 NULL, 3106 NULL, 3107 NULL, 3108 MatMissingDiagonal_SeqBAIJ, 3109 /*114*/ NULL, 3110 NULL, 3111 NULL, 3112 NULL, 3113 NULL, 3114 /*119*/ NULL, 3115 NULL, 3116 MatMultHermitianTranspose_SeqBAIJ, 3117 MatMultHermitianTransposeAdd_SeqBAIJ, 3118 NULL, 3119 /*124*/ NULL, 3120 MatGetColumnReductions_SeqBAIJ, 3121 MatInvertBlockDiagonal_SeqBAIJ, 3122 NULL, 3123 NULL, 3124 /*129*/ NULL, 3125 NULL, 3126 NULL, 3127 NULL, 3128 NULL, 3129 /*134*/ NULL, 3130 NULL, 3131 NULL, 3132 NULL, 3133 NULL, 3134 /*139*/ MatSetBlockSizes_Default, 3135 NULL, 3136 NULL, 3137 MatFDColoringSetUp_SeqXAIJ, 3138 NULL, 3139 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3140 MatDestroySubMatrices_SeqBAIJ, 3141 NULL, 3142 NULL, 3143 NULL, 3144 NULL, 3145 /*150*/ NULL, 3146 MatEliminateZeros_SeqBAIJ, 3147 MatGetRowSumAbs_SeqBAIJ, 3148 NULL, 3149 NULL, 3150 /*155*/ NULL, 3151 MatCopyHashToXAIJ_Seq_Hash}; 3152 3153 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3154 { 3155 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3156 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3157 3158 PetscFunctionBegin; 3159 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3160 3161 /* allocate space for values if not already there */ 3162 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3163 3164 /* copy values over */ 3165 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3166 PetscFunctionReturn(PETSC_SUCCESS); 3167 } 3168 3169 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3170 { 3171 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3172 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3173 3174 PetscFunctionBegin; 3175 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3176 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3177 3178 /* copy values over */ 3179 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3180 PetscFunctionReturn(PETSC_SUCCESS); 3181 } 3182 3183 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3184 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3185 3186 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3187 { 3188 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3189 PetscInt i, mbs, nbs, bs2; 3190 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3191 3192 PetscFunctionBegin; 3193 if (B->hash_active) { 3194 PetscInt bs; 3195 B->ops[0] = b->cops; 3196 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3197 PetscCall(MatGetBlockSize(B, &bs)); 3198 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3199 PetscCall(PetscFree(b->dnz)); 3200 PetscCall(PetscFree(b->bdnz)); 3201 B->hash_active = PETSC_FALSE; 3202 } 3203 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3204 if (nz == MAT_SKIP_ALLOCATION) { 3205 skipallocation = PETSC_TRUE; 3206 nz = 0; 3207 } 3208 3209 PetscCall(MatSetBlockSize(B, bs)); 3210 PetscCall(PetscLayoutSetUp(B->rmap)); 3211 PetscCall(PetscLayoutSetUp(B->cmap)); 3212 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3213 3214 B->preallocated = PETSC_TRUE; 3215 3216 mbs = B->rmap->n / bs; 3217 nbs = B->cmap->n / bs; 3218 bs2 = bs * bs; 3219 3220 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3221 3222 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3223 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3224 if (nnz) { 3225 for (i = 0; i < mbs; i++) { 3226 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3227 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3228 } 3229 } 3230 3231 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3232 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3233 PetscOptionsEnd(); 3234 3235 if (!flg) { 3236 switch (bs) { 3237 case 1: 3238 B->ops->mult = MatMult_SeqBAIJ_1; 3239 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3240 break; 3241 case 2: 3242 B->ops->mult = MatMult_SeqBAIJ_2; 3243 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3244 break; 3245 case 3: 3246 B->ops->mult = MatMult_SeqBAIJ_3; 3247 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3248 break; 3249 case 4: 3250 B->ops->mult = MatMult_SeqBAIJ_4; 3251 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3252 break; 3253 case 5: 3254 B->ops->mult = MatMult_SeqBAIJ_5; 3255 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3256 break; 3257 case 6: 3258 B->ops->mult = MatMult_SeqBAIJ_6; 3259 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3260 break; 3261 case 7: 3262 B->ops->mult = MatMult_SeqBAIJ_7; 3263 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3264 break; 3265 case 9: { 3266 PetscInt version = 1; 3267 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3268 switch (version) { 3269 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3270 case 1: 3271 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3272 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3273 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3274 break; 3275 #endif 3276 default: 3277 B->ops->mult = MatMult_SeqBAIJ_N; 3278 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3279 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3280 break; 3281 } 3282 break; 3283 } 3284 case 11: 3285 B->ops->mult = MatMult_SeqBAIJ_11; 3286 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3287 break; 3288 case 12: { 3289 PetscInt version = 1; 3290 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3291 switch (version) { 3292 case 1: 3293 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3294 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3295 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3296 break; 3297 case 2: 3298 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3299 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3300 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3301 break; 3302 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3303 case 3: 3304 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3305 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3306 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3307 break; 3308 #endif 3309 default: 3310 B->ops->mult = MatMult_SeqBAIJ_N; 3311 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3312 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3313 break; 3314 } 3315 break; 3316 } 3317 case 15: { 3318 PetscInt version = 1; 3319 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3320 switch (version) { 3321 case 1: 3322 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3323 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3324 break; 3325 case 2: 3326 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3327 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3328 break; 3329 case 3: 3330 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3331 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3332 break; 3333 case 4: 3334 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3335 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3336 break; 3337 default: 3338 B->ops->mult = MatMult_SeqBAIJ_N; 3339 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3340 break; 3341 } 3342 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3343 break; 3344 } 3345 default: 3346 B->ops->mult = MatMult_SeqBAIJ_N; 3347 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3348 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3349 break; 3350 } 3351 } 3352 B->ops->sor = MatSOR_SeqBAIJ; 3353 b->mbs = mbs; 3354 b->nbs = nbs; 3355 if (!skipallocation) { 3356 if (!b->imax) { 3357 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3358 3359 b->free_imax_ilen = PETSC_TRUE; 3360 } 3361 /* b->ilen will count nonzeros in each block row so far. */ 3362 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3363 if (!nnz) { 3364 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3365 else if (nz < 0) nz = 1; 3366 nz = PetscMin(nz, nbs); 3367 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3368 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3369 } else { 3370 PetscInt64 nz64 = 0; 3371 for (i = 0; i < mbs; i++) { 3372 b->imax[i] = nnz[i]; 3373 nz64 += nnz[i]; 3374 } 3375 PetscCall(PetscIntCast(nz64, &nz)); 3376 } 3377 3378 /* allocate the matrix space */ 3379 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3380 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 3381 PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3382 if (B->structure_only) { 3383 b->free_a = PETSC_FALSE; 3384 } else { 3385 PetscInt nzbs2 = 0; 3386 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3387 PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 3388 b->free_a = PETSC_TRUE; 3389 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3390 } 3391 b->free_ij = PETSC_TRUE; 3392 PetscCall(PetscArrayzero(b->j, nz)); 3393 3394 b->i[0] = 0; 3395 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3396 } else { 3397 b->free_a = PETSC_FALSE; 3398 b->free_ij = PETSC_FALSE; 3399 } 3400 3401 b->bs2 = bs2; 3402 b->mbs = mbs; 3403 b->nz = 0; 3404 b->maxnz = nz; 3405 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3406 B->was_assembled = PETSC_FALSE; 3407 B->assembled = PETSC_FALSE; 3408 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } 3411 3412 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3413 { 3414 PetscInt i, m, nz, nz_max = 0, *nnz; 3415 PetscScalar *values = NULL; 3416 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3417 3418 PetscFunctionBegin; 3419 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3420 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3421 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3422 PetscCall(PetscLayoutSetUp(B->rmap)); 3423 PetscCall(PetscLayoutSetUp(B->cmap)); 3424 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3425 m = B->rmap->n / bs; 3426 3427 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3428 PetscCall(PetscMalloc1(m + 1, &nnz)); 3429 for (i = 0; i < m; i++) { 3430 nz = ii[i + 1] - ii[i]; 3431 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3432 nz_max = PetscMax(nz_max, nz); 3433 nnz[i] = nz; 3434 } 3435 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3436 PetscCall(PetscFree(nnz)); 3437 3438 values = (PetscScalar *)V; 3439 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3440 for (i = 0; i < m; i++) { 3441 PetscInt ncols = ii[i + 1] - ii[i]; 3442 const PetscInt *icols = jj + ii[i]; 3443 if (bs == 1 || !roworiented) { 3444 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3445 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3446 } else { 3447 PetscInt j; 3448 for (j = 0; j < ncols; j++) { 3449 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3450 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3451 } 3452 } 3453 } 3454 if (!V) PetscCall(PetscFree(values)); 3455 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3456 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3457 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3458 PetscFunctionReturn(PETSC_SUCCESS); 3459 } 3460 3461 /*@C 3462 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3463 3464 Not Collective 3465 3466 Input Parameter: 3467 . A - a `MATSEQBAIJ` matrix 3468 3469 Output Parameter: 3470 . array - pointer to the data 3471 3472 Level: intermediate 3473 3474 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3475 @*/ 3476 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3477 { 3478 PetscFunctionBegin; 3479 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3480 PetscFunctionReturn(PETSC_SUCCESS); 3481 } 3482 3483 /*@C 3484 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3485 3486 Not Collective 3487 3488 Input Parameters: 3489 + A - a `MATSEQBAIJ` matrix 3490 - array - pointer to the data 3491 3492 Level: intermediate 3493 3494 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3495 @*/ 3496 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3497 { 3498 PetscFunctionBegin; 3499 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3500 PetscFunctionReturn(PETSC_SUCCESS); 3501 } 3502 3503 /*MC 3504 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3505 block sparse compressed row format. 3506 3507 Options Database Keys: 3508 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3509 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3510 3511 Level: beginner 3512 3513 Notes: 3514 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3515 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3516 3517 Run with `-info` to see what version of the matrix-vector product is being used 3518 3519 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3520 M*/ 3521 3522 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3523 3524 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3525 { 3526 PetscMPIInt size; 3527 Mat_SeqBAIJ *b; 3528 3529 PetscFunctionBegin; 3530 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3531 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3532 3533 PetscCall(PetscNew(&b)); 3534 B->data = (void *)b; 3535 B->ops[0] = MatOps_Values; 3536 3537 b->row = NULL; 3538 b->col = NULL; 3539 b->icol = NULL; 3540 b->reallocs = 0; 3541 b->saved_values = NULL; 3542 3543 b->roworiented = PETSC_TRUE; 3544 b->nonew = 0; 3545 b->diag = NULL; 3546 B->spptr = NULL; 3547 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3548 b->keepnonzeropattern = PETSC_FALSE; 3549 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3552 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3553 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3554 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3555 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3556 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3557 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3558 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3559 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3560 #if defined(PETSC_HAVE_HYPRE) 3561 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3562 #endif 3563 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3564 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3565 PetscFunctionReturn(PETSC_SUCCESS); 3566 } 3567 3568 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3569 { 3570 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3571 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3572 3573 PetscFunctionBegin; 3574 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3575 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3576 3577 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3578 c->imax = a->imax; 3579 c->ilen = a->ilen; 3580 c->free_imax_ilen = PETSC_FALSE; 3581 } else { 3582 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3583 for (i = 0; i < mbs; i++) { 3584 c->imax[i] = a->imax[i]; 3585 c->ilen[i] = a->ilen[i]; 3586 } 3587 c->free_imax_ilen = PETSC_TRUE; 3588 } 3589 3590 /* allocate the matrix space */ 3591 if (mallocmatspace) { 3592 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3593 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3594 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3595 c->free_a = PETSC_TRUE; 3596 c->i = a->i; 3597 c->j = a->j; 3598 c->free_ij = PETSC_FALSE; 3599 c->parent = A; 3600 C->preallocated = PETSC_TRUE; 3601 C->assembled = PETSC_TRUE; 3602 3603 PetscCall(PetscObjectReference((PetscObject)A)); 3604 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3605 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3606 } else { 3607 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3608 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 3609 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3610 c->free_a = PETSC_TRUE; 3611 c->free_ij = PETSC_TRUE; 3612 3613 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3614 if (mbs > 0) { 3615 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3616 if (cpvalues == MAT_COPY_VALUES) { 3617 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3618 } else { 3619 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3620 } 3621 } 3622 C->preallocated = PETSC_TRUE; 3623 C->assembled = PETSC_TRUE; 3624 } 3625 } 3626 3627 c->roworiented = a->roworiented; 3628 c->nonew = a->nonew; 3629 3630 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3631 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3632 3633 c->bs2 = a->bs2; 3634 c->mbs = a->mbs; 3635 c->nbs = a->nbs; 3636 3637 if (a->diag) { 3638 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3639 c->diag = a->diag; 3640 c->free_diag = PETSC_FALSE; 3641 } else { 3642 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3643 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3644 c->free_diag = PETSC_TRUE; 3645 } 3646 } else c->diag = NULL; 3647 3648 c->nz = a->nz; 3649 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3650 c->solve_work = NULL; 3651 c->mult_work = NULL; 3652 c->sor_workt = NULL; 3653 c->sor_work = NULL; 3654 3655 c->compressedrow.use = a->compressedrow.use; 3656 c->compressedrow.nrows = a->compressedrow.nrows; 3657 if (a->compressedrow.use) { 3658 i = a->compressedrow.nrows; 3659 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3660 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3661 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3662 } else { 3663 c->compressedrow.use = PETSC_FALSE; 3664 c->compressedrow.i = NULL; 3665 c->compressedrow.rindex = NULL; 3666 } 3667 c->nonzerorowcnt = a->nonzerorowcnt; 3668 C->nonzerostate = A->nonzerostate; 3669 3670 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3671 PetscFunctionReturn(PETSC_SUCCESS); 3672 } 3673 3674 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3675 { 3676 PetscFunctionBegin; 3677 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3678 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3679 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3680 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3681 PetscFunctionReturn(PETSC_SUCCESS); 3682 } 3683 3684 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3685 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3686 { 3687 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3688 PetscInt *rowidxs, *colidxs; 3689 PetscScalar *matvals; 3690 3691 PetscFunctionBegin; 3692 PetscCall(PetscViewerSetUp(viewer)); 3693 3694 /* read matrix header */ 3695 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3696 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3697 M = header[1]; 3698 N = header[2]; 3699 nz = header[3]; 3700 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3701 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3702 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3703 3704 /* set block sizes from the viewer's .info file */ 3705 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3706 /* set local and global sizes if not set already */ 3707 if (mat->rmap->n < 0) mat->rmap->n = M; 3708 if (mat->cmap->n < 0) mat->cmap->n = N; 3709 if (mat->rmap->N < 0) mat->rmap->N = M; 3710 if (mat->cmap->N < 0) mat->cmap->N = N; 3711 PetscCall(PetscLayoutSetUp(mat->rmap)); 3712 PetscCall(PetscLayoutSetUp(mat->cmap)); 3713 3714 /* check if the matrix sizes are correct */ 3715 PetscCall(MatGetSize(mat, &rows, &cols)); 3716 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3717 PetscCall(MatGetBlockSize(mat, &bs)); 3718 PetscCall(MatGetLocalSize(mat, &m, &n)); 3719 mbs = m / bs; 3720 nbs = n / bs; 3721 3722 /* read in row lengths, column indices and nonzero values */ 3723 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3724 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3725 rowidxs[0] = 0; 3726 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3727 sum = rowidxs[m]; 3728 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3729 3730 /* read in column indices and nonzero values */ 3731 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3732 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3733 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3734 3735 { /* preallocate matrix storage */ 3736 PetscBT bt; /* helper bit set to count nonzeros */ 3737 PetscInt *nnz; 3738 PetscBool sbaij; 3739 3740 PetscCall(PetscBTCreate(nbs, &bt)); 3741 PetscCall(PetscCalloc1(mbs, &nnz)); 3742 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3743 for (i = 0; i < mbs; i++) { 3744 PetscCall(PetscBTMemzero(nbs, bt)); 3745 for (k = 0; k < bs; k++) { 3746 PetscInt row = bs * i + k; 3747 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3748 PetscInt col = colidxs[j]; 3749 if (!sbaij || col >= row) 3750 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3751 } 3752 } 3753 } 3754 PetscCall(PetscBTDestroy(&bt)); 3755 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3756 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3757 PetscCall(PetscFree(nnz)); 3758 } 3759 3760 /* store matrix values */ 3761 for (i = 0; i < m; i++) { 3762 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3763 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3764 } 3765 3766 PetscCall(PetscFree(rowidxs)); 3767 PetscCall(PetscFree2(colidxs, matvals)); 3768 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3769 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3770 PetscFunctionReturn(PETSC_SUCCESS); 3771 } 3772 3773 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3774 { 3775 PetscBool isbinary; 3776 3777 PetscFunctionBegin; 3778 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3779 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3780 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3781 PetscFunctionReturn(PETSC_SUCCESS); 3782 } 3783 3784 /*@ 3785 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3786 compressed row) format. For good matrix assembly performance the 3787 user should preallocate the matrix storage by setting the parameter `nz` 3788 (or the array `nnz`). 3789 3790 Collective 3791 3792 Input Parameters: 3793 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3794 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3795 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3796 . m - number of rows 3797 . n - number of columns 3798 . nz - number of nonzero blocks per block row (same for all rows) 3799 - nnz - array containing the number of nonzero blocks in the various block rows 3800 (possibly different for each block row) or `NULL` 3801 3802 Output Parameter: 3803 . A - the matrix 3804 3805 Options Database Keys: 3806 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3807 - -mat_block_size - size of the blocks to use 3808 3809 Level: intermediate 3810 3811 Notes: 3812 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3813 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3814 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3815 3816 The number of rows and columns must be divisible by blocksize. 3817 3818 If the `nnz` parameter is given then the `nz` parameter is ignored 3819 3820 A nonzero block is any block that as 1 or more nonzeros in it 3821 3822 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3823 storage. That is, the stored row and column indices can begin at 3824 either one (as in Fortran) or zero. 3825 3826 Specify the preallocated storage with either `nz` or `nnz` (not both). 3827 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3828 allocation. See [Sparse Matrices](sec_matsparse) for details. 3829 matrices. 3830 3831 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3832 @*/ 3833 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3834 { 3835 PetscFunctionBegin; 3836 PetscCall(MatCreate(comm, A)); 3837 PetscCall(MatSetSizes(*A, m, n, m, n)); 3838 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3839 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3840 PetscFunctionReturn(PETSC_SUCCESS); 3841 } 3842 3843 /*@ 3844 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3845 per row in the matrix. For good matrix assembly performance the 3846 user should preallocate the matrix storage by setting the parameter `nz` 3847 (or the array `nnz`). 3848 3849 Collective 3850 3851 Input Parameters: 3852 + B - the matrix 3853 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3854 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3855 . nz - number of block nonzeros per block row (same for all rows) 3856 - nnz - array containing the number of block nonzeros in the various block rows 3857 (possibly different for each block row) or `NULL` 3858 3859 Options Database Keys: 3860 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3861 - -mat_block_size - size of the blocks to use 3862 3863 Level: intermediate 3864 3865 Notes: 3866 If the `nnz` parameter is given then the `nz` parameter is ignored 3867 3868 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3869 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3870 You can also run with the option `-info` and look for messages with the string 3871 malloc in them to see if additional memory allocation was needed. 3872 3873 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3874 storage. That is, the stored row and column indices can begin at 3875 either one (as in Fortran) or zero. 3876 3877 Specify the preallocated storage with either `nz` or `nnz` (not both). 3878 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3879 allocation. See [Sparse Matrices](sec_matsparse) for details. 3880 3881 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3882 @*/ 3883 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3884 { 3885 PetscFunctionBegin; 3886 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3887 PetscValidType(B, 1); 3888 PetscValidLogicalCollectiveInt(B, bs, 2); 3889 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3890 PetscFunctionReturn(PETSC_SUCCESS); 3891 } 3892 3893 /*@C 3894 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3895 3896 Collective 3897 3898 Input Parameters: 3899 + B - the matrix 3900 . bs - the blocksize 3901 . i - the indices into `j` for the start of each local row (indices start with zero) 3902 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 3903 - v - optional values in the matrix, use `NULL` if not provided 3904 3905 Level: advanced 3906 3907 Notes: 3908 The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3909 3910 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3911 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3912 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3913 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3914 block column and the second index is over columns within a block. 3915 3916 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3917 3918 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3919 @*/ 3920 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3921 { 3922 PetscFunctionBegin; 3923 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3924 PetscValidType(B, 1); 3925 PetscValidLogicalCollectiveInt(B, bs, 2); 3926 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3927 PetscFunctionReturn(PETSC_SUCCESS); 3928 } 3929 3930 /*@ 3931 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3932 3933 Collective 3934 3935 Input Parameters: 3936 + comm - must be an MPI communicator of size 1 3937 . bs - size of block 3938 . m - number of rows 3939 . n - number of columns 3940 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3941 . j - column indices 3942 - a - matrix values 3943 3944 Output Parameter: 3945 . mat - the matrix 3946 3947 Level: advanced 3948 3949 Notes: 3950 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3951 once the matrix is destroyed 3952 3953 You cannot set new nonzero locations into this matrix, that will generate an error. 3954 3955 The `i` and `j` indices are 0 based 3956 3957 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3958 3959 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3960 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3961 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3962 with column-major ordering within blocks. 3963 3964 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3965 @*/ 3966 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3967 { 3968 Mat_SeqBAIJ *baij; 3969 3970 PetscFunctionBegin; 3971 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3972 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3973 3974 PetscCall(MatCreate(comm, mat)); 3975 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3976 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3977 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3978 baij = (Mat_SeqBAIJ *)(*mat)->data; 3979 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3980 3981 baij->i = i; 3982 baij->j = j; 3983 baij->a = a; 3984 3985 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3986 baij->free_a = PETSC_FALSE; 3987 baij->free_ij = PETSC_FALSE; 3988 baij->free_imax_ilen = PETSC_TRUE; 3989 3990 for (PetscInt ii = 0; ii < m; ii++) { 3991 const PetscInt row_len = i[ii + 1] - i[ii]; 3992 3993 baij->ilen[ii] = baij->imax[ii] = row_len; 3994 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3995 } 3996 if (PetscDefined(USE_DEBUG)) { 3997 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3998 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3999 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 4000 } 4001 } 4002 4003 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 4004 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 4005 PetscFunctionReturn(PETSC_SUCCESS); 4006 } 4007 4008 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4009 { 4010 PetscFunctionBegin; 4011 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 4012 PetscFunctionReturn(PETSC_SUCCESS); 4013 } 4014