1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 12 #define TYPE BAIJ 13 #define TYPE_BS 14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 15 #undef TYPE_BS 16 #define TYPE_BS _BS 17 #define TYPE_BS_ON 18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 19 #undef TYPE_BS 20 #include "../src/mat/impls/aij/seq/seqhashmat.h" 21 #undef TYPE 22 #undef TYPE_BS_ON 23 24 #if defined(PETSC_HAVE_HYPRE) 25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 26 #endif 27 28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 30 #endif 31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 32 33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 34 { 35 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 36 PetscInt m, n, ib, jb, bs = A->rmap->bs; 37 MatScalar *a_val = a_aij->a; 38 39 PetscFunctionBegin; 40 PetscCall(MatGetSize(A, &m, &n)); 41 PetscCall(PetscArrayzero(reductions, n)); 42 if (type == NORM_2) { 43 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 44 for (jb = 0; jb < bs; jb++) { 45 for (ib = 0; ib < bs; ib++) { 46 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 47 a_val++; 48 } 49 } 50 } 51 } else if (type == NORM_1) { 52 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 53 for (jb = 0; jb < bs; jb++) { 54 for (ib = 0; ib < bs; ib++) { 55 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 56 a_val++; 57 } 58 } 59 } 60 } else if (type == NORM_INFINITY) { 61 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 62 for (jb = 0; jb < bs; jb++) { 63 for (ib = 0; ib < bs; ib++) { 64 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 65 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 66 a_val++; 67 } 68 } 69 } 70 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 71 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 72 for (jb = 0; jb < bs; jb++) { 73 for (ib = 0; ib < bs; ib++) { 74 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 75 a_val++; 76 } 77 } 78 } 79 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 81 for (jb = 0; jb < bs; jb++) { 82 for (ib = 0; ib < bs; ib++) { 83 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 84 a_val++; 85 } 86 } 87 } 88 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 89 if (type == NORM_2) { 90 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 91 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 92 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 93 } 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 98 { 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 100 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 101 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 102 PetscReal shift = 0.0; 103 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 104 105 PetscFunctionBegin; 106 allowzeropivot = PetscNot(A->erroriffailure); 107 108 if (a->idiagvalid) { 109 if (values) *values = a->idiag; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 113 diag_offset = a->diag; 114 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 115 diag = a->idiag; 116 if (values) *values = a->idiag; 117 /* factor and invert each block */ 118 switch (bs) { 119 case 1: 120 for (i = 0; i < mbs; i++) { 121 odiag = v + 1 * diag_offset[i]; 122 diag[0] = odiag[0]; 123 124 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 125 if (allowzeropivot) { 126 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 128 A->factorerror_zeropivot_row = i; 129 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 130 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * diag_offset[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * diag_offset[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * diag_offset[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * diag_offset[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * diag_offset[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * diag_offset[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 /* 1414 Checks for missing diagonals 1415 */ 1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1419 PetscInt *diag, *ii = a->i, i; 1420 1421 PetscFunctionBegin; 1422 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1423 *missing = PETSC_FALSE; 1424 if (A->rmap->n > 0 && !ii) { 1425 *missing = PETSC_TRUE; 1426 if (d) *d = 0; 1427 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1428 } else { 1429 PetscInt n; 1430 n = PetscMin(a->mbs, a->nbs); 1431 diag = a->diag; 1432 for (i = 0; i < n; i++) { 1433 if (diag[i] >= ii[i + 1]) { 1434 *missing = PETSC_TRUE; 1435 if (d) *d = i; 1436 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1437 break; 1438 } 1439 } 1440 } 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1445 { 1446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1447 PetscInt i, j, m = a->mbs; 1448 1449 PetscFunctionBegin; 1450 if (!a->diag) { 1451 PetscCall(PetscMalloc1(m, &a->diag)); 1452 a->free_diag = PETSC_TRUE; 1453 } 1454 for (i = 0; i < m; i++) { 1455 a->diag[i] = a->i[i + 1]; 1456 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1457 if (a->j[j] == i) { 1458 a->diag[i] = j; 1459 break; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1467 { 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1469 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1470 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1471 1472 PetscFunctionBegin; 1473 *nn = n; 1474 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1475 if (symmetric) { 1476 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1477 nz = tia[n]; 1478 } else { 1479 tia = a->i; 1480 tja = a->j; 1481 } 1482 1483 if (!blockcompressed && bs > 1) { 1484 (*nn) *= bs; 1485 /* malloc & create the natural set of indices */ 1486 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1487 if (n) { 1488 (*ia)[0] = oshift; 1489 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1490 } 1491 1492 for (i = 1; i < n; i++) { 1493 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1494 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1495 } 1496 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1497 1498 if (inja) { 1499 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1500 cnt = 0; 1501 for (i = 0; i < n; i++) { 1502 for (j = 0; j < bs; j++) { 1503 for (k = tia[i]; k < tia[i + 1]; k++) { 1504 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1505 } 1506 } 1507 } 1508 } 1509 1510 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1511 PetscCall(PetscFree(tia)); 1512 PetscCall(PetscFree(tja)); 1513 } 1514 } else if (oshift == 1) { 1515 if (symmetric) { 1516 nz = tia[A->rmap->n / bs]; 1517 /* add 1 to i and j indices */ 1518 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1519 *ia = tia; 1520 if (ja) { 1521 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1522 *ja = tja; 1523 } 1524 } else { 1525 nz = a->i[A->rmap->n / bs]; 1526 /* malloc space and add 1 to i and j indices */ 1527 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1528 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1529 if (ja) { 1530 PetscCall(PetscMalloc1(nz, ja)); 1531 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1532 } 1533 } 1534 } else { 1535 *ia = tia; 1536 if (ja) *ja = tja; 1537 } 1538 PetscFunctionReturn(PETSC_SUCCESS); 1539 } 1540 1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1542 { 1543 PetscFunctionBegin; 1544 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1545 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1546 PetscCall(PetscFree(*ia)); 1547 if (ja) PetscCall(PetscFree(*ja)); 1548 } 1549 PetscFunctionReturn(PETSC_SUCCESS); 1550 } 1551 1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1553 { 1554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1555 1556 PetscFunctionBegin; 1557 if (A->hash_active) { 1558 PetscInt bs; 1559 A->ops[0] = a->cops; 1560 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1561 PetscCall(MatGetBlockSize(A, &bs)); 1562 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1563 PetscCall(PetscFree(a->dnz)); 1564 PetscCall(PetscFree(a->bdnz)); 1565 A->hash_active = PETSC_FALSE; 1566 } 1567 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1568 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1569 PetscCall(ISDestroy(&a->row)); 1570 PetscCall(ISDestroy(&a->col)); 1571 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1572 PetscCall(PetscFree(a->idiag)); 1573 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1574 PetscCall(PetscFree(a->solve_work)); 1575 PetscCall(PetscFree(a->mult_work)); 1576 PetscCall(PetscFree(a->sor_workt)); 1577 PetscCall(PetscFree(a->sor_work)); 1578 PetscCall(ISDestroy(&a->icol)); 1579 PetscCall(PetscFree(a->saved_values)); 1580 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1581 1582 PetscCall(MatDestroy(&a->sbaijMat)); 1583 PetscCall(MatDestroy(&a->parent)); 1584 PetscCall(PetscFree(A->data)); 1585 1586 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1596 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1597 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1598 #if defined(PETSC_HAVE_HYPRE) 1599 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1600 #endif 1601 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1602 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 } 1605 1606 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1607 { 1608 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1609 1610 PetscFunctionBegin; 1611 switch (op) { 1612 case MAT_ROW_ORIENTED: 1613 a->roworiented = flg; 1614 break; 1615 case MAT_KEEP_NONZERO_PATTERN: 1616 a->keepnonzeropattern = flg; 1617 break; 1618 case MAT_NEW_NONZERO_LOCATIONS: 1619 a->nonew = (flg ? 0 : 1); 1620 break; 1621 case MAT_NEW_NONZERO_LOCATION_ERR: 1622 a->nonew = (flg ? -1 : 0); 1623 break; 1624 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1625 a->nonew = (flg ? -2 : 0); 1626 break; 1627 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1628 a->nounused = (flg ? -1 : 0); 1629 break; 1630 case MAT_FORCE_DIAGONAL_ENTRIES: 1631 case MAT_IGNORE_OFF_PROC_ENTRIES: 1632 case MAT_USE_HASH_TABLE: 1633 case MAT_SORTED_FULL: 1634 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1635 break; 1636 case MAT_SPD: 1637 case MAT_SYMMETRIC: 1638 case MAT_STRUCTURALLY_SYMMETRIC: 1639 case MAT_HERMITIAN: 1640 case MAT_SYMMETRY_ETERNAL: 1641 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1642 case MAT_SUBMAT_SINGLEIS: 1643 case MAT_STRUCTURE_ONLY: 1644 case MAT_SPD_ETERNAL: 1645 /* if the diagonal matrix is square it inherits some of the properties above */ 1646 break; 1647 default: 1648 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1649 } 1650 PetscFunctionReturn(PETSC_SUCCESS); 1651 } 1652 1653 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1654 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1655 { 1656 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1657 MatScalar *aa_i; 1658 PetscScalar *v_i; 1659 1660 PetscFunctionBegin; 1661 bs = A->rmap->bs; 1662 bs2 = bs * bs; 1663 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1664 1665 bn = row / bs; /* Block number */ 1666 bp = row % bs; /* Block Position */ 1667 M = ai[bn + 1] - ai[bn]; 1668 *nz = bs * M; 1669 1670 if (v) { 1671 *v = NULL; 1672 if (*nz) { 1673 PetscCall(PetscMalloc1(*nz, v)); 1674 for (i = 0; i < M; i++) { /* for each block in the block row */ 1675 v_i = *v + i * bs; 1676 aa_i = aa + bs2 * (ai[bn] + i); 1677 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1678 } 1679 } 1680 } 1681 1682 if (idx) { 1683 *idx = NULL; 1684 if (*nz) { 1685 PetscCall(PetscMalloc1(*nz, idx)); 1686 for (i = 0; i < M; i++) { /* for each block in the block row */ 1687 idx_i = *idx + i * bs; 1688 itmp = bs * aj[ai[bn] + i]; 1689 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1690 } 1691 } 1692 } 1693 PetscFunctionReturn(PETSC_SUCCESS); 1694 } 1695 1696 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1697 { 1698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1699 1700 PetscFunctionBegin; 1701 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1706 { 1707 PetscFunctionBegin; 1708 if (nz) *nz = 0; 1709 if (idx) PetscCall(PetscFree(*idx)); 1710 if (v) PetscCall(PetscFree(*v)); 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1715 { 1716 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1717 Mat C; 1718 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1719 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1720 MatScalar *ata, *aa = a->a; 1721 1722 PetscFunctionBegin; 1723 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1724 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1725 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1726 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1727 1728 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1729 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1730 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1731 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1732 1733 at = (Mat_SeqBAIJ *)C->data; 1734 ati = at->i; 1735 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1736 } else { 1737 C = *B; 1738 at = (Mat_SeqBAIJ *)C->data; 1739 ati = at->i; 1740 } 1741 1742 atj = at->j; 1743 ata = at->a; 1744 1745 /* Copy ati into atfill so we have locations of the next free space in atj */ 1746 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1747 1748 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1749 for (i = 0; i < mbs; i++) { 1750 anzj = ai[i + 1] - ai[i]; 1751 for (j = 0; j < anzj; j++) { 1752 atj[atfill[*aj]] = i; 1753 for (kr = 0; kr < bs; kr++) { 1754 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1755 } 1756 atfill[*aj++] += 1; 1757 } 1758 } 1759 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1760 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1761 1762 /* Clean up temporary space and complete requests. */ 1763 PetscCall(PetscFree(atfill)); 1764 1765 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1766 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1767 *B = C; 1768 } else { 1769 PetscCall(MatHeaderMerge(A, &C)); 1770 } 1771 PetscFunctionReturn(PETSC_SUCCESS); 1772 } 1773 1774 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1775 { 1776 Mat Btrans; 1777 1778 PetscFunctionBegin; 1779 *f = PETSC_FALSE; 1780 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1781 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1782 PetscCall(MatDestroy(&Btrans)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1787 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1788 { 1789 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1790 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1791 PetscInt *rowlens, *colidxs; 1792 PetscScalar *matvals; 1793 1794 PetscFunctionBegin; 1795 PetscCall(PetscViewerSetUp(viewer)); 1796 1797 M = mat->rmap->N; 1798 N = mat->cmap->N; 1799 m = mat->rmap->n; 1800 bs = mat->rmap->bs; 1801 nz = bs * bs * A->nz; 1802 1803 /* write matrix header */ 1804 header[0] = MAT_FILE_CLASSID; 1805 header[1] = M; 1806 header[2] = N; 1807 header[3] = nz; 1808 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1809 1810 /* store row lengths */ 1811 PetscCall(PetscMalloc1(m, &rowlens)); 1812 for (cnt = 0, i = 0; i < A->mbs; i++) 1813 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1814 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1815 PetscCall(PetscFree(rowlens)); 1816 1817 /* store column indices */ 1818 PetscCall(PetscMalloc1(nz, &colidxs)); 1819 for (cnt = 0, i = 0; i < A->mbs; i++) 1820 for (k = 0; k < bs; k++) 1821 for (j = A->i[i]; j < A->i[i + 1]; j++) 1822 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1823 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1824 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1825 PetscCall(PetscFree(colidxs)); 1826 1827 /* store nonzero values */ 1828 PetscCall(PetscMalloc1(nz, &matvals)); 1829 for (cnt = 0, i = 0; i < A->mbs; i++) 1830 for (k = 0; k < bs; k++) 1831 for (j = A->i[i]; j < A->i[i + 1]; j++) 1832 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1833 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1834 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1835 PetscCall(PetscFree(matvals)); 1836 1837 /* write block size option to the viewer's .info file */ 1838 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1839 PetscFunctionReturn(PETSC_SUCCESS); 1840 } 1841 1842 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1843 { 1844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1845 PetscInt i, bs = A->rmap->bs, k; 1846 1847 PetscFunctionBegin; 1848 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1849 for (i = 0; i < a->mbs; i++) { 1850 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1851 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1852 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1853 } 1854 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1855 PetscFunctionReturn(PETSC_SUCCESS); 1856 } 1857 1858 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1859 { 1860 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1861 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1862 PetscViewerFormat format; 1863 1864 PetscFunctionBegin; 1865 if (A->structure_only) { 1866 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 1870 PetscCall(PetscViewerGetFormat(viewer, &format)); 1871 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1872 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1873 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1874 const char *matname; 1875 Mat aij; 1876 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1877 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1878 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1879 PetscCall(MatView(aij, viewer)); 1880 PetscCall(MatDestroy(&aij)); 1881 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1884 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1885 for (i = 0; i < a->mbs; i++) { 1886 for (j = 0; j < bs; j++) { 1887 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1888 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1889 for (l = 0; l < bs; l++) { 1890 #if defined(PETSC_USE_COMPLEX) 1891 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1892 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1893 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1894 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1895 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1896 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1897 } 1898 #else 1899 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1900 #endif 1901 } 1902 } 1903 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1904 } 1905 } 1906 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1907 } else { 1908 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1909 for (i = 0; i < a->mbs; i++) { 1910 for (j = 0; j < bs; j++) { 1911 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1912 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1913 for (l = 0; l < bs; l++) { 1914 #if defined(PETSC_USE_COMPLEX) 1915 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1916 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1917 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1918 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1919 } else { 1920 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1921 } 1922 #else 1923 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1924 #endif 1925 } 1926 } 1927 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1928 } 1929 } 1930 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1931 } 1932 PetscCall(PetscViewerFlush(viewer)); 1933 PetscFunctionReturn(PETSC_SUCCESS); 1934 } 1935 1936 #include <petscdraw.h> 1937 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1938 { 1939 Mat A = (Mat)Aa; 1940 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1941 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1942 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1943 MatScalar *aa; 1944 PetscViewer viewer; 1945 PetscViewerFormat format; 1946 1947 PetscFunctionBegin; 1948 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1949 PetscCall(PetscViewerGetFormat(viewer, &format)); 1950 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1951 1952 /* loop over matrix elements drawing boxes */ 1953 1954 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1955 PetscDrawCollectiveBegin(draw); 1956 /* Blue for negative, Cyan for zero and Red for positive */ 1957 color = PETSC_DRAW_BLUE; 1958 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1959 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1960 y_l = A->rmap->N - row - 1.0; 1961 y_r = y_l + 1.0; 1962 x_l = a->j[j] * bs; 1963 x_r = x_l + 1.0; 1964 aa = a->a + j * bs2; 1965 for (k = 0; k < bs; k++) { 1966 for (l = 0; l < bs; l++) { 1967 if (PetscRealPart(*aa++) >= 0.) continue; 1968 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1969 } 1970 } 1971 } 1972 } 1973 color = PETSC_DRAW_CYAN; 1974 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1975 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1976 y_l = A->rmap->N - row - 1.0; 1977 y_r = y_l + 1.0; 1978 x_l = a->j[j] * bs; 1979 x_r = x_l + 1.0; 1980 aa = a->a + j * bs2; 1981 for (k = 0; k < bs; k++) { 1982 for (l = 0; l < bs; l++) { 1983 if (PetscRealPart(*aa++) != 0.) continue; 1984 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1985 } 1986 } 1987 } 1988 } 1989 color = PETSC_DRAW_RED; 1990 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1991 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1992 y_l = A->rmap->N - row - 1.0; 1993 y_r = y_l + 1.0; 1994 x_l = a->j[j] * bs; 1995 x_r = x_l + 1.0; 1996 aa = a->a + j * bs2; 1997 for (k = 0; k < bs; k++) { 1998 for (l = 0; l < bs; l++) { 1999 if (PetscRealPart(*aa++) <= 0.) continue; 2000 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2001 } 2002 } 2003 } 2004 } 2005 PetscDrawCollectiveEnd(draw); 2006 } else { 2007 /* use contour shading to indicate magnitude of values */ 2008 /* first determine max of all nonzero values */ 2009 PetscReal minv = 0.0, maxv = 0.0; 2010 PetscDraw popup; 2011 2012 for (i = 0; i < a->nz * a->bs2; i++) { 2013 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2014 } 2015 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2016 PetscCall(PetscDrawGetPopup(draw, &popup)); 2017 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2018 2019 PetscDrawCollectiveBegin(draw); 2020 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2021 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2022 y_l = A->rmap->N - row - 1.0; 2023 y_r = y_l + 1.0; 2024 x_l = a->j[j] * bs; 2025 x_r = x_l + 1.0; 2026 aa = a->a + j * bs2; 2027 for (k = 0; k < bs; k++) { 2028 for (l = 0; l < bs; l++) { 2029 MatScalar v = *aa++; 2030 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2031 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2032 } 2033 } 2034 } 2035 } 2036 PetscDrawCollectiveEnd(draw); 2037 } 2038 PetscFunctionReturn(PETSC_SUCCESS); 2039 } 2040 2041 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2042 { 2043 PetscReal xl, yl, xr, yr, w, h; 2044 PetscDraw draw; 2045 PetscBool isnull; 2046 2047 PetscFunctionBegin; 2048 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2049 PetscCall(PetscDrawIsNull(draw, &isnull)); 2050 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2051 2052 xr = A->cmap->n; 2053 yr = A->rmap->N; 2054 h = yr / 10.0; 2055 w = xr / 10.0; 2056 xr += w; 2057 yr += h; 2058 xl = -w; 2059 yl = -h; 2060 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2061 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2062 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2063 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2064 PetscCall(PetscDrawSave(draw)); 2065 PetscFunctionReturn(PETSC_SUCCESS); 2066 } 2067 2068 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2069 { 2070 PetscBool iascii, isbinary, isdraw; 2071 2072 PetscFunctionBegin; 2073 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2074 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2075 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2076 if (iascii) { 2077 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2078 } else if (isbinary) { 2079 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2080 } else if (isdraw) { 2081 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2082 } else { 2083 Mat B; 2084 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2085 PetscCall(MatView(B, viewer)); 2086 PetscCall(MatDestroy(&B)); 2087 } 2088 PetscFunctionReturn(PETSC_SUCCESS); 2089 } 2090 2091 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2092 { 2093 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2094 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2095 PetscInt *ai = a->i, *ailen = a->ilen; 2096 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2097 MatScalar *ap, *aa = a->a; 2098 2099 PetscFunctionBegin; 2100 for (k = 0; k < m; k++) { /* loop over rows */ 2101 row = im[k]; 2102 brow = row / bs; 2103 if (row < 0) { 2104 v += n; 2105 continue; 2106 } /* negative row */ 2107 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2108 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2109 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2110 nrow = ailen[brow]; 2111 for (l = 0; l < n; l++) { /* loop over columns */ 2112 if (in[l] < 0) { 2113 v++; 2114 continue; 2115 } /* negative column */ 2116 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2117 col = in[l]; 2118 bcol = col / bs; 2119 cidx = col % bs; 2120 ridx = row % bs; 2121 high = nrow; 2122 low = 0; /* assume unsorted */ 2123 while (high - low > 5) { 2124 t = (low + high) / 2; 2125 if (rp[t] > bcol) high = t; 2126 else low = t; 2127 } 2128 for (i = low; i < high; i++) { 2129 if (rp[i] > bcol) break; 2130 if (rp[i] == bcol) { 2131 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2132 goto finished; 2133 } 2134 } 2135 *v++ = 0.0; 2136 finished:; 2137 } 2138 } 2139 PetscFunctionReturn(PETSC_SUCCESS); 2140 } 2141 2142 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2143 { 2144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2145 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2146 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2147 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2148 PetscBool roworiented = a->roworiented; 2149 const PetscScalar *value = v; 2150 MatScalar *ap = NULL, *aa = a->a, *bap; 2151 2152 PetscFunctionBegin; 2153 if (roworiented) { 2154 stepval = (n - 1) * bs; 2155 } else { 2156 stepval = (m - 1) * bs; 2157 } 2158 for (k = 0; k < m; k++) { /* loop over added rows */ 2159 row = im[k]; 2160 if (row < 0) continue; 2161 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2162 rp = aj + ai[row]; 2163 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2164 rmax = imax[row]; 2165 nrow = ailen[row]; 2166 low = 0; 2167 high = nrow; 2168 for (l = 0; l < n; l++) { /* loop over added columns */ 2169 if (in[l] < 0) continue; 2170 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2171 col = in[l]; 2172 if (!A->structure_only) { 2173 if (roworiented) { 2174 value = v + (k * (stepval + bs) + l) * bs; 2175 } else { 2176 value = v + (l * (stepval + bs) + k) * bs; 2177 } 2178 } 2179 if (col <= lastcol) low = 0; 2180 else high = nrow; 2181 lastcol = col; 2182 while (high - low > 7) { 2183 t = (low + high) / 2; 2184 if (rp[t] > col) high = t; 2185 else low = t; 2186 } 2187 for (i = low; i < high; i++) { 2188 if (rp[i] > col) break; 2189 if (rp[i] == col) { 2190 if (A->structure_only) goto noinsert2; 2191 bap = ap + bs2 * i; 2192 if (roworiented) { 2193 if (is == ADD_VALUES) { 2194 for (ii = 0; ii < bs; ii++, value += stepval) { 2195 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2196 } 2197 } else { 2198 for (ii = 0; ii < bs; ii++, value += stepval) { 2199 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2200 } 2201 } 2202 } else { 2203 if (is == ADD_VALUES) { 2204 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2205 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2206 bap += bs; 2207 } 2208 } else { 2209 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2210 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2211 bap += bs; 2212 } 2213 } 2214 } 2215 goto noinsert2; 2216 } 2217 } 2218 if (nonew == 1) goto noinsert2; 2219 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2220 if (A->structure_only) { 2221 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2222 } else { 2223 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2224 } 2225 N = nrow++ - 1; 2226 high++; 2227 /* shift up all the later entries in this row */ 2228 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2229 rp[i] = col; 2230 if (!A->structure_only) { 2231 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2232 bap = ap + bs2 * i; 2233 if (roworiented) { 2234 for (ii = 0; ii < bs; ii++, value += stepval) { 2235 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2236 } 2237 } else { 2238 for (ii = 0; ii < bs; ii++, value += stepval) { 2239 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2240 } 2241 } 2242 } 2243 noinsert2:; 2244 low = i; 2245 } 2246 ailen[row] = nrow; 2247 } 2248 PetscFunctionReturn(PETSC_SUCCESS); 2249 } 2250 2251 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2252 { 2253 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2254 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2255 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2256 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2257 MatScalar *aa = a->a, *ap; 2258 PetscReal ratio = 0.6; 2259 2260 PetscFunctionBegin; 2261 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2262 2263 if (m) rmax = ailen[0]; 2264 for (i = 1; i < mbs; i++) { 2265 /* move each row back by the amount of empty slots (fshift) before it*/ 2266 fshift += imax[i - 1] - ailen[i - 1]; 2267 rmax = PetscMax(rmax, ailen[i]); 2268 if (fshift) { 2269 ip = aj + ai[i]; 2270 ap = aa + bs2 * ai[i]; 2271 N = ailen[i]; 2272 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2273 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2274 } 2275 ai[i] = ai[i - 1] + ailen[i - 1]; 2276 } 2277 if (mbs) { 2278 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2279 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2280 } 2281 2282 /* reset ilen and imax for each row */ 2283 a->nonzerorowcnt = 0; 2284 if (A->structure_only) { 2285 PetscCall(PetscFree2(a->imax, a->ilen)); 2286 } else { /* !A->structure_only */ 2287 for (i = 0; i < mbs; i++) { 2288 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2289 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2290 } 2291 } 2292 a->nz = ai[mbs]; 2293 2294 /* diagonals may have moved, so kill the diagonal pointers */ 2295 a->idiagvalid = PETSC_FALSE; 2296 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2297 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2298 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2299 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2300 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2301 2302 A->info.mallocs += a->reallocs; 2303 a->reallocs = 0; 2304 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2305 a->rmax = rmax; 2306 2307 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2308 PetscFunctionReturn(PETSC_SUCCESS); 2309 } 2310 2311 /* 2312 This function returns an array of flags which indicate the locations of contiguous 2313 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2314 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2315 Assume: sizes should be long enough to hold all the values. 2316 */ 2317 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2318 { 2319 PetscInt j = 0; 2320 2321 PetscFunctionBegin; 2322 for (PetscInt i = 0; i < n; j++) { 2323 PetscInt row = idx[i]; 2324 if (row % bs != 0) { /* Not the beginning of a block */ 2325 sizes[j] = 1; 2326 i++; 2327 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2328 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2329 i++; 2330 } else { /* Beginning of the block, so check if the complete block exists */ 2331 PetscBool flg = PETSC_TRUE; 2332 for (PetscInt k = 1; k < bs; k++) { 2333 if (row + k != idx[i + k]) { /* break in the block */ 2334 flg = PETSC_FALSE; 2335 break; 2336 } 2337 } 2338 if (flg) { /* No break in the bs */ 2339 sizes[j] = bs; 2340 i += bs; 2341 } else { 2342 sizes[j] = 1; 2343 i++; 2344 } 2345 } 2346 } 2347 *bs_max = j; 2348 PetscFunctionReturn(PETSC_SUCCESS); 2349 } 2350 2351 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2352 { 2353 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2354 PetscInt i, j, k, count, *rows; 2355 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2356 PetscScalar zero = 0.0; 2357 MatScalar *aa; 2358 const PetscScalar *xx; 2359 PetscScalar *bb; 2360 2361 PetscFunctionBegin; 2362 /* fix right hand side if needed */ 2363 if (x && b) { 2364 PetscCall(VecGetArrayRead(x, &xx)); 2365 PetscCall(VecGetArray(b, &bb)); 2366 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2367 PetscCall(VecRestoreArrayRead(x, &xx)); 2368 PetscCall(VecRestoreArray(b, &bb)); 2369 } 2370 2371 /* Make a copy of the IS and sort it */ 2372 /* allocate memory for rows,sizes */ 2373 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2374 2375 /* copy IS values to rows, and sort them */ 2376 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2377 PetscCall(PetscSortInt(is_n, rows)); 2378 2379 if (baij->keepnonzeropattern) { 2380 for (i = 0; i < is_n; i++) sizes[i] = 1; 2381 bs_max = is_n; 2382 } else { 2383 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2384 A->nonzerostate++; 2385 } 2386 2387 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2388 row = rows[j]; 2389 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2390 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2391 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2392 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2393 if (diag != (PetscScalar)0.0) { 2394 if (baij->ilen[row / bs] > 0) { 2395 baij->ilen[row / bs] = 1; 2396 baij->j[baij->i[row / bs]] = row / bs; 2397 2398 PetscCall(PetscArrayzero(aa, count * bs)); 2399 } 2400 /* Now insert all the diagonal values for this bs */ 2401 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2402 } else { /* (diag == 0.0) */ 2403 baij->ilen[row / bs] = 0; 2404 } /* end (diag == 0.0) */ 2405 } else { /* (sizes[i] != bs) */ 2406 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2407 for (k = 0; k < count; k++) { 2408 aa[0] = zero; 2409 aa += bs; 2410 } 2411 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2412 } 2413 } 2414 2415 PetscCall(PetscFree2(rows, sizes)); 2416 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2417 PetscFunctionReturn(PETSC_SUCCESS); 2418 } 2419 2420 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2421 { 2422 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2423 PetscInt i, j, k, count; 2424 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2425 PetscScalar zero = 0.0; 2426 MatScalar *aa; 2427 const PetscScalar *xx; 2428 PetscScalar *bb; 2429 PetscBool *zeroed, vecs = PETSC_FALSE; 2430 2431 PetscFunctionBegin; 2432 /* fix right hand side if needed */ 2433 if (x && b) { 2434 PetscCall(VecGetArrayRead(x, &xx)); 2435 PetscCall(VecGetArray(b, &bb)); 2436 vecs = PETSC_TRUE; 2437 } 2438 2439 /* zero the columns */ 2440 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2441 for (i = 0; i < is_n; i++) { 2442 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2443 zeroed[is_idx[i]] = PETSC_TRUE; 2444 } 2445 for (i = 0; i < A->rmap->N; i++) { 2446 if (!zeroed[i]) { 2447 row = i / bs; 2448 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2449 for (k = 0; k < bs; k++) { 2450 col = bs * baij->j[j] + k; 2451 if (zeroed[col]) { 2452 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2453 if (vecs) bb[i] -= aa[0] * xx[col]; 2454 aa[0] = 0.0; 2455 } 2456 } 2457 } 2458 } else if (vecs) bb[i] = diag * xx[i]; 2459 } 2460 PetscCall(PetscFree(zeroed)); 2461 if (vecs) { 2462 PetscCall(VecRestoreArrayRead(x, &xx)); 2463 PetscCall(VecRestoreArray(b, &bb)); 2464 } 2465 2466 /* zero the rows */ 2467 for (i = 0; i < is_n; i++) { 2468 row = is_idx[i]; 2469 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2470 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2471 for (k = 0; k < count; k++) { 2472 aa[0] = zero; 2473 aa += bs; 2474 } 2475 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2476 } 2477 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2478 PetscFunctionReturn(PETSC_SUCCESS); 2479 } 2480 2481 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2482 { 2483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2484 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2485 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2486 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2487 PetscInt ridx, cidx, bs2 = a->bs2; 2488 PetscBool roworiented = a->roworiented; 2489 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2490 2491 PetscFunctionBegin; 2492 for (k = 0; k < m; k++) { /* loop over added rows */ 2493 row = im[k]; 2494 brow = row / bs; 2495 if (row < 0) continue; 2496 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2497 rp = aj + ai[brow]; 2498 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2499 rmax = imax[brow]; 2500 nrow = ailen[brow]; 2501 low = 0; 2502 high = nrow; 2503 for (l = 0; l < n; l++) { /* loop over added columns */ 2504 if (in[l] < 0) continue; 2505 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2506 col = in[l]; 2507 bcol = col / bs; 2508 ridx = row % bs; 2509 cidx = col % bs; 2510 if (!A->structure_only) { 2511 if (roworiented) { 2512 value = v[l + k * n]; 2513 } else { 2514 value = v[k + l * m]; 2515 } 2516 } 2517 if (col <= lastcol) low = 0; 2518 else high = nrow; 2519 lastcol = col; 2520 while (high - low > 7) { 2521 t = (low + high) / 2; 2522 if (rp[t] > bcol) high = t; 2523 else low = t; 2524 } 2525 for (i = low; i < high; i++) { 2526 if (rp[i] > bcol) break; 2527 if (rp[i] == bcol) { 2528 bap = ap + bs2 * i + bs * cidx + ridx; 2529 if (!A->structure_only) { 2530 if (is == ADD_VALUES) *bap += value; 2531 else *bap = value; 2532 } 2533 goto noinsert1; 2534 } 2535 } 2536 if (nonew == 1) goto noinsert1; 2537 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2538 if (A->structure_only) { 2539 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2540 } else { 2541 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2542 } 2543 N = nrow++ - 1; 2544 high++; 2545 /* shift up all the later entries in this row */ 2546 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2547 rp[i] = bcol; 2548 if (!A->structure_only) { 2549 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2550 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2551 ap[bs2 * i + bs * cidx + ridx] = value; 2552 } 2553 a->nz++; 2554 A->nonzerostate++; 2555 noinsert1:; 2556 low = i; 2557 } 2558 ailen[brow] = nrow; 2559 } 2560 PetscFunctionReturn(PETSC_SUCCESS); 2561 } 2562 2563 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2564 { 2565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2566 Mat outA; 2567 PetscBool row_identity, col_identity; 2568 2569 PetscFunctionBegin; 2570 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2571 PetscCall(ISIdentity(row, &row_identity)); 2572 PetscCall(ISIdentity(col, &col_identity)); 2573 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2574 2575 outA = inA; 2576 inA->factortype = MAT_FACTOR_LU; 2577 PetscCall(PetscFree(inA->solvertype)); 2578 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2579 2580 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2581 2582 PetscCall(PetscObjectReference((PetscObject)row)); 2583 PetscCall(ISDestroy(&a->row)); 2584 a->row = row; 2585 PetscCall(PetscObjectReference((PetscObject)col)); 2586 PetscCall(ISDestroy(&a->col)); 2587 a->col = col; 2588 2589 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2590 PetscCall(ISDestroy(&a->icol)); 2591 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2592 2593 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2594 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2595 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2596 PetscFunctionReturn(PETSC_SUCCESS); 2597 } 2598 2599 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2600 { 2601 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2602 2603 PetscFunctionBegin; 2604 baij->nz = baij->maxnz; 2605 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2606 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2607 PetscFunctionReturn(PETSC_SUCCESS); 2608 } 2609 2610 /*@ 2611 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2612 2613 Input Parameters: 2614 + mat - the `MATSEQBAIJ` matrix 2615 - indices - the column indices 2616 2617 Level: advanced 2618 2619 Notes: 2620 This can be called if you have precomputed the nonzero structure of the 2621 matrix and want to provide it to the matrix object to improve the performance 2622 of the `MatSetValues()` operation. 2623 2624 You MUST have set the correct numbers of nonzeros per row in the call to 2625 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2626 2627 MUST be called before any calls to `MatSetValues()` 2628 2629 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2630 @*/ 2631 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2632 { 2633 PetscFunctionBegin; 2634 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2635 PetscAssertPointer(indices, 2); 2636 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2641 { 2642 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2643 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2644 PetscReal atmp; 2645 PetscScalar *x, zero = 0.0; 2646 MatScalar *aa; 2647 PetscInt ncols, brow, krow, kcol; 2648 2649 PetscFunctionBegin; 2650 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2651 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2652 bs = A->rmap->bs; 2653 aa = a->a; 2654 ai = a->i; 2655 aj = a->j; 2656 mbs = a->mbs; 2657 2658 PetscCall(VecSet(v, zero)); 2659 PetscCall(VecGetArray(v, &x)); 2660 PetscCall(VecGetLocalSize(v, &n)); 2661 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2662 for (i = 0; i < mbs; i++) { 2663 ncols = ai[1] - ai[0]; 2664 ai++; 2665 brow = bs * i; 2666 for (j = 0; j < ncols; j++) { 2667 for (kcol = 0; kcol < bs; kcol++) { 2668 for (krow = 0; krow < bs; krow++) { 2669 atmp = PetscAbsScalar(*aa); 2670 aa++; 2671 row = brow + krow; /* row index */ 2672 if (PetscAbsScalar(x[row]) < atmp) { 2673 x[row] = atmp; 2674 if (idx) idx[row] = bs * (*aj) + kcol; 2675 } 2676 } 2677 } 2678 aj++; 2679 } 2680 } 2681 PetscCall(VecRestoreArray(v, &x)); 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684 2685 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2686 { 2687 PetscFunctionBegin; 2688 /* If the two matrices have the same copy implementation, use fast copy. */ 2689 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2691 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2692 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2693 2694 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2695 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2696 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2697 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2698 } else { 2699 PetscCall(MatCopy_Basic(A, B, str)); 2700 } 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2705 { 2706 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2707 2708 PetscFunctionBegin; 2709 *array = a->a; 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2714 { 2715 PetscFunctionBegin; 2716 *array = NULL; 2717 PetscFunctionReturn(PETSC_SUCCESS); 2718 } 2719 2720 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2721 { 2722 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2723 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2724 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2725 2726 PetscFunctionBegin; 2727 /* Set the number of nonzeros in the new matrix */ 2728 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2729 PetscFunctionReturn(PETSC_SUCCESS); 2730 } 2731 2732 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2733 { 2734 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2735 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2736 PetscBLASInt one = 1; 2737 2738 PetscFunctionBegin; 2739 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2740 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2741 if (e) { 2742 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2743 if (e) { 2744 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2745 if (e) str = SAME_NONZERO_PATTERN; 2746 } 2747 } 2748 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2749 } 2750 if (str == SAME_NONZERO_PATTERN) { 2751 PetscScalar alpha = a; 2752 PetscBLASInt bnz; 2753 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2754 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2755 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2756 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2757 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2758 } else { 2759 Mat B; 2760 PetscInt *nnz; 2761 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2762 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2763 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2764 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2765 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2766 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2767 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2768 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2769 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2770 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2771 PetscCall(MatHeaderMerge(Y, &B)); 2772 PetscCall(PetscFree(nnz)); 2773 } 2774 PetscFunctionReturn(PETSC_SUCCESS); 2775 } 2776 2777 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2778 { 2779 #if PetscDefined(USE_COMPLEX) 2780 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2781 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2782 MatScalar *aa = a->a; 2783 2784 PetscFunctionBegin; 2785 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2786 PetscFunctionReturn(PETSC_SUCCESS); 2787 #else 2788 (void)A; 2789 return PETSC_SUCCESS; 2790 #endif 2791 } 2792 2793 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2794 { 2795 #if PetscDefined(USE_COMPLEX) 2796 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2797 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2798 MatScalar *aa = a->a; 2799 2800 PetscFunctionBegin; 2801 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2802 PetscFunctionReturn(PETSC_SUCCESS); 2803 #else 2804 (void)A; 2805 return PETSC_SUCCESS; 2806 #endif 2807 } 2808 2809 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2810 { 2811 #if PetscDefined(USE_COMPLEX) 2812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2813 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2814 MatScalar *aa = a->a; 2815 2816 PetscFunctionBegin; 2817 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2818 PetscFunctionReturn(PETSC_SUCCESS); 2819 #else 2820 (void)A; 2821 return PETSC_SUCCESS; 2822 #endif 2823 } 2824 2825 /* 2826 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2827 */ 2828 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2829 { 2830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2831 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2832 PetscInt nz = a->i[m], row, *jj, mr, col; 2833 2834 PetscFunctionBegin; 2835 *nn = n; 2836 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2837 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2838 PetscCall(PetscCalloc1(n, &collengths)); 2839 PetscCall(PetscMalloc1(n + 1, &cia)); 2840 PetscCall(PetscMalloc1(nz, &cja)); 2841 jj = a->j; 2842 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2843 cia[0] = oshift; 2844 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2845 PetscCall(PetscArrayzero(collengths, n)); 2846 jj = a->j; 2847 for (row = 0; row < m; row++) { 2848 mr = a->i[row + 1] - a->i[row]; 2849 for (i = 0; i < mr; i++) { 2850 col = *jj++; 2851 2852 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2853 } 2854 } 2855 PetscCall(PetscFree(collengths)); 2856 *ia = cia; 2857 *ja = cja; 2858 PetscFunctionReturn(PETSC_SUCCESS); 2859 } 2860 2861 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2862 { 2863 PetscFunctionBegin; 2864 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2865 PetscCall(PetscFree(*ia)); 2866 PetscCall(PetscFree(*ja)); 2867 PetscFunctionReturn(PETSC_SUCCESS); 2868 } 2869 2870 /* 2871 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2872 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2873 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2874 */ 2875 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2876 { 2877 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2878 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2879 PetscInt nz = a->i[m], row, *jj, mr, col; 2880 PetscInt *cspidx; 2881 2882 PetscFunctionBegin; 2883 *nn = n; 2884 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2885 2886 PetscCall(PetscCalloc1(n, &collengths)); 2887 PetscCall(PetscMalloc1(n + 1, &cia)); 2888 PetscCall(PetscMalloc1(nz, &cja)); 2889 PetscCall(PetscMalloc1(nz, &cspidx)); 2890 jj = a->j; 2891 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2892 cia[0] = oshift; 2893 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2894 PetscCall(PetscArrayzero(collengths, n)); 2895 jj = a->j; 2896 for (row = 0; row < m; row++) { 2897 mr = a->i[row + 1] - a->i[row]; 2898 for (i = 0; i < mr; i++) { 2899 col = *jj++; 2900 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2901 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2902 } 2903 } 2904 PetscCall(PetscFree(collengths)); 2905 *ia = cia; 2906 *ja = cja; 2907 *spidx = cspidx; 2908 PetscFunctionReturn(PETSC_SUCCESS); 2909 } 2910 2911 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2912 { 2913 PetscFunctionBegin; 2914 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2915 PetscCall(PetscFree(*spidx)); 2916 PetscFunctionReturn(PETSC_SUCCESS); 2917 } 2918 2919 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2920 { 2921 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2922 2923 PetscFunctionBegin; 2924 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2925 PetscCall(MatShift_Basic(Y, a)); 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2930 { 2931 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2932 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2933 PetscInt m = A->rmap->N, *ailen = a->ilen; 2934 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2935 MatScalar *aa = a->a, *ap; 2936 PetscBool zero; 2937 2938 PetscFunctionBegin; 2939 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2940 if (m) rmax = ailen[0]; 2941 for (i = 1; i <= mbs; i++) { 2942 for (k = ai[i - 1]; k < ai[i]; k++) { 2943 zero = PETSC_TRUE; 2944 ap = aa + bs2 * k; 2945 for (j = 0; j < bs2 && zero; j++) { 2946 if (ap[j] != 0.0) zero = PETSC_FALSE; 2947 } 2948 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2949 else { 2950 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2951 aj[k - fshift] = aj[k]; 2952 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2953 } 2954 } 2955 ai[i - 1] -= fshift_prev; 2956 fshift_prev = fshift; 2957 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2958 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2959 rmax = PetscMax(rmax, ailen[i - 1]); 2960 } 2961 if (fshift) { 2962 if (mbs) { 2963 ai[mbs] -= fshift; 2964 a->nz = ai[mbs]; 2965 } 2966 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 2967 A->nonzerostate++; 2968 A->info.nz_unneeded += (PetscReal)fshift; 2969 a->rmax = rmax; 2970 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2971 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2972 } 2973 PetscFunctionReturn(PETSC_SUCCESS); 2974 } 2975 2976 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2977 MatGetRow_SeqBAIJ, 2978 MatRestoreRow_SeqBAIJ, 2979 MatMult_SeqBAIJ_N, 2980 /* 4*/ MatMultAdd_SeqBAIJ_N, 2981 MatMultTranspose_SeqBAIJ, 2982 MatMultTransposeAdd_SeqBAIJ, 2983 NULL, 2984 NULL, 2985 NULL, 2986 /* 10*/ NULL, 2987 MatLUFactor_SeqBAIJ, 2988 NULL, 2989 NULL, 2990 MatTranspose_SeqBAIJ, 2991 /* 15*/ MatGetInfo_SeqBAIJ, 2992 MatEqual_SeqBAIJ, 2993 MatGetDiagonal_SeqBAIJ, 2994 MatDiagonalScale_SeqBAIJ, 2995 MatNorm_SeqBAIJ, 2996 /* 20*/ NULL, 2997 MatAssemblyEnd_SeqBAIJ, 2998 MatSetOption_SeqBAIJ, 2999 MatZeroEntries_SeqBAIJ, 3000 /* 24*/ MatZeroRows_SeqBAIJ, 3001 NULL, 3002 NULL, 3003 NULL, 3004 NULL, 3005 /* 29*/ MatSetUp_Seq_Hash, 3006 NULL, 3007 NULL, 3008 NULL, 3009 NULL, 3010 /* 34*/ MatDuplicate_SeqBAIJ, 3011 NULL, 3012 NULL, 3013 MatILUFactor_SeqBAIJ, 3014 NULL, 3015 /* 39*/ MatAXPY_SeqBAIJ, 3016 MatCreateSubMatrices_SeqBAIJ, 3017 MatIncreaseOverlap_SeqBAIJ, 3018 MatGetValues_SeqBAIJ, 3019 MatCopy_SeqBAIJ, 3020 /* 44*/ NULL, 3021 MatScale_SeqBAIJ, 3022 MatShift_SeqBAIJ, 3023 NULL, 3024 MatZeroRowsColumns_SeqBAIJ, 3025 /* 49*/ NULL, 3026 MatGetRowIJ_SeqBAIJ, 3027 MatRestoreRowIJ_SeqBAIJ, 3028 MatGetColumnIJ_SeqBAIJ, 3029 MatRestoreColumnIJ_SeqBAIJ, 3030 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3031 NULL, 3032 NULL, 3033 NULL, 3034 MatSetValuesBlocked_SeqBAIJ, 3035 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3036 MatDestroy_SeqBAIJ, 3037 MatView_SeqBAIJ, 3038 NULL, 3039 NULL, 3040 /* 64*/ NULL, 3041 NULL, 3042 NULL, 3043 NULL, 3044 NULL, 3045 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3046 NULL, 3047 MatConvert_Basic, 3048 NULL, 3049 NULL, 3050 /* 74*/ NULL, 3051 MatFDColoringApply_BAIJ, 3052 NULL, 3053 NULL, 3054 NULL, 3055 /* 79*/ NULL, 3056 NULL, 3057 NULL, 3058 NULL, 3059 MatLoad_SeqBAIJ, 3060 /* 84*/ NULL, 3061 NULL, 3062 NULL, 3063 NULL, 3064 NULL, 3065 /* 89*/ NULL, 3066 NULL, 3067 NULL, 3068 NULL, 3069 NULL, 3070 /* 94*/ NULL, 3071 NULL, 3072 NULL, 3073 NULL, 3074 NULL, 3075 /* 99*/ NULL, 3076 NULL, 3077 NULL, 3078 MatConjugate_SeqBAIJ, 3079 NULL, 3080 /*104*/ NULL, 3081 MatRealPart_SeqBAIJ, 3082 MatImaginaryPart_SeqBAIJ, 3083 NULL, 3084 NULL, 3085 /*109*/ NULL, 3086 NULL, 3087 NULL, 3088 NULL, 3089 MatMissingDiagonal_SeqBAIJ, 3090 /*114*/ NULL, 3091 NULL, 3092 NULL, 3093 NULL, 3094 NULL, 3095 /*119*/ NULL, 3096 NULL, 3097 MatMultHermitianTranspose_SeqBAIJ, 3098 MatMultHermitianTransposeAdd_SeqBAIJ, 3099 NULL, 3100 /*124*/ NULL, 3101 MatGetColumnReductions_SeqBAIJ, 3102 MatInvertBlockDiagonal_SeqBAIJ, 3103 NULL, 3104 NULL, 3105 /*129*/ NULL, 3106 NULL, 3107 NULL, 3108 NULL, 3109 NULL, 3110 /*134*/ NULL, 3111 NULL, 3112 NULL, 3113 NULL, 3114 NULL, 3115 /*139*/ MatSetBlockSizes_Default, 3116 NULL, 3117 NULL, 3118 MatFDColoringSetUp_SeqXAIJ, 3119 NULL, 3120 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3121 MatDestroySubMatrices_SeqBAIJ, 3122 NULL, 3123 NULL, 3124 NULL, 3125 NULL, 3126 /*150*/ NULL, 3127 MatEliminateZeros_SeqBAIJ}; 3128 3129 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3130 { 3131 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3132 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3133 3134 PetscFunctionBegin; 3135 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3136 3137 /* allocate space for values if not already there */ 3138 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3139 3140 /* copy values over */ 3141 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3142 PetscFunctionReturn(PETSC_SUCCESS); 3143 } 3144 3145 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3146 { 3147 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3148 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3149 3150 PetscFunctionBegin; 3151 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3152 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3153 3154 /* copy values over */ 3155 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3156 PetscFunctionReturn(PETSC_SUCCESS); 3157 } 3158 3159 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3160 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3161 3162 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3163 { 3164 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3165 PetscInt i, mbs, nbs, bs2; 3166 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3167 3168 PetscFunctionBegin; 3169 if (B->hash_active) { 3170 PetscInt bs; 3171 B->ops[0] = b->cops; 3172 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3173 PetscCall(MatGetBlockSize(B, &bs)); 3174 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3175 PetscCall(PetscFree(b->dnz)); 3176 PetscCall(PetscFree(b->bdnz)); 3177 B->hash_active = PETSC_FALSE; 3178 } 3179 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3180 if (nz == MAT_SKIP_ALLOCATION) { 3181 skipallocation = PETSC_TRUE; 3182 nz = 0; 3183 } 3184 3185 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3186 PetscCall(PetscLayoutSetUp(B->rmap)); 3187 PetscCall(PetscLayoutSetUp(B->cmap)); 3188 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3189 3190 B->preallocated = PETSC_TRUE; 3191 3192 mbs = B->rmap->n / bs; 3193 nbs = B->cmap->n / bs; 3194 bs2 = bs * bs; 3195 3196 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3197 3198 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3199 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3200 if (nnz) { 3201 for (i = 0; i < mbs; i++) { 3202 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3203 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3204 } 3205 } 3206 3207 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3208 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3209 PetscOptionsEnd(); 3210 3211 if (!flg) { 3212 switch (bs) { 3213 case 1: 3214 B->ops->mult = MatMult_SeqBAIJ_1; 3215 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3216 break; 3217 case 2: 3218 B->ops->mult = MatMult_SeqBAIJ_2; 3219 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3220 break; 3221 case 3: 3222 B->ops->mult = MatMult_SeqBAIJ_3; 3223 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3224 break; 3225 case 4: 3226 B->ops->mult = MatMult_SeqBAIJ_4; 3227 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3228 break; 3229 case 5: 3230 B->ops->mult = MatMult_SeqBAIJ_5; 3231 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3232 break; 3233 case 6: 3234 B->ops->mult = MatMult_SeqBAIJ_6; 3235 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3236 break; 3237 case 7: 3238 B->ops->mult = MatMult_SeqBAIJ_7; 3239 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3240 break; 3241 case 9: { 3242 PetscInt version = 1; 3243 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3244 switch (version) { 3245 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3246 case 1: 3247 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3248 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3249 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3250 break; 3251 #endif 3252 default: 3253 B->ops->mult = MatMult_SeqBAIJ_N; 3254 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3255 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3256 break; 3257 } 3258 break; 3259 } 3260 case 11: 3261 B->ops->mult = MatMult_SeqBAIJ_11; 3262 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3263 break; 3264 case 12: { 3265 PetscInt version = 1; 3266 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3267 switch (version) { 3268 case 1: 3269 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3270 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3271 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3272 break; 3273 case 2: 3274 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3275 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3276 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3277 break; 3278 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3279 case 3: 3280 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3281 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3282 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3283 break; 3284 #endif 3285 default: 3286 B->ops->mult = MatMult_SeqBAIJ_N; 3287 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3288 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3289 break; 3290 } 3291 break; 3292 } 3293 case 15: { 3294 PetscInt version = 1; 3295 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3296 switch (version) { 3297 case 1: 3298 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3299 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3300 break; 3301 case 2: 3302 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3303 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3304 break; 3305 case 3: 3306 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3307 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3308 break; 3309 case 4: 3310 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3311 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3312 break; 3313 default: 3314 B->ops->mult = MatMult_SeqBAIJ_N; 3315 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3316 break; 3317 } 3318 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3319 break; 3320 } 3321 default: 3322 B->ops->mult = MatMult_SeqBAIJ_N; 3323 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3324 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3325 break; 3326 } 3327 } 3328 B->ops->sor = MatSOR_SeqBAIJ; 3329 b->mbs = mbs; 3330 b->nbs = nbs; 3331 if (!skipallocation) { 3332 if (!b->imax) { 3333 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3334 3335 b->free_imax_ilen = PETSC_TRUE; 3336 } 3337 /* b->ilen will count nonzeros in each block row so far. */ 3338 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3339 if (!nnz) { 3340 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3341 else if (nz < 0) nz = 1; 3342 nz = PetscMin(nz, nbs); 3343 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3344 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3345 } else { 3346 PetscInt64 nz64 = 0; 3347 for (i = 0; i < mbs; i++) { 3348 b->imax[i] = nnz[i]; 3349 nz64 += nnz[i]; 3350 } 3351 PetscCall(PetscIntCast(nz64, &nz)); 3352 } 3353 3354 /* allocate the matrix space */ 3355 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3356 if (B->structure_only) { 3357 PetscCall(PetscMalloc1(nz, &b->j)); 3358 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3359 } else { 3360 PetscInt nzbs2 = 0; 3361 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3362 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3363 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3364 } 3365 PetscCall(PetscArrayzero(b->j, nz)); 3366 3367 if (B->structure_only) { 3368 b->singlemalloc = PETSC_FALSE; 3369 b->free_a = PETSC_FALSE; 3370 } else { 3371 b->singlemalloc = PETSC_TRUE; 3372 b->free_a = PETSC_TRUE; 3373 } 3374 b->free_ij = PETSC_TRUE; 3375 3376 b->i[0] = 0; 3377 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3378 3379 } else { 3380 b->free_a = PETSC_FALSE; 3381 b->free_ij = PETSC_FALSE; 3382 } 3383 3384 b->bs2 = bs2; 3385 b->mbs = mbs; 3386 b->nz = 0; 3387 b->maxnz = nz; 3388 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3389 B->was_assembled = PETSC_FALSE; 3390 B->assembled = PETSC_FALSE; 3391 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3392 PetscFunctionReturn(PETSC_SUCCESS); 3393 } 3394 3395 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3396 { 3397 PetscInt i, m, nz, nz_max = 0, *nnz; 3398 PetscScalar *values = NULL; 3399 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3400 3401 PetscFunctionBegin; 3402 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3403 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3404 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3405 PetscCall(PetscLayoutSetUp(B->rmap)); 3406 PetscCall(PetscLayoutSetUp(B->cmap)); 3407 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3408 m = B->rmap->n / bs; 3409 3410 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3411 PetscCall(PetscMalloc1(m + 1, &nnz)); 3412 for (i = 0; i < m; i++) { 3413 nz = ii[i + 1] - ii[i]; 3414 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3415 nz_max = PetscMax(nz_max, nz); 3416 nnz[i] = nz; 3417 } 3418 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3419 PetscCall(PetscFree(nnz)); 3420 3421 values = (PetscScalar *)V; 3422 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3423 for (i = 0; i < m; i++) { 3424 PetscInt ncols = ii[i + 1] - ii[i]; 3425 const PetscInt *icols = jj + ii[i]; 3426 if (bs == 1 || !roworiented) { 3427 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3428 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3429 } else { 3430 PetscInt j; 3431 for (j = 0; j < ncols; j++) { 3432 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3433 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3434 } 3435 } 3436 } 3437 if (!V) PetscCall(PetscFree(values)); 3438 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3439 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3440 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3441 PetscFunctionReturn(PETSC_SUCCESS); 3442 } 3443 3444 /*@C 3445 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3446 3447 Not Collective 3448 3449 Input Parameter: 3450 . A - a `MATSEQBAIJ` matrix 3451 3452 Output Parameter: 3453 . array - pointer to the data 3454 3455 Level: intermediate 3456 3457 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3458 @*/ 3459 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3460 { 3461 PetscFunctionBegin; 3462 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3463 PetscFunctionReturn(PETSC_SUCCESS); 3464 } 3465 3466 /*@C 3467 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3468 3469 Not Collective 3470 3471 Input Parameters: 3472 + A - a `MATSEQBAIJ` matrix 3473 - array - pointer to the data 3474 3475 Level: intermediate 3476 3477 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3478 @*/ 3479 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3480 { 3481 PetscFunctionBegin; 3482 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3483 PetscFunctionReturn(PETSC_SUCCESS); 3484 } 3485 3486 /*MC 3487 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3488 block sparse compressed row format. 3489 3490 Options Database Keys: 3491 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3492 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3493 3494 Level: beginner 3495 3496 Notes: 3497 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3498 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3499 3500 Run with `-info` to see what version of the matrix-vector product is being used 3501 3502 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3503 M*/ 3504 3505 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3506 3507 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3508 { 3509 PetscMPIInt size; 3510 Mat_SeqBAIJ *b; 3511 3512 PetscFunctionBegin; 3513 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3514 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3515 3516 PetscCall(PetscNew(&b)); 3517 B->data = (void *)b; 3518 B->ops[0] = MatOps_Values; 3519 3520 b->row = NULL; 3521 b->col = NULL; 3522 b->icol = NULL; 3523 b->reallocs = 0; 3524 b->saved_values = NULL; 3525 3526 b->roworiented = PETSC_TRUE; 3527 b->nonew = 0; 3528 b->diag = NULL; 3529 B->spptr = NULL; 3530 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3531 b->keepnonzeropattern = PETSC_FALSE; 3532 3533 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3534 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3535 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3536 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3537 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3538 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3539 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3543 #if defined(PETSC_HAVE_HYPRE) 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3545 #endif 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3547 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3548 PetscFunctionReturn(PETSC_SUCCESS); 3549 } 3550 3551 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3552 { 3553 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3554 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3555 3556 PetscFunctionBegin; 3557 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3558 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3559 3560 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3561 c->imax = a->imax; 3562 c->ilen = a->ilen; 3563 c->free_imax_ilen = PETSC_FALSE; 3564 } else { 3565 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3566 for (i = 0; i < mbs; i++) { 3567 c->imax[i] = a->imax[i]; 3568 c->ilen[i] = a->ilen[i]; 3569 } 3570 c->free_imax_ilen = PETSC_TRUE; 3571 } 3572 3573 /* allocate the matrix space */ 3574 if (mallocmatspace) { 3575 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3576 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3577 3578 c->i = a->i; 3579 c->j = a->j; 3580 c->singlemalloc = PETSC_FALSE; 3581 c->free_a = PETSC_TRUE; 3582 c->free_ij = PETSC_FALSE; 3583 c->parent = A; 3584 C->preallocated = PETSC_TRUE; 3585 C->assembled = PETSC_TRUE; 3586 3587 PetscCall(PetscObjectReference((PetscObject)A)); 3588 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3589 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3590 } else { 3591 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3592 3593 c->singlemalloc = PETSC_TRUE; 3594 c->free_a = PETSC_TRUE; 3595 c->free_ij = PETSC_TRUE; 3596 3597 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3598 if (mbs > 0) { 3599 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3600 if (cpvalues == MAT_COPY_VALUES) { 3601 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3602 } else { 3603 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3604 } 3605 } 3606 C->preallocated = PETSC_TRUE; 3607 C->assembled = PETSC_TRUE; 3608 } 3609 } 3610 3611 c->roworiented = a->roworiented; 3612 c->nonew = a->nonew; 3613 3614 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3615 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3616 3617 c->bs2 = a->bs2; 3618 c->mbs = a->mbs; 3619 c->nbs = a->nbs; 3620 3621 if (a->diag) { 3622 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3623 c->diag = a->diag; 3624 c->free_diag = PETSC_FALSE; 3625 } else { 3626 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3627 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3628 c->free_diag = PETSC_TRUE; 3629 } 3630 } else c->diag = NULL; 3631 3632 c->nz = a->nz; 3633 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3634 c->solve_work = NULL; 3635 c->mult_work = NULL; 3636 c->sor_workt = NULL; 3637 c->sor_work = NULL; 3638 3639 c->compressedrow.use = a->compressedrow.use; 3640 c->compressedrow.nrows = a->compressedrow.nrows; 3641 if (a->compressedrow.use) { 3642 i = a->compressedrow.nrows; 3643 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3644 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3645 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3646 } else { 3647 c->compressedrow.use = PETSC_FALSE; 3648 c->compressedrow.i = NULL; 3649 c->compressedrow.rindex = NULL; 3650 } 3651 c->nonzerorowcnt = a->nonzerorowcnt; 3652 C->nonzerostate = A->nonzerostate; 3653 3654 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3655 PetscFunctionReturn(PETSC_SUCCESS); 3656 } 3657 3658 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3659 { 3660 PetscFunctionBegin; 3661 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3662 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3663 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3664 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3665 PetscFunctionReturn(PETSC_SUCCESS); 3666 } 3667 3668 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3669 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3670 { 3671 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3672 PetscInt *rowidxs, *colidxs; 3673 PetscScalar *matvals; 3674 3675 PetscFunctionBegin; 3676 PetscCall(PetscViewerSetUp(viewer)); 3677 3678 /* read matrix header */ 3679 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3680 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3681 M = header[1]; 3682 N = header[2]; 3683 nz = header[3]; 3684 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3685 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3686 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3687 3688 /* set block sizes from the viewer's .info file */ 3689 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3690 /* set local and global sizes if not set already */ 3691 if (mat->rmap->n < 0) mat->rmap->n = M; 3692 if (mat->cmap->n < 0) mat->cmap->n = N; 3693 if (mat->rmap->N < 0) mat->rmap->N = M; 3694 if (mat->cmap->N < 0) mat->cmap->N = N; 3695 PetscCall(PetscLayoutSetUp(mat->rmap)); 3696 PetscCall(PetscLayoutSetUp(mat->cmap)); 3697 3698 /* check if the matrix sizes are correct */ 3699 PetscCall(MatGetSize(mat, &rows, &cols)); 3700 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3701 PetscCall(MatGetBlockSize(mat, &bs)); 3702 PetscCall(MatGetLocalSize(mat, &m, &n)); 3703 mbs = m / bs; 3704 nbs = n / bs; 3705 3706 /* read in row lengths, column indices and nonzero values */ 3707 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3708 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3709 rowidxs[0] = 0; 3710 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3711 sum = rowidxs[m]; 3712 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3713 3714 /* read in column indices and nonzero values */ 3715 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3716 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3717 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3718 3719 { /* preallocate matrix storage */ 3720 PetscBT bt; /* helper bit set to count nonzeros */ 3721 PetscInt *nnz; 3722 PetscBool sbaij; 3723 3724 PetscCall(PetscBTCreate(nbs, &bt)); 3725 PetscCall(PetscCalloc1(mbs, &nnz)); 3726 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3727 for (i = 0; i < mbs; i++) { 3728 PetscCall(PetscBTMemzero(nbs, bt)); 3729 for (k = 0; k < bs; k++) { 3730 PetscInt row = bs * i + k; 3731 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3732 PetscInt col = colidxs[j]; 3733 if (!sbaij || col >= row) 3734 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3735 } 3736 } 3737 } 3738 PetscCall(PetscBTDestroy(&bt)); 3739 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3740 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3741 PetscCall(PetscFree(nnz)); 3742 } 3743 3744 /* store matrix values */ 3745 for (i = 0; i < m; i++) { 3746 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3747 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3748 } 3749 3750 PetscCall(PetscFree(rowidxs)); 3751 PetscCall(PetscFree2(colidxs, matvals)); 3752 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3753 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3754 PetscFunctionReturn(PETSC_SUCCESS); 3755 } 3756 3757 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3758 { 3759 PetscBool isbinary; 3760 3761 PetscFunctionBegin; 3762 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3763 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3764 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3765 PetscFunctionReturn(PETSC_SUCCESS); 3766 } 3767 3768 /*@C 3769 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3770 compressed row) format. For good matrix assembly performance the 3771 user should preallocate the matrix storage by setting the parameter `nz` 3772 (or the array `nnz`). 3773 3774 Collective 3775 3776 Input Parameters: 3777 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3778 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3779 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3780 . m - number of rows 3781 . n - number of columns 3782 . nz - number of nonzero blocks per block row (same for all rows) 3783 - nnz - array containing the number of nonzero blocks in the various block rows 3784 (possibly different for each block row) or `NULL` 3785 3786 Output Parameter: 3787 . A - the matrix 3788 3789 Options Database Keys: 3790 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3791 - -mat_block_size - size of the blocks to use 3792 3793 Level: intermediate 3794 3795 Notes: 3796 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3797 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3798 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3799 3800 The number of rows and columns must be divisible by blocksize. 3801 3802 If the `nnz` parameter is given then the `nz` parameter is ignored 3803 3804 A nonzero block is any block that as 1 or more nonzeros in it 3805 3806 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3807 storage. That is, the stored row and column indices can begin at 3808 either one (as in Fortran) or zero. 3809 3810 Specify the preallocated storage with either `nz` or `nnz` (not both). 3811 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3812 allocation. See [Sparse Matrices](sec_matsparse) for details. 3813 matrices. 3814 3815 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3816 @*/ 3817 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3818 { 3819 PetscFunctionBegin; 3820 PetscCall(MatCreate(comm, A)); 3821 PetscCall(MatSetSizes(*A, m, n, m, n)); 3822 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3823 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3824 PetscFunctionReturn(PETSC_SUCCESS); 3825 } 3826 3827 /*@C 3828 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3829 per row in the matrix. For good matrix assembly performance the 3830 user should preallocate the matrix storage by setting the parameter `nz` 3831 (or the array `nnz`). 3832 3833 Collective 3834 3835 Input Parameters: 3836 + B - the matrix 3837 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3838 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3839 . nz - number of block nonzeros per block row (same for all rows) 3840 - nnz - array containing the number of block nonzeros in the various block rows 3841 (possibly different for each block row) or `NULL` 3842 3843 Options Database Keys: 3844 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3845 - -mat_block_size - size of the blocks to use 3846 3847 Level: intermediate 3848 3849 Notes: 3850 If the `nnz` parameter is given then the `nz` parameter is ignored 3851 3852 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3853 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3854 You can also run with the option `-info` and look for messages with the string 3855 malloc in them to see if additional memory allocation was needed. 3856 3857 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3858 storage. That is, the stored row and column indices can begin at 3859 either one (as in Fortran) or zero. 3860 3861 Specify the preallocated storage with either nz or nnz (not both). 3862 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3863 allocation. See [Sparse Matrices](sec_matsparse) for details. 3864 3865 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3866 @*/ 3867 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3868 { 3869 PetscFunctionBegin; 3870 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3871 PetscValidType(B, 1); 3872 PetscValidLogicalCollectiveInt(B, bs, 2); 3873 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3874 PetscFunctionReturn(PETSC_SUCCESS); 3875 } 3876 3877 /*@C 3878 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3879 3880 Collective 3881 3882 Input Parameters: 3883 + B - the matrix 3884 . bs - the blocksize 3885 . i - the indices into `j` for the start of each local row (starts with zero) 3886 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3887 - v - optional values in the matrix 3888 3889 Level: advanced 3890 3891 Notes: 3892 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3893 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3894 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3895 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3896 block column and the second index is over columns within a block. 3897 3898 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3899 3900 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3901 @*/ 3902 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3903 { 3904 PetscFunctionBegin; 3905 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3906 PetscValidType(B, 1); 3907 PetscValidLogicalCollectiveInt(B, bs, 2); 3908 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3909 PetscFunctionReturn(PETSC_SUCCESS); 3910 } 3911 3912 /*@ 3913 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3914 3915 Collective 3916 3917 Input Parameters: 3918 + comm - must be an MPI communicator of size 1 3919 . bs - size of block 3920 . m - number of rows 3921 . n - number of columns 3922 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3923 . j - column indices 3924 - a - matrix values 3925 3926 Output Parameter: 3927 . mat - the matrix 3928 3929 Level: advanced 3930 3931 Notes: 3932 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3933 once the matrix is destroyed 3934 3935 You cannot set new nonzero locations into this matrix, that will generate an error. 3936 3937 The `i` and `j` indices are 0 based 3938 3939 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3940 3941 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3942 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3943 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3944 with column-major ordering within blocks. 3945 3946 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3947 @*/ 3948 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3949 { 3950 Mat_SeqBAIJ *baij; 3951 3952 PetscFunctionBegin; 3953 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3954 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3955 3956 PetscCall(MatCreate(comm, mat)); 3957 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3958 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3959 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3960 baij = (Mat_SeqBAIJ *)(*mat)->data; 3961 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3962 3963 baij->i = i; 3964 baij->j = j; 3965 baij->a = a; 3966 3967 baij->singlemalloc = PETSC_FALSE; 3968 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3969 baij->free_a = PETSC_FALSE; 3970 baij->free_ij = PETSC_FALSE; 3971 baij->free_imax_ilen = PETSC_TRUE; 3972 3973 for (PetscInt ii = 0; ii < m; ii++) { 3974 const PetscInt row_len = i[ii + 1] - i[ii]; 3975 3976 baij->ilen[ii] = baij->imax[ii] = row_len; 3977 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3978 } 3979 if (PetscDefined(USE_DEBUG)) { 3980 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3981 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3982 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3983 } 3984 } 3985 3986 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3987 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3988 PetscFunctionReturn(PETSC_SUCCESS); 3989 } 3990 3991 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3992 { 3993 PetscFunctionBegin; 3994 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3995 PetscFunctionReturn(PETSC_SUCCESS); 3996 } 3997