1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 12 #define TYPE BAIJ 13 #define TYPE_BS 14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 15 #undef TYPE_BS 16 #define TYPE_BS _BS 17 #define TYPE_BS_ON 18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 19 #undef TYPE_BS 20 #include "../src/mat/impls/aij/seq/seqhashmat.h" 21 #undef TYPE 22 #undef TYPE_BS_ON 23 24 #if defined(PETSC_HAVE_HYPRE) 25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 26 #endif 27 28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 30 #endif 31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 32 33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 34 { 35 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 36 PetscInt m, n, ib, jb, bs = A->rmap->bs; 37 MatScalar *a_val = a_aij->a; 38 39 PetscFunctionBegin; 40 PetscCall(MatGetSize(A, &m, &n)); 41 PetscCall(PetscArrayzero(reductions, n)); 42 if (type == NORM_2) { 43 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 44 for (jb = 0; jb < bs; jb++) { 45 for (ib = 0; ib < bs; ib++) { 46 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 47 a_val++; 48 } 49 } 50 } 51 } else if (type == NORM_1) { 52 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 53 for (jb = 0; jb < bs; jb++) { 54 for (ib = 0; ib < bs; ib++) { 55 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 56 a_val++; 57 } 58 } 59 } 60 } else if (type == NORM_INFINITY) { 61 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 62 for (jb = 0; jb < bs; jb++) { 63 for (ib = 0; ib < bs; ib++) { 64 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 65 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 66 a_val++; 67 } 68 } 69 } 70 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 71 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 72 for (jb = 0; jb < bs; jb++) { 73 for (ib = 0; ib < bs; ib++) { 74 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 75 a_val++; 76 } 77 } 78 } 79 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 81 for (jb = 0; jb < bs; jb++) { 82 for (ib = 0; ib < bs; ib++) { 83 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 84 a_val++; 85 } 86 } 87 } 88 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 89 if (type == NORM_2) { 90 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 91 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 92 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 93 } 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 98 { 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 100 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 101 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 102 PetscReal shift = 0.0; 103 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 104 105 PetscFunctionBegin; 106 allowzeropivot = PetscNot(A->erroriffailure); 107 108 if (a->idiagvalid) { 109 if (values) *values = a->idiag; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 113 diag_offset = a->diag; 114 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 115 diag = a->idiag; 116 if (values) *values = a->idiag; 117 /* factor and invert each block */ 118 switch (bs) { 119 case 1: 120 for (i = 0; i < mbs; i++) { 121 odiag = v + 1 * diag_offset[i]; 122 diag[0] = odiag[0]; 123 124 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 125 if (allowzeropivot) { 126 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 128 A->factorerror_zeropivot_row = i; 129 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 130 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * diag_offset[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * diag_offset[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * diag_offset[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * diag_offset[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * diag_offset[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * diag_offset[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 /* 1414 Checks for missing diagonals 1415 */ 1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1419 PetscInt *diag, *ii = a->i, i; 1420 1421 PetscFunctionBegin; 1422 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1423 *missing = PETSC_FALSE; 1424 if (A->rmap->n > 0 && !ii) { 1425 *missing = PETSC_TRUE; 1426 if (d) *d = 0; 1427 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1428 } else { 1429 PetscInt n; 1430 n = PetscMin(a->mbs, a->nbs); 1431 diag = a->diag; 1432 for (i = 0; i < n; i++) { 1433 if (diag[i] >= ii[i + 1]) { 1434 *missing = PETSC_TRUE; 1435 if (d) *d = i; 1436 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1437 break; 1438 } 1439 } 1440 } 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1445 { 1446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1447 PetscInt i, j, m = a->mbs; 1448 1449 PetscFunctionBegin; 1450 if (!a->diag) { 1451 PetscCall(PetscMalloc1(m, &a->diag)); 1452 a->free_diag = PETSC_TRUE; 1453 } 1454 for (i = 0; i < m; i++) { 1455 a->diag[i] = a->i[i + 1]; 1456 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1457 if (a->j[j] == i) { 1458 a->diag[i] = j; 1459 break; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1467 { 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1469 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1470 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1471 1472 PetscFunctionBegin; 1473 *nn = n; 1474 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1475 if (symmetric) { 1476 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1477 nz = tia[n]; 1478 } else { 1479 tia = a->i; 1480 tja = a->j; 1481 } 1482 1483 if (!blockcompressed && bs > 1) { 1484 (*nn) *= bs; 1485 /* malloc & create the natural set of indices */ 1486 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1487 if (n) { 1488 (*ia)[0] = oshift; 1489 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1490 } 1491 1492 for (i = 1; i < n; i++) { 1493 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1494 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1495 } 1496 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1497 1498 if (inja) { 1499 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1500 cnt = 0; 1501 for (i = 0; i < n; i++) { 1502 for (j = 0; j < bs; j++) { 1503 for (k = tia[i]; k < tia[i + 1]; k++) { 1504 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1505 } 1506 } 1507 } 1508 } 1509 1510 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1511 PetscCall(PetscFree(tia)); 1512 PetscCall(PetscFree(tja)); 1513 } 1514 } else if (oshift == 1) { 1515 if (symmetric) { 1516 nz = tia[A->rmap->n / bs]; 1517 /* add 1 to i and j indices */ 1518 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1519 *ia = tia; 1520 if (ja) { 1521 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1522 *ja = tja; 1523 } 1524 } else { 1525 nz = a->i[A->rmap->n / bs]; 1526 /* malloc space and add 1 to i and j indices */ 1527 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1528 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1529 if (ja) { 1530 PetscCall(PetscMalloc1(nz, ja)); 1531 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1532 } 1533 } 1534 } else { 1535 *ia = tia; 1536 if (ja) *ja = tja; 1537 } 1538 PetscFunctionReturn(PETSC_SUCCESS); 1539 } 1540 1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1542 { 1543 PetscFunctionBegin; 1544 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1545 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1546 PetscCall(PetscFree(*ia)); 1547 if (ja) PetscCall(PetscFree(*ja)); 1548 } 1549 PetscFunctionReturn(PETSC_SUCCESS); 1550 } 1551 1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1553 { 1554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1555 1556 PetscFunctionBegin; 1557 if (A->hash_active) { 1558 PetscInt bs; 1559 A->ops[0] = a->cops; 1560 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1561 PetscCall(MatGetBlockSize(A, &bs)); 1562 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1563 PetscCall(PetscFree(a->dnz)); 1564 PetscCall(PetscFree(a->bdnz)); 1565 A->hash_active = PETSC_FALSE; 1566 } 1567 #if defined(PETSC_USE_LOG) 1568 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1569 #endif 1570 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1571 PetscCall(ISDestroy(&a->row)); 1572 PetscCall(ISDestroy(&a->col)); 1573 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1574 PetscCall(PetscFree(a->idiag)); 1575 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1576 PetscCall(PetscFree(a->solve_work)); 1577 PetscCall(PetscFree(a->mult_work)); 1578 PetscCall(PetscFree(a->sor_workt)); 1579 PetscCall(PetscFree(a->sor_work)); 1580 PetscCall(ISDestroy(&a->icol)); 1581 PetscCall(PetscFree(a->saved_values)); 1582 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1583 1584 PetscCall(MatDestroy(&a->sbaijMat)); 1585 PetscCall(MatDestroy(&a->parent)); 1586 PetscCall(PetscFree(A->data)); 1587 1588 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1596 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1597 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1598 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1599 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1600 #if defined(PETSC_HAVE_HYPRE) 1601 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1602 #endif 1603 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1604 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1609 { 1610 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1611 1612 PetscFunctionBegin; 1613 switch (op) { 1614 case MAT_ROW_ORIENTED: 1615 a->roworiented = flg; 1616 break; 1617 case MAT_KEEP_NONZERO_PATTERN: 1618 a->keepnonzeropattern = flg; 1619 break; 1620 case MAT_NEW_NONZERO_LOCATIONS: 1621 a->nonew = (flg ? 0 : 1); 1622 break; 1623 case MAT_NEW_NONZERO_LOCATION_ERR: 1624 a->nonew = (flg ? -1 : 0); 1625 break; 1626 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1627 a->nonew = (flg ? -2 : 0); 1628 break; 1629 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1630 a->nounused = (flg ? -1 : 0); 1631 break; 1632 case MAT_FORCE_DIAGONAL_ENTRIES: 1633 case MAT_IGNORE_OFF_PROC_ENTRIES: 1634 case MAT_USE_HASH_TABLE: 1635 case MAT_SORTED_FULL: 1636 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1637 break; 1638 case MAT_SPD: 1639 case MAT_SYMMETRIC: 1640 case MAT_STRUCTURALLY_SYMMETRIC: 1641 case MAT_HERMITIAN: 1642 case MAT_SYMMETRY_ETERNAL: 1643 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1644 case MAT_SUBMAT_SINGLEIS: 1645 case MAT_STRUCTURE_ONLY: 1646 case MAT_SPD_ETERNAL: 1647 /* if the diagonal matrix is square it inherits some of the properties above */ 1648 break; 1649 default: 1650 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1651 } 1652 PetscFunctionReturn(PETSC_SUCCESS); 1653 } 1654 1655 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1656 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1657 { 1658 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1659 MatScalar *aa_i; 1660 PetscScalar *v_i; 1661 1662 PetscFunctionBegin; 1663 bs = A->rmap->bs; 1664 bs2 = bs * bs; 1665 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1666 1667 bn = row / bs; /* Block number */ 1668 bp = row % bs; /* Block Position */ 1669 M = ai[bn + 1] - ai[bn]; 1670 *nz = bs * M; 1671 1672 if (v) { 1673 *v = NULL; 1674 if (*nz) { 1675 PetscCall(PetscMalloc1(*nz, v)); 1676 for (i = 0; i < M; i++) { /* for each block in the block row */ 1677 v_i = *v + i * bs; 1678 aa_i = aa + bs2 * (ai[bn] + i); 1679 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1680 } 1681 } 1682 } 1683 1684 if (idx) { 1685 *idx = NULL; 1686 if (*nz) { 1687 PetscCall(PetscMalloc1(*nz, idx)); 1688 for (i = 0; i < M; i++) { /* for each block in the block row */ 1689 idx_i = *idx + i * bs; 1690 itmp = bs * aj[ai[bn] + i]; 1691 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1692 } 1693 } 1694 } 1695 PetscFunctionReturn(PETSC_SUCCESS); 1696 } 1697 1698 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1699 { 1700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1701 1702 PetscFunctionBegin; 1703 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1704 PetscFunctionReturn(PETSC_SUCCESS); 1705 } 1706 1707 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1708 { 1709 PetscFunctionBegin; 1710 if (nz) *nz = 0; 1711 if (idx) PetscCall(PetscFree(*idx)); 1712 if (v) PetscCall(PetscFree(*v)); 1713 PetscFunctionReturn(PETSC_SUCCESS); 1714 } 1715 1716 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1717 { 1718 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1719 Mat C; 1720 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1721 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1722 MatScalar *ata, *aa = a->a; 1723 1724 PetscFunctionBegin; 1725 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1726 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1727 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1728 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1729 1730 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1731 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1732 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1733 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1734 1735 at = (Mat_SeqBAIJ *)C->data; 1736 ati = at->i; 1737 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1738 } else { 1739 C = *B; 1740 at = (Mat_SeqBAIJ *)C->data; 1741 ati = at->i; 1742 } 1743 1744 atj = at->j; 1745 ata = at->a; 1746 1747 /* Copy ati into atfill so we have locations of the next free space in atj */ 1748 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1749 1750 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1751 for (i = 0; i < mbs; i++) { 1752 anzj = ai[i + 1] - ai[i]; 1753 for (j = 0; j < anzj; j++) { 1754 atj[atfill[*aj]] = i; 1755 for (kr = 0; kr < bs; kr++) { 1756 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1757 } 1758 atfill[*aj++] += 1; 1759 } 1760 } 1761 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1762 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1763 1764 /* Clean up temporary space and complete requests. */ 1765 PetscCall(PetscFree(atfill)); 1766 1767 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1768 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1769 *B = C; 1770 } else { 1771 PetscCall(MatHeaderMerge(A, &C)); 1772 } 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1777 { 1778 Mat Btrans; 1779 1780 PetscFunctionBegin; 1781 *f = PETSC_FALSE; 1782 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1783 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1784 PetscCall(MatDestroy(&Btrans)); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1789 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1790 { 1791 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1792 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1793 PetscInt *rowlens, *colidxs; 1794 PetscScalar *matvals; 1795 1796 PetscFunctionBegin; 1797 PetscCall(PetscViewerSetUp(viewer)); 1798 1799 M = mat->rmap->N; 1800 N = mat->cmap->N; 1801 m = mat->rmap->n; 1802 bs = mat->rmap->bs; 1803 nz = bs * bs * A->nz; 1804 1805 /* write matrix header */ 1806 header[0] = MAT_FILE_CLASSID; 1807 header[1] = M; 1808 header[2] = N; 1809 header[3] = nz; 1810 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1811 1812 /* store row lengths */ 1813 PetscCall(PetscMalloc1(m, &rowlens)); 1814 for (cnt = 0, i = 0; i < A->mbs; i++) 1815 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1816 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1817 PetscCall(PetscFree(rowlens)); 1818 1819 /* store column indices */ 1820 PetscCall(PetscMalloc1(nz, &colidxs)); 1821 for (cnt = 0, i = 0; i < A->mbs; i++) 1822 for (k = 0; k < bs; k++) 1823 for (j = A->i[i]; j < A->i[i + 1]; j++) 1824 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1825 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1826 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1827 PetscCall(PetscFree(colidxs)); 1828 1829 /* store nonzero values */ 1830 PetscCall(PetscMalloc1(nz, &matvals)); 1831 for (cnt = 0, i = 0; i < A->mbs; i++) 1832 for (k = 0; k < bs; k++) 1833 for (j = A->i[i]; j < A->i[i + 1]; j++) 1834 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1835 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1836 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1837 PetscCall(PetscFree(matvals)); 1838 1839 /* write block size option to the viewer's .info file */ 1840 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1841 PetscFunctionReturn(PETSC_SUCCESS); 1842 } 1843 1844 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1845 { 1846 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1847 PetscInt i, bs = A->rmap->bs, k; 1848 1849 PetscFunctionBegin; 1850 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1851 for (i = 0; i < a->mbs; i++) { 1852 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1853 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1854 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1855 } 1856 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1857 PetscFunctionReturn(PETSC_SUCCESS); 1858 } 1859 1860 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1861 { 1862 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1863 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1864 PetscViewerFormat format; 1865 1866 PetscFunctionBegin; 1867 if (A->structure_only) { 1868 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 1872 PetscCall(PetscViewerGetFormat(viewer, &format)); 1873 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1874 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1875 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1876 const char *matname; 1877 Mat aij; 1878 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1879 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1880 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1881 PetscCall(MatView(aij, viewer)); 1882 PetscCall(MatDestroy(&aij)); 1883 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1884 PetscFunctionReturn(PETSC_SUCCESS); 1885 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1886 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1887 for (i = 0; i < a->mbs; i++) { 1888 for (j = 0; j < bs; j++) { 1889 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1890 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1891 for (l = 0; l < bs; l++) { 1892 #if defined(PETSC_USE_COMPLEX) 1893 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1894 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1895 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1896 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1897 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1898 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1899 } 1900 #else 1901 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1902 #endif 1903 } 1904 } 1905 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1906 } 1907 } 1908 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1909 } else { 1910 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1911 for (i = 0; i < a->mbs; i++) { 1912 for (j = 0; j < bs; j++) { 1913 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1914 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1915 for (l = 0; l < bs; l++) { 1916 #if defined(PETSC_USE_COMPLEX) 1917 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1918 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1919 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1920 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1921 } else { 1922 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1923 } 1924 #else 1925 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1926 #endif 1927 } 1928 } 1929 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1930 } 1931 } 1932 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1933 } 1934 PetscCall(PetscViewerFlush(viewer)); 1935 PetscFunctionReturn(PETSC_SUCCESS); 1936 } 1937 1938 #include <petscdraw.h> 1939 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1940 { 1941 Mat A = (Mat)Aa; 1942 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1943 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1944 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1945 MatScalar *aa; 1946 PetscViewer viewer; 1947 PetscViewerFormat format; 1948 1949 PetscFunctionBegin; 1950 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1951 PetscCall(PetscViewerGetFormat(viewer, &format)); 1952 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1953 1954 /* loop over matrix elements drawing boxes */ 1955 1956 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1957 PetscDrawCollectiveBegin(draw); 1958 /* Blue for negative, Cyan for zero and Red for positive */ 1959 color = PETSC_DRAW_BLUE; 1960 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1961 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1962 y_l = A->rmap->N - row - 1.0; 1963 y_r = y_l + 1.0; 1964 x_l = a->j[j] * bs; 1965 x_r = x_l + 1.0; 1966 aa = a->a + j * bs2; 1967 for (k = 0; k < bs; k++) { 1968 for (l = 0; l < bs; l++) { 1969 if (PetscRealPart(*aa++) >= 0.) continue; 1970 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1971 } 1972 } 1973 } 1974 } 1975 color = PETSC_DRAW_CYAN; 1976 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1977 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1978 y_l = A->rmap->N - row - 1.0; 1979 y_r = y_l + 1.0; 1980 x_l = a->j[j] * bs; 1981 x_r = x_l + 1.0; 1982 aa = a->a + j * bs2; 1983 for (k = 0; k < bs; k++) { 1984 for (l = 0; l < bs; l++) { 1985 if (PetscRealPart(*aa++) != 0.) continue; 1986 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1987 } 1988 } 1989 } 1990 } 1991 color = PETSC_DRAW_RED; 1992 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1993 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1994 y_l = A->rmap->N - row - 1.0; 1995 y_r = y_l + 1.0; 1996 x_l = a->j[j] * bs; 1997 x_r = x_l + 1.0; 1998 aa = a->a + j * bs2; 1999 for (k = 0; k < bs; k++) { 2000 for (l = 0; l < bs; l++) { 2001 if (PetscRealPart(*aa++) <= 0.) continue; 2002 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2003 } 2004 } 2005 } 2006 } 2007 PetscDrawCollectiveEnd(draw); 2008 } else { 2009 /* use contour shading to indicate magnitude of values */ 2010 /* first determine max of all nonzero values */ 2011 PetscReal minv = 0.0, maxv = 0.0; 2012 PetscDraw popup; 2013 2014 for (i = 0; i < a->nz * a->bs2; i++) { 2015 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2016 } 2017 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2018 PetscCall(PetscDrawGetPopup(draw, &popup)); 2019 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2020 2021 PetscDrawCollectiveBegin(draw); 2022 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2023 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2024 y_l = A->rmap->N - row - 1.0; 2025 y_r = y_l + 1.0; 2026 x_l = a->j[j] * bs; 2027 x_r = x_l + 1.0; 2028 aa = a->a + j * bs2; 2029 for (k = 0; k < bs; k++) { 2030 for (l = 0; l < bs; l++) { 2031 MatScalar v = *aa++; 2032 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2033 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2034 } 2035 } 2036 } 2037 } 2038 PetscDrawCollectiveEnd(draw); 2039 } 2040 PetscFunctionReturn(PETSC_SUCCESS); 2041 } 2042 2043 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2044 { 2045 PetscReal xl, yl, xr, yr, w, h; 2046 PetscDraw draw; 2047 PetscBool isnull; 2048 2049 PetscFunctionBegin; 2050 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2051 PetscCall(PetscDrawIsNull(draw, &isnull)); 2052 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2053 2054 xr = A->cmap->n; 2055 yr = A->rmap->N; 2056 h = yr / 10.0; 2057 w = xr / 10.0; 2058 xr += w; 2059 yr += h; 2060 xl = -w; 2061 yl = -h; 2062 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2063 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2064 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2065 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2066 PetscCall(PetscDrawSave(draw)); 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2071 { 2072 PetscBool iascii, isbinary, isdraw; 2073 2074 PetscFunctionBegin; 2075 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2076 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2077 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2078 if (iascii) { 2079 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2080 } else if (isbinary) { 2081 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2082 } else if (isdraw) { 2083 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2084 } else { 2085 Mat B; 2086 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2087 PetscCall(MatView(B, viewer)); 2088 PetscCall(MatDestroy(&B)); 2089 } 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2094 { 2095 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2096 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2097 PetscInt *ai = a->i, *ailen = a->ilen; 2098 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2099 MatScalar *ap, *aa = a->a; 2100 2101 PetscFunctionBegin; 2102 for (k = 0; k < m; k++) { /* loop over rows */ 2103 row = im[k]; 2104 brow = row / bs; 2105 if (row < 0) { 2106 v += n; 2107 continue; 2108 } /* negative row */ 2109 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2110 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2111 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2112 nrow = ailen[brow]; 2113 for (l = 0; l < n; l++) { /* loop over columns */ 2114 if (in[l] < 0) { 2115 v++; 2116 continue; 2117 } /* negative column */ 2118 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2119 col = in[l]; 2120 bcol = col / bs; 2121 cidx = col % bs; 2122 ridx = row % bs; 2123 high = nrow; 2124 low = 0; /* assume unsorted */ 2125 while (high - low > 5) { 2126 t = (low + high) / 2; 2127 if (rp[t] > bcol) high = t; 2128 else low = t; 2129 } 2130 for (i = low; i < high; i++) { 2131 if (rp[i] > bcol) break; 2132 if (rp[i] == bcol) { 2133 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2134 goto finished; 2135 } 2136 } 2137 *v++ = 0.0; 2138 finished:; 2139 } 2140 } 2141 PetscFunctionReturn(PETSC_SUCCESS); 2142 } 2143 2144 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2145 { 2146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2147 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2148 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2149 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2150 PetscBool roworiented = a->roworiented; 2151 const PetscScalar *value = v; 2152 MatScalar *ap = NULL, *aa = a->a, *bap; 2153 2154 PetscFunctionBegin; 2155 if (roworiented) { 2156 stepval = (n - 1) * bs; 2157 } else { 2158 stepval = (m - 1) * bs; 2159 } 2160 for (k = 0; k < m; k++) { /* loop over added rows */ 2161 row = im[k]; 2162 if (row < 0) continue; 2163 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2164 rp = aj + ai[row]; 2165 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2166 rmax = imax[row]; 2167 nrow = ailen[row]; 2168 low = 0; 2169 high = nrow; 2170 for (l = 0; l < n; l++) { /* loop over added columns */ 2171 if (in[l] < 0) continue; 2172 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2173 col = in[l]; 2174 if (!A->structure_only) { 2175 if (roworiented) { 2176 value = v + (k * (stepval + bs) + l) * bs; 2177 } else { 2178 value = v + (l * (stepval + bs) + k) * bs; 2179 } 2180 } 2181 if (col <= lastcol) low = 0; 2182 else high = nrow; 2183 lastcol = col; 2184 while (high - low > 7) { 2185 t = (low + high) / 2; 2186 if (rp[t] > col) high = t; 2187 else low = t; 2188 } 2189 for (i = low; i < high; i++) { 2190 if (rp[i] > col) break; 2191 if (rp[i] == col) { 2192 if (A->structure_only) goto noinsert2; 2193 bap = ap + bs2 * i; 2194 if (roworiented) { 2195 if (is == ADD_VALUES) { 2196 for (ii = 0; ii < bs; ii++, value += stepval) { 2197 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2198 } 2199 } else { 2200 for (ii = 0; ii < bs; ii++, value += stepval) { 2201 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2202 } 2203 } 2204 } else { 2205 if (is == ADD_VALUES) { 2206 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2207 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2208 bap += bs; 2209 } 2210 } else { 2211 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2212 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2213 bap += bs; 2214 } 2215 } 2216 } 2217 goto noinsert2; 2218 } 2219 } 2220 if (nonew == 1) goto noinsert2; 2221 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2222 if (A->structure_only) { 2223 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2224 } else { 2225 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2226 } 2227 N = nrow++ - 1; 2228 high++; 2229 /* shift up all the later entries in this row */ 2230 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2231 rp[i] = col; 2232 if (!A->structure_only) { 2233 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2234 bap = ap + bs2 * i; 2235 if (roworiented) { 2236 for (ii = 0; ii < bs; ii++, value += stepval) { 2237 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2238 } 2239 } else { 2240 for (ii = 0; ii < bs; ii++, value += stepval) { 2241 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2242 } 2243 } 2244 } 2245 noinsert2:; 2246 low = i; 2247 } 2248 ailen[row] = nrow; 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2254 { 2255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2256 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2257 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2258 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2259 MatScalar *aa = a->a, *ap; 2260 PetscReal ratio = 0.6; 2261 2262 PetscFunctionBegin; 2263 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2264 2265 if (m) rmax = ailen[0]; 2266 for (i = 1; i < mbs; i++) { 2267 /* move each row back by the amount of empty slots (fshift) before it*/ 2268 fshift += imax[i - 1] - ailen[i - 1]; 2269 rmax = PetscMax(rmax, ailen[i]); 2270 if (fshift) { 2271 ip = aj + ai[i]; 2272 ap = aa + bs2 * ai[i]; 2273 N = ailen[i]; 2274 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2275 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2276 } 2277 ai[i] = ai[i - 1] + ailen[i - 1]; 2278 } 2279 if (mbs) { 2280 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2281 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2282 } 2283 2284 /* reset ilen and imax for each row */ 2285 a->nonzerorowcnt = 0; 2286 if (A->structure_only) { 2287 PetscCall(PetscFree2(a->imax, a->ilen)); 2288 } else { /* !A->structure_only */ 2289 for (i = 0; i < mbs; i++) { 2290 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2291 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2292 } 2293 } 2294 a->nz = ai[mbs]; 2295 2296 /* diagonals may have moved, so kill the diagonal pointers */ 2297 a->idiagvalid = PETSC_FALSE; 2298 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2299 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2300 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2301 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2302 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2303 2304 A->info.mallocs += a->reallocs; 2305 a->reallocs = 0; 2306 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2307 a->rmax = rmax; 2308 2309 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2310 PetscFunctionReturn(PETSC_SUCCESS); 2311 } 2312 2313 /* 2314 This function returns an array of flags which indicate the locations of contiguous 2315 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2316 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2317 Assume: sizes should be long enough to hold all the values. 2318 */ 2319 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2320 { 2321 PetscInt j = 0; 2322 2323 PetscFunctionBegin; 2324 for (PetscInt i = 0; i < n; j++) { 2325 PetscInt row = idx[i]; 2326 if (row % bs != 0) { /* Not the beginning of a block */ 2327 sizes[j] = 1; 2328 i++; 2329 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2330 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2331 i++; 2332 } else { /* Beginning of the block, so check if the complete block exists */ 2333 PetscBool flg = PETSC_TRUE; 2334 for (PetscInt k = 1; k < bs; k++) { 2335 if (row + k != idx[i + k]) { /* break in the block */ 2336 flg = PETSC_FALSE; 2337 break; 2338 } 2339 } 2340 if (flg) { /* No break in the bs */ 2341 sizes[j] = bs; 2342 i += bs; 2343 } else { 2344 sizes[j] = 1; 2345 i++; 2346 } 2347 } 2348 } 2349 *bs_max = j; 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } 2352 2353 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2354 { 2355 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2356 PetscInt i, j, k, count, *rows; 2357 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2358 PetscScalar zero = 0.0; 2359 MatScalar *aa; 2360 const PetscScalar *xx; 2361 PetscScalar *bb; 2362 2363 PetscFunctionBegin; 2364 /* fix right hand side if needed */ 2365 if (x && b) { 2366 PetscCall(VecGetArrayRead(x, &xx)); 2367 PetscCall(VecGetArray(b, &bb)); 2368 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2369 PetscCall(VecRestoreArrayRead(x, &xx)); 2370 PetscCall(VecRestoreArray(b, &bb)); 2371 } 2372 2373 /* Make a copy of the IS and sort it */ 2374 /* allocate memory for rows,sizes */ 2375 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2376 2377 /* copy IS values to rows, and sort them */ 2378 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2379 PetscCall(PetscSortInt(is_n, rows)); 2380 2381 if (baij->keepnonzeropattern) { 2382 for (i = 0; i < is_n; i++) sizes[i] = 1; 2383 bs_max = is_n; 2384 } else { 2385 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2386 A->nonzerostate++; 2387 } 2388 2389 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2390 row = rows[j]; 2391 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2392 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2393 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2394 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2395 if (diag != (PetscScalar)0.0) { 2396 if (baij->ilen[row / bs] > 0) { 2397 baij->ilen[row / bs] = 1; 2398 baij->j[baij->i[row / bs]] = row / bs; 2399 2400 PetscCall(PetscArrayzero(aa, count * bs)); 2401 } 2402 /* Now insert all the diagonal values for this bs */ 2403 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2404 } else { /* (diag == 0.0) */ 2405 baij->ilen[row / bs] = 0; 2406 } /* end (diag == 0.0) */ 2407 } else { /* (sizes[i] != bs) */ 2408 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2409 for (k = 0; k < count; k++) { 2410 aa[0] = zero; 2411 aa += bs; 2412 } 2413 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2414 } 2415 } 2416 2417 PetscCall(PetscFree2(rows, sizes)); 2418 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2419 PetscFunctionReturn(PETSC_SUCCESS); 2420 } 2421 2422 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2423 { 2424 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2425 PetscInt i, j, k, count; 2426 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2427 PetscScalar zero = 0.0; 2428 MatScalar *aa; 2429 const PetscScalar *xx; 2430 PetscScalar *bb; 2431 PetscBool *zeroed, vecs = PETSC_FALSE; 2432 2433 PetscFunctionBegin; 2434 /* fix right hand side if needed */ 2435 if (x && b) { 2436 PetscCall(VecGetArrayRead(x, &xx)); 2437 PetscCall(VecGetArray(b, &bb)); 2438 vecs = PETSC_TRUE; 2439 } 2440 2441 /* zero the columns */ 2442 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2443 for (i = 0; i < is_n; i++) { 2444 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2445 zeroed[is_idx[i]] = PETSC_TRUE; 2446 } 2447 for (i = 0; i < A->rmap->N; i++) { 2448 if (!zeroed[i]) { 2449 row = i / bs; 2450 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2451 for (k = 0; k < bs; k++) { 2452 col = bs * baij->j[j] + k; 2453 if (zeroed[col]) { 2454 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2455 if (vecs) bb[i] -= aa[0] * xx[col]; 2456 aa[0] = 0.0; 2457 } 2458 } 2459 } 2460 } else if (vecs) bb[i] = diag * xx[i]; 2461 } 2462 PetscCall(PetscFree(zeroed)); 2463 if (vecs) { 2464 PetscCall(VecRestoreArrayRead(x, &xx)); 2465 PetscCall(VecRestoreArray(b, &bb)); 2466 } 2467 2468 /* zero the rows */ 2469 for (i = 0; i < is_n; i++) { 2470 row = is_idx[i]; 2471 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2472 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2473 for (k = 0; k < count; k++) { 2474 aa[0] = zero; 2475 aa += bs; 2476 } 2477 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2478 } 2479 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2480 PetscFunctionReturn(PETSC_SUCCESS); 2481 } 2482 2483 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2484 { 2485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2486 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2487 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2488 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2489 PetscInt ridx, cidx, bs2 = a->bs2; 2490 PetscBool roworiented = a->roworiented; 2491 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2492 2493 PetscFunctionBegin; 2494 for (k = 0; k < m; k++) { /* loop over added rows */ 2495 row = im[k]; 2496 brow = row / bs; 2497 if (row < 0) continue; 2498 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2499 rp = aj + ai[brow]; 2500 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2501 rmax = imax[brow]; 2502 nrow = ailen[brow]; 2503 low = 0; 2504 high = nrow; 2505 for (l = 0; l < n; l++) { /* loop over added columns */ 2506 if (in[l] < 0) continue; 2507 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2508 col = in[l]; 2509 bcol = col / bs; 2510 ridx = row % bs; 2511 cidx = col % bs; 2512 if (!A->structure_only) { 2513 if (roworiented) { 2514 value = v[l + k * n]; 2515 } else { 2516 value = v[k + l * m]; 2517 } 2518 } 2519 if (col <= lastcol) low = 0; 2520 else high = nrow; 2521 lastcol = col; 2522 while (high - low > 7) { 2523 t = (low + high) / 2; 2524 if (rp[t] > bcol) high = t; 2525 else low = t; 2526 } 2527 for (i = low; i < high; i++) { 2528 if (rp[i] > bcol) break; 2529 if (rp[i] == bcol) { 2530 bap = ap + bs2 * i + bs * cidx + ridx; 2531 if (!A->structure_only) { 2532 if (is == ADD_VALUES) *bap += value; 2533 else *bap = value; 2534 } 2535 goto noinsert1; 2536 } 2537 } 2538 if (nonew == 1) goto noinsert1; 2539 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2540 if (A->structure_only) { 2541 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2542 } else { 2543 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2544 } 2545 N = nrow++ - 1; 2546 high++; 2547 /* shift up all the later entries in this row */ 2548 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2549 rp[i] = bcol; 2550 if (!A->structure_only) { 2551 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2552 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2553 ap[bs2 * i + bs * cidx + ridx] = value; 2554 } 2555 a->nz++; 2556 A->nonzerostate++; 2557 noinsert1:; 2558 low = i; 2559 } 2560 ailen[brow] = nrow; 2561 } 2562 PetscFunctionReturn(PETSC_SUCCESS); 2563 } 2564 2565 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2566 { 2567 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2568 Mat outA; 2569 PetscBool row_identity, col_identity; 2570 2571 PetscFunctionBegin; 2572 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2573 PetscCall(ISIdentity(row, &row_identity)); 2574 PetscCall(ISIdentity(col, &col_identity)); 2575 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2576 2577 outA = inA; 2578 inA->factortype = MAT_FACTOR_LU; 2579 PetscCall(PetscFree(inA->solvertype)); 2580 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2581 2582 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2583 2584 PetscCall(PetscObjectReference((PetscObject)row)); 2585 PetscCall(ISDestroy(&a->row)); 2586 a->row = row; 2587 PetscCall(PetscObjectReference((PetscObject)col)); 2588 PetscCall(ISDestroy(&a->col)); 2589 a->col = col; 2590 2591 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2592 PetscCall(ISDestroy(&a->icol)); 2593 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2594 2595 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2596 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2597 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2598 PetscFunctionReturn(PETSC_SUCCESS); 2599 } 2600 2601 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2602 { 2603 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2604 2605 PetscFunctionBegin; 2606 baij->nz = baij->maxnz; 2607 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2608 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2609 PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2614 2615 Input Parameters: 2616 + mat - the `MATSEQBAIJ` matrix 2617 - indices - the column indices 2618 2619 Level: advanced 2620 2621 Notes: 2622 This can be called if you have precomputed the nonzero structure of the 2623 matrix and want to provide it to the matrix object to improve the performance 2624 of the `MatSetValues()` operation. 2625 2626 You MUST have set the correct numbers of nonzeros per row in the call to 2627 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2628 2629 MUST be called before any calls to `MatSetValues()` 2630 2631 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2632 @*/ 2633 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2634 { 2635 PetscFunctionBegin; 2636 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2637 PetscAssertPointer(indices, 2); 2638 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2639 PetscFunctionReturn(PETSC_SUCCESS); 2640 } 2641 2642 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2643 { 2644 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2645 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2646 PetscReal atmp; 2647 PetscScalar *x, zero = 0.0; 2648 MatScalar *aa; 2649 PetscInt ncols, brow, krow, kcol; 2650 2651 PetscFunctionBegin; 2652 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2653 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2654 bs = A->rmap->bs; 2655 aa = a->a; 2656 ai = a->i; 2657 aj = a->j; 2658 mbs = a->mbs; 2659 2660 PetscCall(VecSet(v, zero)); 2661 PetscCall(VecGetArray(v, &x)); 2662 PetscCall(VecGetLocalSize(v, &n)); 2663 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2664 for (i = 0; i < mbs; i++) { 2665 ncols = ai[1] - ai[0]; 2666 ai++; 2667 brow = bs * i; 2668 for (j = 0; j < ncols; j++) { 2669 for (kcol = 0; kcol < bs; kcol++) { 2670 for (krow = 0; krow < bs; krow++) { 2671 atmp = PetscAbsScalar(*aa); 2672 aa++; 2673 row = brow + krow; /* row index */ 2674 if (PetscAbsScalar(x[row]) < atmp) { 2675 x[row] = atmp; 2676 if (idx) idx[row] = bs * (*aj) + kcol; 2677 } 2678 } 2679 } 2680 aj++; 2681 } 2682 } 2683 PetscCall(VecRestoreArray(v, &x)); 2684 PetscFunctionReturn(PETSC_SUCCESS); 2685 } 2686 2687 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2688 { 2689 PetscFunctionBegin; 2690 /* If the two matrices have the same copy implementation, use fast copy. */ 2691 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2693 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2694 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2695 2696 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2697 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2698 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2699 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2700 } else { 2701 PetscCall(MatCopy_Basic(A, B, str)); 2702 } 2703 PetscFunctionReturn(PETSC_SUCCESS); 2704 } 2705 2706 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2707 { 2708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2709 2710 PetscFunctionBegin; 2711 *array = a->a; 2712 PetscFunctionReturn(PETSC_SUCCESS); 2713 } 2714 2715 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2716 { 2717 PetscFunctionBegin; 2718 *array = NULL; 2719 PetscFunctionReturn(PETSC_SUCCESS); 2720 } 2721 2722 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2723 { 2724 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2725 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2726 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2727 2728 PetscFunctionBegin; 2729 /* Set the number of nonzeros in the new matrix */ 2730 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2731 PetscFunctionReturn(PETSC_SUCCESS); 2732 } 2733 2734 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2735 { 2736 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2737 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2738 PetscBLASInt one = 1; 2739 2740 PetscFunctionBegin; 2741 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2742 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2743 if (e) { 2744 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2745 if (e) { 2746 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2747 if (e) str = SAME_NONZERO_PATTERN; 2748 } 2749 } 2750 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2751 } 2752 if (str == SAME_NONZERO_PATTERN) { 2753 PetscScalar alpha = a; 2754 PetscBLASInt bnz; 2755 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2756 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2757 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2758 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2759 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2760 } else { 2761 Mat B; 2762 PetscInt *nnz; 2763 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2764 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2765 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2766 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2767 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2768 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2769 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2770 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2771 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2772 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2773 PetscCall(MatHeaderMerge(Y, &B)); 2774 PetscCall(PetscFree(nnz)); 2775 } 2776 PetscFunctionReturn(PETSC_SUCCESS); 2777 } 2778 2779 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2780 { 2781 #if PetscDefined(USE_COMPLEX) 2782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2783 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2784 MatScalar *aa = a->a; 2785 2786 PetscFunctionBegin; 2787 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2788 PetscFunctionReturn(PETSC_SUCCESS); 2789 #else 2790 (void)A; 2791 return PETSC_SUCCESS; 2792 #endif 2793 } 2794 2795 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2796 { 2797 #if PetscDefined(USE_COMPLEX) 2798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2799 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2800 MatScalar *aa = a->a; 2801 2802 PetscFunctionBegin; 2803 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2804 PetscFunctionReturn(PETSC_SUCCESS); 2805 #else 2806 (void)A; 2807 return PETSC_SUCCESS; 2808 #endif 2809 } 2810 2811 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2812 { 2813 #if PetscDefined(USE_COMPLEX) 2814 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2815 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2816 MatScalar *aa = a->a; 2817 2818 PetscFunctionBegin; 2819 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2820 PetscFunctionReturn(PETSC_SUCCESS); 2821 #else 2822 (void)A; 2823 return PETSC_SUCCESS; 2824 #endif 2825 } 2826 2827 /* 2828 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2829 */ 2830 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2831 { 2832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2833 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2834 PetscInt nz = a->i[m], row, *jj, mr, col; 2835 2836 PetscFunctionBegin; 2837 *nn = n; 2838 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2839 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2840 PetscCall(PetscCalloc1(n, &collengths)); 2841 PetscCall(PetscMalloc1(n + 1, &cia)); 2842 PetscCall(PetscMalloc1(nz, &cja)); 2843 jj = a->j; 2844 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2845 cia[0] = oshift; 2846 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2847 PetscCall(PetscArrayzero(collengths, n)); 2848 jj = a->j; 2849 for (row = 0; row < m; row++) { 2850 mr = a->i[row + 1] - a->i[row]; 2851 for (i = 0; i < mr; i++) { 2852 col = *jj++; 2853 2854 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2855 } 2856 } 2857 PetscCall(PetscFree(collengths)); 2858 *ia = cia; 2859 *ja = cja; 2860 PetscFunctionReturn(PETSC_SUCCESS); 2861 } 2862 2863 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2864 { 2865 PetscFunctionBegin; 2866 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2867 PetscCall(PetscFree(*ia)); 2868 PetscCall(PetscFree(*ja)); 2869 PetscFunctionReturn(PETSC_SUCCESS); 2870 } 2871 2872 /* 2873 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2874 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2875 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2876 */ 2877 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2878 { 2879 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2880 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2881 PetscInt nz = a->i[m], row, *jj, mr, col; 2882 PetscInt *cspidx; 2883 2884 PetscFunctionBegin; 2885 *nn = n; 2886 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2887 2888 PetscCall(PetscCalloc1(n, &collengths)); 2889 PetscCall(PetscMalloc1(n + 1, &cia)); 2890 PetscCall(PetscMalloc1(nz, &cja)); 2891 PetscCall(PetscMalloc1(nz, &cspidx)); 2892 jj = a->j; 2893 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2894 cia[0] = oshift; 2895 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2896 PetscCall(PetscArrayzero(collengths, n)); 2897 jj = a->j; 2898 for (row = 0; row < m; row++) { 2899 mr = a->i[row + 1] - a->i[row]; 2900 for (i = 0; i < mr; i++) { 2901 col = *jj++; 2902 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2903 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2904 } 2905 } 2906 PetscCall(PetscFree(collengths)); 2907 *ia = cia; 2908 *ja = cja; 2909 *spidx = cspidx; 2910 PetscFunctionReturn(PETSC_SUCCESS); 2911 } 2912 2913 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2914 { 2915 PetscFunctionBegin; 2916 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2917 PetscCall(PetscFree(*spidx)); 2918 PetscFunctionReturn(PETSC_SUCCESS); 2919 } 2920 2921 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2922 { 2923 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2924 2925 PetscFunctionBegin; 2926 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2927 PetscCall(MatShift_Basic(Y, a)); 2928 PetscFunctionReturn(PETSC_SUCCESS); 2929 } 2930 2931 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2932 { 2933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2934 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2935 PetscInt m = A->rmap->N, *ailen = a->ilen; 2936 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2937 MatScalar *aa = a->a, *ap; 2938 PetscBool zero; 2939 2940 PetscFunctionBegin; 2941 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2942 if (m) rmax = ailen[0]; 2943 for (i = 1; i <= mbs; i++) { 2944 for (k = ai[i - 1]; k < ai[i]; k++) { 2945 zero = PETSC_TRUE; 2946 ap = aa + bs2 * k; 2947 for (j = 0; j < bs2 && zero; j++) { 2948 if (ap[j] != 0.0) zero = PETSC_FALSE; 2949 } 2950 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2951 else { 2952 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2953 aj[k - fshift] = aj[k]; 2954 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2955 } 2956 } 2957 ai[i - 1] -= fshift_prev; 2958 fshift_prev = fshift; 2959 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2960 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2961 rmax = PetscMax(rmax, ailen[i - 1]); 2962 } 2963 if (fshift) { 2964 if (mbs) { 2965 ai[mbs] -= fshift; 2966 a->nz = ai[mbs]; 2967 } 2968 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 2969 A->nonzerostate++; 2970 A->info.nz_unneeded += (PetscReal)fshift; 2971 a->rmax = rmax; 2972 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2973 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2974 } 2975 PetscFunctionReturn(PETSC_SUCCESS); 2976 } 2977 2978 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2979 MatGetRow_SeqBAIJ, 2980 MatRestoreRow_SeqBAIJ, 2981 MatMult_SeqBAIJ_N, 2982 /* 4*/ MatMultAdd_SeqBAIJ_N, 2983 MatMultTranspose_SeqBAIJ, 2984 MatMultTransposeAdd_SeqBAIJ, 2985 NULL, 2986 NULL, 2987 NULL, 2988 /* 10*/ NULL, 2989 MatLUFactor_SeqBAIJ, 2990 NULL, 2991 NULL, 2992 MatTranspose_SeqBAIJ, 2993 /* 15*/ MatGetInfo_SeqBAIJ, 2994 MatEqual_SeqBAIJ, 2995 MatGetDiagonal_SeqBAIJ, 2996 MatDiagonalScale_SeqBAIJ, 2997 MatNorm_SeqBAIJ, 2998 /* 20*/ NULL, 2999 MatAssemblyEnd_SeqBAIJ, 3000 MatSetOption_SeqBAIJ, 3001 MatZeroEntries_SeqBAIJ, 3002 /* 24*/ MatZeroRows_SeqBAIJ, 3003 NULL, 3004 NULL, 3005 NULL, 3006 NULL, 3007 /* 29*/ MatSetUp_Seq_Hash, 3008 NULL, 3009 NULL, 3010 NULL, 3011 NULL, 3012 /* 34*/ MatDuplicate_SeqBAIJ, 3013 NULL, 3014 NULL, 3015 MatILUFactor_SeqBAIJ, 3016 NULL, 3017 /* 39*/ MatAXPY_SeqBAIJ, 3018 MatCreateSubMatrices_SeqBAIJ, 3019 MatIncreaseOverlap_SeqBAIJ, 3020 MatGetValues_SeqBAIJ, 3021 MatCopy_SeqBAIJ, 3022 /* 44*/ NULL, 3023 MatScale_SeqBAIJ, 3024 MatShift_SeqBAIJ, 3025 NULL, 3026 MatZeroRowsColumns_SeqBAIJ, 3027 /* 49*/ NULL, 3028 MatGetRowIJ_SeqBAIJ, 3029 MatRestoreRowIJ_SeqBAIJ, 3030 MatGetColumnIJ_SeqBAIJ, 3031 MatRestoreColumnIJ_SeqBAIJ, 3032 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3033 NULL, 3034 NULL, 3035 NULL, 3036 MatSetValuesBlocked_SeqBAIJ, 3037 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3038 MatDestroy_SeqBAIJ, 3039 MatView_SeqBAIJ, 3040 NULL, 3041 NULL, 3042 /* 64*/ NULL, 3043 NULL, 3044 NULL, 3045 NULL, 3046 NULL, 3047 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3048 NULL, 3049 MatConvert_Basic, 3050 NULL, 3051 NULL, 3052 /* 74*/ NULL, 3053 MatFDColoringApply_BAIJ, 3054 NULL, 3055 NULL, 3056 NULL, 3057 /* 79*/ NULL, 3058 NULL, 3059 NULL, 3060 NULL, 3061 MatLoad_SeqBAIJ, 3062 /* 84*/ NULL, 3063 NULL, 3064 NULL, 3065 NULL, 3066 NULL, 3067 /* 89*/ NULL, 3068 NULL, 3069 NULL, 3070 NULL, 3071 NULL, 3072 /* 94*/ NULL, 3073 NULL, 3074 NULL, 3075 NULL, 3076 NULL, 3077 /* 99*/ NULL, 3078 NULL, 3079 NULL, 3080 MatConjugate_SeqBAIJ, 3081 NULL, 3082 /*104*/ NULL, 3083 MatRealPart_SeqBAIJ, 3084 MatImaginaryPart_SeqBAIJ, 3085 NULL, 3086 NULL, 3087 /*109*/ NULL, 3088 NULL, 3089 NULL, 3090 NULL, 3091 MatMissingDiagonal_SeqBAIJ, 3092 /*114*/ NULL, 3093 NULL, 3094 NULL, 3095 NULL, 3096 NULL, 3097 /*119*/ NULL, 3098 NULL, 3099 MatMultHermitianTranspose_SeqBAIJ, 3100 MatMultHermitianTransposeAdd_SeqBAIJ, 3101 NULL, 3102 /*124*/ NULL, 3103 MatGetColumnReductions_SeqBAIJ, 3104 MatInvertBlockDiagonal_SeqBAIJ, 3105 NULL, 3106 NULL, 3107 /*129*/ NULL, 3108 NULL, 3109 NULL, 3110 NULL, 3111 NULL, 3112 /*134*/ NULL, 3113 NULL, 3114 NULL, 3115 NULL, 3116 NULL, 3117 /*139*/ MatSetBlockSizes_Default, 3118 NULL, 3119 NULL, 3120 MatFDColoringSetUp_SeqXAIJ, 3121 NULL, 3122 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3123 MatDestroySubMatrices_SeqBAIJ, 3124 NULL, 3125 NULL, 3126 NULL, 3127 NULL, 3128 /*150*/ NULL, 3129 MatEliminateZeros_SeqBAIJ}; 3130 3131 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3132 { 3133 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3134 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3135 3136 PetscFunctionBegin; 3137 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3138 3139 /* allocate space for values if not already there */ 3140 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3141 3142 /* copy values over */ 3143 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3144 PetscFunctionReturn(PETSC_SUCCESS); 3145 } 3146 3147 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3148 { 3149 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3150 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3151 3152 PetscFunctionBegin; 3153 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3154 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3155 3156 /* copy values over */ 3157 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3158 PetscFunctionReturn(PETSC_SUCCESS); 3159 } 3160 3161 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3162 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3163 3164 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3165 { 3166 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3167 PetscInt i, mbs, nbs, bs2; 3168 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3169 3170 PetscFunctionBegin; 3171 if (B->hash_active) { 3172 PetscInt bs; 3173 B->ops[0] = b->cops; 3174 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3175 PetscCall(MatGetBlockSize(B, &bs)); 3176 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3177 PetscCall(PetscFree(b->dnz)); 3178 PetscCall(PetscFree(b->bdnz)); 3179 B->hash_active = PETSC_FALSE; 3180 } 3181 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3182 if (nz == MAT_SKIP_ALLOCATION) { 3183 skipallocation = PETSC_TRUE; 3184 nz = 0; 3185 } 3186 3187 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3188 PetscCall(PetscLayoutSetUp(B->rmap)); 3189 PetscCall(PetscLayoutSetUp(B->cmap)); 3190 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3191 3192 B->preallocated = PETSC_TRUE; 3193 3194 mbs = B->rmap->n / bs; 3195 nbs = B->cmap->n / bs; 3196 bs2 = bs * bs; 3197 3198 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3199 3200 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3201 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3202 if (nnz) { 3203 for (i = 0; i < mbs; i++) { 3204 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3205 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3206 } 3207 } 3208 3209 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3210 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3211 PetscOptionsEnd(); 3212 3213 if (!flg) { 3214 switch (bs) { 3215 case 1: 3216 B->ops->mult = MatMult_SeqBAIJ_1; 3217 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3218 break; 3219 case 2: 3220 B->ops->mult = MatMult_SeqBAIJ_2; 3221 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3222 break; 3223 case 3: 3224 B->ops->mult = MatMult_SeqBAIJ_3; 3225 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3226 break; 3227 case 4: 3228 B->ops->mult = MatMult_SeqBAIJ_4; 3229 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3230 break; 3231 case 5: 3232 B->ops->mult = MatMult_SeqBAIJ_5; 3233 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3234 break; 3235 case 6: 3236 B->ops->mult = MatMult_SeqBAIJ_6; 3237 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3238 break; 3239 case 7: 3240 B->ops->mult = MatMult_SeqBAIJ_7; 3241 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3242 break; 3243 case 9: { 3244 PetscInt version = 1; 3245 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3246 switch (version) { 3247 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3248 case 1: 3249 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3250 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3251 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3252 break; 3253 #endif 3254 default: 3255 B->ops->mult = MatMult_SeqBAIJ_N; 3256 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3257 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3258 break; 3259 } 3260 break; 3261 } 3262 case 11: 3263 B->ops->mult = MatMult_SeqBAIJ_11; 3264 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3265 break; 3266 case 12: { 3267 PetscInt version = 1; 3268 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3269 switch (version) { 3270 case 1: 3271 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3272 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3273 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3274 break; 3275 case 2: 3276 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3277 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3278 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3279 break; 3280 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3281 case 3: 3282 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3283 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3284 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3285 break; 3286 #endif 3287 default: 3288 B->ops->mult = MatMult_SeqBAIJ_N; 3289 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3290 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3291 break; 3292 } 3293 break; 3294 } 3295 case 15: { 3296 PetscInt version = 1; 3297 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3298 switch (version) { 3299 case 1: 3300 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3301 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3302 break; 3303 case 2: 3304 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3305 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3306 break; 3307 case 3: 3308 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3309 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3310 break; 3311 case 4: 3312 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3313 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3314 break; 3315 default: 3316 B->ops->mult = MatMult_SeqBAIJ_N; 3317 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3318 break; 3319 } 3320 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3321 break; 3322 } 3323 default: 3324 B->ops->mult = MatMult_SeqBAIJ_N; 3325 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3326 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3327 break; 3328 } 3329 } 3330 B->ops->sor = MatSOR_SeqBAIJ; 3331 b->mbs = mbs; 3332 b->nbs = nbs; 3333 if (!skipallocation) { 3334 if (!b->imax) { 3335 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3336 3337 b->free_imax_ilen = PETSC_TRUE; 3338 } 3339 /* b->ilen will count nonzeros in each block row so far. */ 3340 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3341 if (!nnz) { 3342 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3343 else if (nz < 0) nz = 1; 3344 nz = PetscMin(nz, nbs); 3345 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3346 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3347 } else { 3348 PetscInt64 nz64 = 0; 3349 for (i = 0; i < mbs; i++) { 3350 b->imax[i] = nnz[i]; 3351 nz64 += nnz[i]; 3352 } 3353 PetscCall(PetscIntCast(nz64, &nz)); 3354 } 3355 3356 /* allocate the matrix space */ 3357 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3358 if (B->structure_only) { 3359 PetscCall(PetscMalloc1(nz, &b->j)); 3360 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3361 } else { 3362 PetscInt nzbs2 = 0; 3363 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3364 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3365 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3366 } 3367 PetscCall(PetscArrayzero(b->j, nz)); 3368 3369 if (B->structure_only) { 3370 b->singlemalloc = PETSC_FALSE; 3371 b->free_a = PETSC_FALSE; 3372 } else { 3373 b->singlemalloc = PETSC_TRUE; 3374 b->free_a = PETSC_TRUE; 3375 } 3376 b->free_ij = PETSC_TRUE; 3377 3378 b->i[0] = 0; 3379 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3380 3381 } else { 3382 b->free_a = PETSC_FALSE; 3383 b->free_ij = PETSC_FALSE; 3384 } 3385 3386 b->bs2 = bs2; 3387 b->mbs = mbs; 3388 b->nz = 0; 3389 b->maxnz = nz; 3390 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3391 B->was_assembled = PETSC_FALSE; 3392 B->assembled = PETSC_FALSE; 3393 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3394 PetscFunctionReturn(PETSC_SUCCESS); 3395 } 3396 3397 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3398 { 3399 PetscInt i, m, nz, nz_max = 0, *nnz; 3400 PetscScalar *values = NULL; 3401 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3402 3403 PetscFunctionBegin; 3404 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3405 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3406 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3407 PetscCall(PetscLayoutSetUp(B->rmap)); 3408 PetscCall(PetscLayoutSetUp(B->cmap)); 3409 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3410 m = B->rmap->n / bs; 3411 3412 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3413 PetscCall(PetscMalloc1(m + 1, &nnz)); 3414 for (i = 0; i < m; i++) { 3415 nz = ii[i + 1] - ii[i]; 3416 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3417 nz_max = PetscMax(nz_max, nz); 3418 nnz[i] = nz; 3419 } 3420 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3421 PetscCall(PetscFree(nnz)); 3422 3423 values = (PetscScalar *)V; 3424 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3425 for (i = 0; i < m; i++) { 3426 PetscInt ncols = ii[i + 1] - ii[i]; 3427 const PetscInt *icols = jj + ii[i]; 3428 if (bs == 1 || !roworiented) { 3429 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3430 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3431 } else { 3432 PetscInt j; 3433 for (j = 0; j < ncols; j++) { 3434 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3435 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3436 } 3437 } 3438 } 3439 if (!V) PetscCall(PetscFree(values)); 3440 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3441 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3442 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3443 PetscFunctionReturn(PETSC_SUCCESS); 3444 } 3445 3446 /*@C 3447 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3448 3449 Not Collective 3450 3451 Input Parameter: 3452 . A - a `MATSEQBAIJ` matrix 3453 3454 Output Parameter: 3455 . array - pointer to the data 3456 3457 Level: intermediate 3458 3459 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3460 @*/ 3461 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3462 { 3463 PetscFunctionBegin; 3464 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3465 PetscFunctionReturn(PETSC_SUCCESS); 3466 } 3467 3468 /*@C 3469 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3470 3471 Not Collective 3472 3473 Input Parameters: 3474 + A - a `MATSEQBAIJ` matrix 3475 - array - pointer to the data 3476 3477 Level: intermediate 3478 3479 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3480 @*/ 3481 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3482 { 3483 PetscFunctionBegin; 3484 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3485 PetscFunctionReturn(PETSC_SUCCESS); 3486 } 3487 3488 /*MC 3489 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3490 block sparse compressed row format. 3491 3492 Options Database Keys: 3493 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3494 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3495 3496 Level: beginner 3497 3498 Notes: 3499 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3500 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3501 3502 Run with `-info` to see what version of the matrix-vector product is being used 3503 3504 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3505 M*/ 3506 3507 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3508 3509 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3510 { 3511 PetscMPIInt size; 3512 Mat_SeqBAIJ *b; 3513 3514 PetscFunctionBegin; 3515 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3516 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3517 3518 PetscCall(PetscNew(&b)); 3519 B->data = (void *)b; 3520 B->ops[0] = MatOps_Values; 3521 3522 b->row = NULL; 3523 b->col = NULL; 3524 b->icol = NULL; 3525 b->reallocs = 0; 3526 b->saved_values = NULL; 3527 3528 b->roworiented = PETSC_TRUE; 3529 b->nonew = 0; 3530 b->diag = NULL; 3531 B->spptr = NULL; 3532 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3533 b->keepnonzeropattern = PETSC_FALSE; 3534 3535 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3536 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3537 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3538 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3539 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3545 #if defined(PETSC_HAVE_HYPRE) 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3547 #endif 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3549 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3550 PetscFunctionReturn(PETSC_SUCCESS); 3551 } 3552 3553 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3554 { 3555 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3556 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3557 3558 PetscFunctionBegin; 3559 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3560 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3561 3562 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3563 c->imax = a->imax; 3564 c->ilen = a->ilen; 3565 c->free_imax_ilen = PETSC_FALSE; 3566 } else { 3567 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3568 for (i = 0; i < mbs; i++) { 3569 c->imax[i] = a->imax[i]; 3570 c->ilen[i] = a->ilen[i]; 3571 } 3572 c->free_imax_ilen = PETSC_TRUE; 3573 } 3574 3575 /* allocate the matrix space */ 3576 if (mallocmatspace) { 3577 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3578 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3579 3580 c->i = a->i; 3581 c->j = a->j; 3582 c->singlemalloc = PETSC_FALSE; 3583 c->free_a = PETSC_TRUE; 3584 c->free_ij = PETSC_FALSE; 3585 c->parent = A; 3586 C->preallocated = PETSC_TRUE; 3587 C->assembled = PETSC_TRUE; 3588 3589 PetscCall(PetscObjectReference((PetscObject)A)); 3590 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3591 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3592 } else { 3593 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3594 3595 c->singlemalloc = PETSC_TRUE; 3596 c->free_a = PETSC_TRUE; 3597 c->free_ij = PETSC_TRUE; 3598 3599 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3600 if (mbs > 0) { 3601 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3602 if (cpvalues == MAT_COPY_VALUES) { 3603 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3604 } else { 3605 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3606 } 3607 } 3608 C->preallocated = PETSC_TRUE; 3609 C->assembled = PETSC_TRUE; 3610 } 3611 } 3612 3613 c->roworiented = a->roworiented; 3614 c->nonew = a->nonew; 3615 3616 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3617 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3618 3619 c->bs2 = a->bs2; 3620 c->mbs = a->mbs; 3621 c->nbs = a->nbs; 3622 3623 if (a->diag) { 3624 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3625 c->diag = a->diag; 3626 c->free_diag = PETSC_FALSE; 3627 } else { 3628 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3629 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3630 c->free_diag = PETSC_TRUE; 3631 } 3632 } else c->diag = NULL; 3633 3634 c->nz = a->nz; 3635 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3636 c->solve_work = NULL; 3637 c->mult_work = NULL; 3638 c->sor_workt = NULL; 3639 c->sor_work = NULL; 3640 3641 c->compressedrow.use = a->compressedrow.use; 3642 c->compressedrow.nrows = a->compressedrow.nrows; 3643 if (a->compressedrow.use) { 3644 i = a->compressedrow.nrows; 3645 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3646 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3647 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3648 } else { 3649 c->compressedrow.use = PETSC_FALSE; 3650 c->compressedrow.i = NULL; 3651 c->compressedrow.rindex = NULL; 3652 } 3653 c->nonzerorowcnt = a->nonzerorowcnt; 3654 C->nonzerostate = A->nonzerostate; 3655 3656 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3657 PetscFunctionReturn(PETSC_SUCCESS); 3658 } 3659 3660 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3661 { 3662 PetscFunctionBegin; 3663 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3664 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3665 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3666 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3667 PetscFunctionReturn(PETSC_SUCCESS); 3668 } 3669 3670 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3671 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3672 { 3673 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3674 PetscInt *rowidxs, *colidxs; 3675 PetscScalar *matvals; 3676 3677 PetscFunctionBegin; 3678 PetscCall(PetscViewerSetUp(viewer)); 3679 3680 /* read matrix header */ 3681 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3682 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3683 M = header[1]; 3684 N = header[2]; 3685 nz = header[3]; 3686 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3687 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3688 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3689 3690 /* set block sizes from the viewer's .info file */ 3691 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3692 /* set local and global sizes if not set already */ 3693 if (mat->rmap->n < 0) mat->rmap->n = M; 3694 if (mat->cmap->n < 0) mat->cmap->n = N; 3695 if (mat->rmap->N < 0) mat->rmap->N = M; 3696 if (mat->cmap->N < 0) mat->cmap->N = N; 3697 PetscCall(PetscLayoutSetUp(mat->rmap)); 3698 PetscCall(PetscLayoutSetUp(mat->cmap)); 3699 3700 /* check if the matrix sizes are correct */ 3701 PetscCall(MatGetSize(mat, &rows, &cols)); 3702 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3703 PetscCall(MatGetBlockSize(mat, &bs)); 3704 PetscCall(MatGetLocalSize(mat, &m, &n)); 3705 mbs = m / bs; 3706 nbs = n / bs; 3707 3708 /* read in row lengths, column indices and nonzero values */ 3709 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3710 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3711 rowidxs[0] = 0; 3712 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3713 sum = rowidxs[m]; 3714 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3715 3716 /* read in column indices and nonzero values */ 3717 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3718 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3719 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3720 3721 { /* preallocate matrix storage */ 3722 PetscBT bt; /* helper bit set to count nonzeros */ 3723 PetscInt *nnz; 3724 PetscBool sbaij; 3725 3726 PetscCall(PetscBTCreate(nbs, &bt)); 3727 PetscCall(PetscCalloc1(mbs, &nnz)); 3728 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3729 for (i = 0; i < mbs; i++) { 3730 PetscCall(PetscBTMemzero(nbs, bt)); 3731 for (k = 0; k < bs; k++) { 3732 PetscInt row = bs * i + k; 3733 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3734 PetscInt col = colidxs[j]; 3735 if (!sbaij || col >= row) 3736 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3737 } 3738 } 3739 } 3740 PetscCall(PetscBTDestroy(&bt)); 3741 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3742 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3743 PetscCall(PetscFree(nnz)); 3744 } 3745 3746 /* store matrix values */ 3747 for (i = 0; i < m; i++) { 3748 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3749 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3750 } 3751 3752 PetscCall(PetscFree(rowidxs)); 3753 PetscCall(PetscFree2(colidxs, matvals)); 3754 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3755 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3756 PetscFunctionReturn(PETSC_SUCCESS); 3757 } 3758 3759 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3760 { 3761 PetscBool isbinary; 3762 3763 PetscFunctionBegin; 3764 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3765 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3766 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3767 PetscFunctionReturn(PETSC_SUCCESS); 3768 } 3769 3770 /*@C 3771 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3772 compressed row) format. For good matrix assembly performance the 3773 user should preallocate the matrix storage by setting the parameter `nz` 3774 (or the array `nnz`). 3775 3776 Collective 3777 3778 Input Parameters: 3779 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3780 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3781 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3782 . m - number of rows 3783 . n - number of columns 3784 . nz - number of nonzero blocks per block row (same for all rows) 3785 - nnz - array containing the number of nonzero blocks in the various block rows 3786 (possibly different for each block row) or `NULL` 3787 3788 Output Parameter: 3789 . A - the matrix 3790 3791 Options Database Keys: 3792 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3793 - -mat_block_size - size of the blocks to use 3794 3795 Level: intermediate 3796 3797 Notes: 3798 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3799 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3800 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3801 3802 The number of rows and columns must be divisible by blocksize. 3803 3804 If the `nnz` parameter is given then the `nz` parameter is ignored 3805 3806 A nonzero block is any block that as 1 or more nonzeros in it 3807 3808 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3809 storage. That is, the stored row and column indices can begin at 3810 either one (as in Fortran) or zero. 3811 3812 Specify the preallocated storage with either `nz` or `nnz` (not both). 3813 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3814 allocation. See [Sparse Matrices](sec_matsparse) for details. 3815 matrices. 3816 3817 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3818 @*/ 3819 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3820 { 3821 PetscFunctionBegin; 3822 PetscCall(MatCreate(comm, A)); 3823 PetscCall(MatSetSizes(*A, m, n, m, n)); 3824 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3825 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3826 PetscFunctionReturn(PETSC_SUCCESS); 3827 } 3828 3829 /*@C 3830 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3831 per row in the matrix. For good matrix assembly performance the 3832 user should preallocate the matrix storage by setting the parameter `nz` 3833 (or the array `nnz`). 3834 3835 Collective 3836 3837 Input Parameters: 3838 + B - the matrix 3839 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3840 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3841 . nz - number of block nonzeros per block row (same for all rows) 3842 - nnz - array containing the number of block nonzeros in the various block rows 3843 (possibly different for each block row) or `NULL` 3844 3845 Options Database Keys: 3846 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3847 - -mat_block_size - size of the blocks to use 3848 3849 Level: intermediate 3850 3851 Notes: 3852 If the `nnz` parameter is given then the `nz` parameter is ignored 3853 3854 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3855 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3856 You can also run with the option `-info` and look for messages with the string 3857 malloc in them to see if additional memory allocation was needed. 3858 3859 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3860 storage. That is, the stored row and column indices can begin at 3861 either one (as in Fortran) or zero. 3862 3863 Specify the preallocated storage with either nz or nnz (not both). 3864 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3865 allocation. See [Sparse Matrices](sec_matsparse) for details. 3866 3867 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3868 @*/ 3869 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3870 { 3871 PetscFunctionBegin; 3872 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3873 PetscValidType(B, 1); 3874 PetscValidLogicalCollectiveInt(B, bs, 2); 3875 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3876 PetscFunctionReturn(PETSC_SUCCESS); 3877 } 3878 3879 /*@C 3880 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3881 3882 Collective 3883 3884 Input Parameters: 3885 + B - the matrix 3886 . bs - the blocksize 3887 . i - the indices into `j` for the start of each local row (starts with zero) 3888 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3889 - v - optional values in the matrix 3890 3891 Level: advanced 3892 3893 Notes: 3894 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3895 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3896 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3897 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3898 block column and the second index is over columns within a block. 3899 3900 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3901 3902 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3903 @*/ 3904 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3905 { 3906 PetscFunctionBegin; 3907 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3908 PetscValidType(B, 1); 3909 PetscValidLogicalCollectiveInt(B, bs, 2); 3910 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3911 PetscFunctionReturn(PETSC_SUCCESS); 3912 } 3913 3914 /*@ 3915 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3916 3917 Collective 3918 3919 Input Parameters: 3920 + comm - must be an MPI communicator of size 1 3921 . bs - size of block 3922 . m - number of rows 3923 . n - number of columns 3924 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3925 . j - column indices 3926 - a - matrix values 3927 3928 Output Parameter: 3929 . mat - the matrix 3930 3931 Level: advanced 3932 3933 Notes: 3934 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3935 once the matrix is destroyed 3936 3937 You cannot set new nonzero locations into this matrix, that will generate an error. 3938 3939 The `i` and `j` indices are 0 based 3940 3941 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3942 3943 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3944 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3945 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3946 with column-major ordering within blocks. 3947 3948 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3949 @*/ 3950 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3951 { 3952 Mat_SeqBAIJ *baij; 3953 3954 PetscFunctionBegin; 3955 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3956 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3957 3958 PetscCall(MatCreate(comm, mat)); 3959 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3960 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3961 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3962 baij = (Mat_SeqBAIJ *)(*mat)->data; 3963 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3964 3965 baij->i = i; 3966 baij->j = j; 3967 baij->a = a; 3968 3969 baij->singlemalloc = PETSC_FALSE; 3970 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3971 baij->free_a = PETSC_FALSE; 3972 baij->free_ij = PETSC_FALSE; 3973 baij->free_imax_ilen = PETSC_TRUE; 3974 3975 for (PetscInt ii = 0; ii < m; ii++) { 3976 const PetscInt row_len = i[ii + 1] - i[ii]; 3977 3978 baij->ilen[ii] = baij->imax[ii] = row_len; 3979 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3980 } 3981 if (PetscDefined(USE_DEBUG)) { 3982 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3983 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3984 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3985 } 3986 } 3987 3988 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3989 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3990 PetscFunctionReturn(PETSC_SUCCESS); 3991 } 3992 3993 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3994 { 3995 PetscFunctionBegin; 3996 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3997 PetscFunctionReturn(PETSC_SUCCESS); 3998 } 3999