1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petsc/private/kernels/blockmatmult.h> 9 10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 11 #define TYPE BAIJ 12 #define TYPE_BS 13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 14 #undef TYPE_BS 15 #define TYPE_BS _BS 16 #define TYPE_BS_ON 17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 18 #undef TYPE_BS 19 #include "../src/mat/impls/aij/seq/seqhashmat.h" 20 #undef TYPE 21 #undef TYPE_BS_ON 22 23 #if defined(PETSC_HAVE_HYPRE) 24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 25 #endif 26 27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29 #endif 30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31 32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 33 { 34 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 35 PetscInt m, n, ib, jb, bs = A->rmap->bs; 36 MatScalar *a_val = a_aij->a; 37 38 PetscFunctionBegin; 39 PetscCall(MatGetSize(A, &m, &n)); 40 PetscCall(PetscArrayzero(reductions, n)); 41 if (type == NORM_2) { 42 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 43 for (jb = 0; jb < bs; jb++) { 44 for (ib = 0; ib < bs; ib++) { 45 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 46 a_val++; 47 } 48 } 49 } 50 } else if (type == NORM_1) { 51 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 52 for (jb = 0; jb < bs; jb++) { 53 for (ib = 0; ib < bs; ib++) { 54 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 55 a_val++; 56 } 57 } 58 } 59 } else if (type == NORM_INFINITY) { 60 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 61 for (jb = 0; jb < bs; jb++) { 62 for (ib = 0; ib < bs; ib++) { 63 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 64 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 65 a_val++; 66 } 67 } 68 } 69 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 70 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 71 for (jb = 0; jb < bs; jb++) { 72 for (ib = 0; ib < bs; ib++) { 73 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 74 a_val++; 75 } 76 } 77 } 78 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 80 for (jb = 0; jb < bs; jb++) { 81 for (ib = 0; ib < bs; ib++) { 82 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 83 a_val++; 84 } 85 } 86 } 87 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 88 if (type == NORM_2) { 89 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 90 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 91 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 92 } 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 97 { 98 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 99 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 100 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 101 PetscReal shift = 0.0; 102 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 103 104 PetscFunctionBegin; 105 allowzeropivot = PetscNot(A->erroriffailure); 106 107 if (a->idiagvalid) { 108 if (values) *values = a->idiag; 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 112 diag_offset = a->diag; 113 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 114 diag = a->idiag; 115 if (values) *values = a->idiag; 116 /* factor and invert each block */ 117 switch (bs) { 118 case 1: 119 for (i = 0; i < mbs; i++) { 120 odiag = v + 1 * diag_offset[i]; 121 diag[0] = odiag[0]; 122 123 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 124 if (allowzeropivot) { 125 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 127 A->factorerror_zeropivot_row = i; 128 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 129 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 130 } 131 132 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 133 diag += 1; 134 } 135 break; 136 case 2: 137 for (i = 0; i < mbs; i++) { 138 odiag = v + 4 * diag_offset[i]; 139 diag[0] = odiag[0]; 140 diag[1] = odiag[1]; 141 diag[2] = odiag[2]; 142 diag[3] = odiag[3]; 143 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 4; 146 } 147 break; 148 case 3: 149 for (i = 0; i < mbs; i++) { 150 odiag = v + 9 * diag_offset[i]; 151 diag[0] = odiag[0]; 152 diag[1] = odiag[1]; 153 diag[2] = odiag[2]; 154 diag[3] = odiag[3]; 155 diag[4] = odiag[4]; 156 diag[5] = odiag[5]; 157 diag[6] = odiag[6]; 158 diag[7] = odiag[7]; 159 diag[8] = odiag[8]; 160 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 161 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 162 diag += 9; 163 } 164 break; 165 case 4: 166 for (i = 0; i < mbs; i++) { 167 odiag = v + 16 * diag_offset[i]; 168 PetscCall(PetscArraycpy(diag, odiag, 16)); 169 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 170 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 171 diag += 16; 172 } 173 break; 174 case 5: 175 for (i = 0; i < mbs; i++) { 176 odiag = v + 25 * diag_offset[i]; 177 PetscCall(PetscArraycpy(diag, odiag, 25)); 178 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 179 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 180 diag += 25; 181 } 182 break; 183 case 6: 184 for (i = 0; i < mbs; i++) { 185 odiag = v + 36 * diag_offset[i]; 186 PetscCall(PetscArraycpy(diag, odiag, 36)); 187 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 188 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 189 diag += 36; 190 } 191 break; 192 case 7: 193 for (i = 0; i < mbs; i++) { 194 odiag = v + 49 * diag_offset[i]; 195 PetscCall(PetscArraycpy(diag, odiag, 49)); 196 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 197 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 198 diag += 49; 199 } 200 break; 201 default: 202 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 203 for (i = 0; i < mbs; i++) { 204 odiag = v + bs2 * diag_offset[i]; 205 PetscCall(PetscArraycpy(diag, odiag, bs2)); 206 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 207 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 208 diag += bs2; 209 } 210 PetscCall(PetscFree2(v_work, v_pivots)); 211 } 212 a->idiagvalid = PETSC_TRUE; 213 PetscFunctionReturn(PETSC_SUCCESS); 214 } 215 216 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 219 PetscScalar *x, *work, *w, *workt, *t; 220 const MatScalar *v, *aa = a->a, *idiag; 221 const PetscScalar *b, *xb; 222 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 223 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 224 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 225 226 PetscFunctionBegin; 227 its = its * lits; 228 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 229 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 230 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 231 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 232 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 233 234 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 235 236 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 237 diag = a->diag; 238 idiag = a->idiag; 239 k = PetscMax(A->rmap->n, A->cmap->n); 240 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 241 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 242 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 243 work = a->mult_work; 244 t = a->sor_workt; 245 w = a->sor_work; 246 247 PetscCall(VecGetArray(xx, &x)); 248 PetscCall(VecGetArrayRead(bb, &b)); 249 250 if (flag & SOR_ZERO_INITIAL_GUESS) { 251 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 252 switch (bs) { 253 case 1: 254 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 255 t[0] = b[0]; 256 i2 = 1; 257 idiag += 1; 258 for (i = 1; i < m; i++) { 259 v = aa + ai[i]; 260 vi = aj + ai[i]; 261 nz = diag[i] - ai[i]; 262 s[0] = b[i2]; 263 for (j = 0; j < nz; j++) { 264 xw[0] = x[vi[j]]; 265 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 266 } 267 t[i2] = s[0]; 268 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 269 x[i2] = xw[0]; 270 idiag += 1; 271 i2 += 1; 272 } 273 break; 274 case 2: 275 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 276 t[0] = b[0]; 277 t[1] = b[1]; 278 i2 = 2; 279 idiag += 4; 280 for (i = 1; i < m; i++) { 281 v = aa + 4 * ai[i]; 282 vi = aj + ai[i]; 283 nz = diag[i] - ai[i]; 284 s[0] = b[i2]; 285 s[1] = b[i2 + 1]; 286 for (j = 0; j < nz; j++) { 287 idx = 2 * vi[j]; 288 it = 4 * j; 289 xw[0] = x[idx]; 290 xw[1] = x[1 + idx]; 291 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 292 } 293 t[i2] = s[0]; 294 t[i2 + 1] = s[1]; 295 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 296 x[i2] = xw[0]; 297 x[i2 + 1] = xw[1]; 298 idiag += 4; 299 i2 += 2; 300 } 301 break; 302 case 3: 303 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 304 t[0] = b[0]; 305 t[1] = b[1]; 306 t[2] = b[2]; 307 i2 = 3; 308 idiag += 9; 309 for (i = 1; i < m; i++) { 310 v = aa + 9 * ai[i]; 311 vi = aj + ai[i]; 312 nz = diag[i] - ai[i]; 313 s[0] = b[i2]; 314 s[1] = b[i2 + 1]; 315 s[2] = b[i2 + 2]; 316 while (nz--) { 317 idx = 3 * (*vi++); 318 xw[0] = x[idx]; 319 xw[1] = x[1 + idx]; 320 xw[2] = x[2 + idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 322 v += 9; 323 } 324 t[i2] = s[0]; 325 t[i2 + 1] = s[1]; 326 t[i2 + 2] = s[2]; 327 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 328 x[i2] = xw[0]; 329 x[i2 + 1] = xw[1]; 330 x[i2 + 2] = xw[2]; 331 idiag += 9; 332 i2 += 3; 333 } 334 break; 335 case 4: 336 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 337 t[0] = b[0]; 338 t[1] = b[1]; 339 t[2] = b[2]; 340 t[3] = b[3]; 341 i2 = 4; 342 idiag += 16; 343 for (i = 1; i < m; i++) { 344 v = aa + 16 * ai[i]; 345 vi = aj + ai[i]; 346 nz = diag[i] - ai[i]; 347 s[0] = b[i2]; 348 s[1] = b[i2 + 1]; 349 s[2] = b[i2 + 2]; 350 s[3] = b[i2 + 3]; 351 while (nz--) { 352 idx = 4 * (*vi++); 353 xw[0] = x[idx]; 354 xw[1] = x[1 + idx]; 355 xw[2] = x[2 + idx]; 356 xw[3] = x[3 + idx]; 357 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 358 v += 16; 359 } 360 t[i2] = s[0]; 361 t[i2 + 1] = s[1]; 362 t[i2 + 2] = s[2]; 363 t[i2 + 3] = s[3]; 364 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 365 x[i2] = xw[0]; 366 x[i2 + 1] = xw[1]; 367 x[i2 + 2] = xw[2]; 368 x[i2 + 3] = xw[3]; 369 idiag += 16; 370 i2 += 4; 371 } 372 break; 373 case 5: 374 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 375 t[0] = b[0]; 376 t[1] = b[1]; 377 t[2] = b[2]; 378 t[3] = b[3]; 379 t[4] = b[4]; 380 i2 = 5; 381 idiag += 25; 382 for (i = 1; i < m; i++) { 383 v = aa + 25 * ai[i]; 384 vi = aj + ai[i]; 385 nz = diag[i] - ai[i]; 386 s[0] = b[i2]; 387 s[1] = b[i2 + 1]; 388 s[2] = b[i2 + 2]; 389 s[3] = b[i2 + 3]; 390 s[4] = b[i2 + 4]; 391 while (nz--) { 392 idx = 5 * (*vi++); 393 xw[0] = x[idx]; 394 xw[1] = x[1 + idx]; 395 xw[2] = x[2 + idx]; 396 xw[3] = x[3 + idx]; 397 xw[4] = x[4 + idx]; 398 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 399 v += 25; 400 } 401 t[i2] = s[0]; 402 t[i2 + 1] = s[1]; 403 t[i2 + 2] = s[2]; 404 t[i2 + 3] = s[3]; 405 t[i2 + 4] = s[4]; 406 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 407 x[i2] = xw[0]; 408 x[i2 + 1] = xw[1]; 409 x[i2 + 2] = xw[2]; 410 x[i2 + 3] = xw[3]; 411 x[i2 + 4] = xw[4]; 412 idiag += 25; 413 i2 += 5; 414 } 415 break; 416 case 6: 417 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 418 t[0] = b[0]; 419 t[1] = b[1]; 420 t[2] = b[2]; 421 t[3] = b[3]; 422 t[4] = b[4]; 423 t[5] = b[5]; 424 i2 = 6; 425 idiag += 36; 426 for (i = 1; i < m; i++) { 427 v = aa + 36 * ai[i]; 428 vi = aj + ai[i]; 429 nz = diag[i] - ai[i]; 430 s[0] = b[i2]; 431 s[1] = b[i2 + 1]; 432 s[2] = b[i2 + 2]; 433 s[3] = b[i2 + 3]; 434 s[4] = b[i2 + 4]; 435 s[5] = b[i2 + 5]; 436 while (nz--) { 437 idx = 6 * (*vi++); 438 xw[0] = x[idx]; 439 xw[1] = x[1 + idx]; 440 xw[2] = x[2 + idx]; 441 xw[3] = x[3 + idx]; 442 xw[4] = x[4 + idx]; 443 xw[5] = x[5 + idx]; 444 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 445 v += 36; 446 } 447 t[i2] = s[0]; 448 t[i2 + 1] = s[1]; 449 t[i2 + 2] = s[2]; 450 t[i2 + 3] = s[3]; 451 t[i2 + 4] = s[4]; 452 t[i2 + 5] = s[5]; 453 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 454 x[i2] = xw[0]; 455 x[i2 + 1] = xw[1]; 456 x[i2 + 2] = xw[2]; 457 x[i2 + 3] = xw[3]; 458 x[i2 + 4] = xw[4]; 459 x[i2 + 5] = xw[5]; 460 idiag += 36; 461 i2 += 6; 462 } 463 break; 464 case 7: 465 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 466 t[0] = b[0]; 467 t[1] = b[1]; 468 t[2] = b[2]; 469 t[3] = b[3]; 470 t[4] = b[4]; 471 t[5] = b[5]; 472 t[6] = b[6]; 473 i2 = 7; 474 idiag += 49; 475 for (i = 1; i < m; i++) { 476 v = aa + 49 * ai[i]; 477 vi = aj + ai[i]; 478 nz = diag[i] - ai[i]; 479 s[0] = b[i2]; 480 s[1] = b[i2 + 1]; 481 s[2] = b[i2 + 2]; 482 s[3] = b[i2 + 3]; 483 s[4] = b[i2 + 4]; 484 s[5] = b[i2 + 5]; 485 s[6] = b[i2 + 6]; 486 while (nz--) { 487 idx = 7 * (*vi++); 488 xw[0] = x[idx]; 489 xw[1] = x[1 + idx]; 490 xw[2] = x[2 + idx]; 491 xw[3] = x[3 + idx]; 492 xw[4] = x[4 + idx]; 493 xw[5] = x[5 + idx]; 494 xw[6] = x[6 + idx]; 495 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 496 v += 49; 497 } 498 t[i2] = s[0]; 499 t[i2 + 1] = s[1]; 500 t[i2 + 2] = s[2]; 501 t[i2 + 3] = s[3]; 502 t[i2 + 4] = s[4]; 503 t[i2 + 5] = s[5]; 504 t[i2 + 6] = s[6]; 505 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 506 x[i2] = xw[0]; 507 x[i2 + 1] = xw[1]; 508 x[i2 + 2] = xw[2]; 509 x[i2 + 3] = xw[3]; 510 x[i2 + 4] = xw[4]; 511 x[i2 + 5] = xw[5]; 512 x[i2 + 6] = xw[6]; 513 idiag += 49; 514 i2 += 7; 515 } 516 break; 517 default: 518 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 519 PetscCall(PetscArraycpy(t, b, bs)); 520 i2 = bs; 521 idiag += bs2; 522 for (i = 1; i < m; i++) { 523 v = aa + bs2 * ai[i]; 524 vi = aj + ai[i]; 525 nz = diag[i] - ai[i]; 526 527 PetscCall(PetscArraycpy(w, b + i2, bs)); 528 /* copy all rows of x that are needed into contiguous space */ 529 workt = work; 530 for (j = 0; j < nz; j++) { 531 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 532 workt += bs; 533 } 534 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 535 PetscCall(PetscArraycpy(t + i2, w, bs)); 536 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 537 538 idiag += bs2; 539 i2 += bs; 540 } 541 break; 542 } 543 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 544 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 545 xb = t; 546 } else xb = b; 547 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 548 idiag = a->idiag + bs2 * (a->mbs - 1); 549 i2 = bs * (m - 1); 550 switch (bs) { 551 case 1: 552 s[0] = xb[i2]; 553 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 554 x[i2] = xw[0]; 555 i2 -= 1; 556 for (i = m - 2; i >= 0; i--) { 557 v = aa + (diag[i] + 1); 558 vi = aj + diag[i] + 1; 559 nz = ai[i + 1] - diag[i] - 1; 560 s[0] = xb[i2]; 561 for (j = 0; j < nz; j++) { 562 xw[0] = x[vi[j]]; 563 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 564 } 565 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 566 x[i2] = xw[0]; 567 idiag -= 1; 568 i2 -= 1; 569 } 570 break; 571 case 2: 572 s[0] = xb[i2]; 573 s[1] = xb[i2 + 1]; 574 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 575 x[i2] = xw[0]; 576 x[i2 + 1] = xw[1]; 577 i2 -= 2; 578 idiag -= 4; 579 for (i = m - 2; i >= 0; i--) { 580 v = aa + 4 * (diag[i] + 1); 581 vi = aj + diag[i] + 1; 582 nz = ai[i + 1] - diag[i] - 1; 583 s[0] = xb[i2]; 584 s[1] = xb[i2 + 1]; 585 for (j = 0; j < nz; j++) { 586 idx = 2 * vi[j]; 587 it = 4 * j; 588 xw[0] = x[idx]; 589 xw[1] = x[1 + idx]; 590 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 591 } 592 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 593 x[i2] = xw[0]; 594 x[i2 + 1] = xw[1]; 595 idiag -= 4; 596 i2 -= 2; 597 } 598 break; 599 case 3: 600 s[0] = xb[i2]; 601 s[1] = xb[i2 + 1]; 602 s[2] = xb[i2 + 2]; 603 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 604 x[i2] = xw[0]; 605 x[i2 + 1] = xw[1]; 606 x[i2 + 2] = xw[2]; 607 i2 -= 3; 608 idiag -= 9; 609 for (i = m - 2; i >= 0; i--) { 610 v = aa + 9 * (diag[i] + 1); 611 vi = aj + diag[i] + 1; 612 nz = ai[i + 1] - diag[i] - 1; 613 s[0] = xb[i2]; 614 s[1] = xb[i2 + 1]; 615 s[2] = xb[i2 + 2]; 616 while (nz--) { 617 idx = 3 * (*vi++); 618 xw[0] = x[idx]; 619 xw[1] = x[1 + idx]; 620 xw[2] = x[2 + idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 625 x[i2] = xw[0]; 626 x[i2 + 1] = xw[1]; 627 x[i2 + 2] = xw[2]; 628 idiag -= 9; 629 i2 -= 3; 630 } 631 break; 632 case 4: 633 s[0] = xb[i2]; 634 s[1] = xb[i2 + 1]; 635 s[2] = xb[i2 + 2]; 636 s[3] = xb[i2 + 3]; 637 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 638 x[i2] = xw[0]; 639 x[i2 + 1] = xw[1]; 640 x[i2 + 2] = xw[2]; 641 x[i2 + 3] = xw[3]; 642 i2 -= 4; 643 idiag -= 16; 644 for (i = m - 2; i >= 0; i--) { 645 v = aa + 16 * (diag[i] + 1); 646 vi = aj + diag[i] + 1; 647 nz = ai[i + 1] - diag[i] - 1; 648 s[0] = xb[i2]; 649 s[1] = xb[i2 + 1]; 650 s[2] = xb[i2 + 2]; 651 s[3] = xb[i2 + 3]; 652 while (nz--) { 653 idx = 4 * (*vi++); 654 xw[0] = x[idx]; 655 xw[1] = x[1 + idx]; 656 xw[2] = x[2 + idx]; 657 xw[3] = x[3 + idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 659 v += 16; 660 } 661 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 662 x[i2] = xw[0]; 663 x[i2 + 1] = xw[1]; 664 x[i2 + 2] = xw[2]; 665 x[i2 + 3] = xw[3]; 666 idiag -= 16; 667 i2 -= 4; 668 } 669 break; 670 case 5: 671 s[0] = xb[i2]; 672 s[1] = xb[i2 + 1]; 673 s[2] = xb[i2 + 2]; 674 s[3] = xb[i2 + 3]; 675 s[4] = xb[i2 + 4]; 676 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 677 x[i2] = xw[0]; 678 x[i2 + 1] = xw[1]; 679 x[i2 + 2] = xw[2]; 680 x[i2 + 3] = xw[3]; 681 x[i2 + 4] = xw[4]; 682 i2 -= 5; 683 idiag -= 25; 684 for (i = m - 2; i >= 0; i--) { 685 v = aa + 25 * (diag[i] + 1); 686 vi = aj + diag[i] + 1; 687 nz = ai[i + 1] - diag[i] - 1; 688 s[0] = xb[i2]; 689 s[1] = xb[i2 + 1]; 690 s[2] = xb[i2 + 2]; 691 s[3] = xb[i2 + 3]; 692 s[4] = xb[i2 + 4]; 693 while (nz--) { 694 idx = 5 * (*vi++); 695 xw[0] = x[idx]; 696 xw[1] = x[1 + idx]; 697 xw[2] = x[2 + idx]; 698 xw[3] = x[3 + idx]; 699 xw[4] = x[4 + idx]; 700 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 701 v += 25; 702 } 703 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 704 x[i2] = xw[0]; 705 x[i2 + 1] = xw[1]; 706 x[i2 + 2] = xw[2]; 707 x[i2 + 3] = xw[3]; 708 x[i2 + 4] = xw[4]; 709 idiag -= 25; 710 i2 -= 5; 711 } 712 break; 713 case 6: 714 s[0] = xb[i2]; 715 s[1] = xb[i2 + 1]; 716 s[2] = xb[i2 + 2]; 717 s[3] = xb[i2 + 3]; 718 s[4] = xb[i2 + 4]; 719 s[5] = xb[i2 + 5]; 720 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 721 x[i2] = xw[0]; 722 x[i2 + 1] = xw[1]; 723 x[i2 + 2] = xw[2]; 724 x[i2 + 3] = xw[3]; 725 x[i2 + 4] = xw[4]; 726 x[i2 + 5] = xw[5]; 727 i2 -= 6; 728 idiag -= 36; 729 for (i = m - 2; i >= 0; i--) { 730 v = aa + 36 * (diag[i] + 1); 731 vi = aj + diag[i] + 1; 732 nz = ai[i + 1] - diag[i] - 1; 733 s[0] = xb[i2]; 734 s[1] = xb[i2 + 1]; 735 s[2] = xb[i2 + 2]; 736 s[3] = xb[i2 + 3]; 737 s[4] = xb[i2 + 4]; 738 s[5] = xb[i2 + 5]; 739 while (nz--) { 740 idx = 6 * (*vi++); 741 xw[0] = x[idx]; 742 xw[1] = x[1 + idx]; 743 xw[2] = x[2 + idx]; 744 xw[3] = x[3 + idx]; 745 xw[4] = x[4 + idx]; 746 xw[5] = x[5 + idx]; 747 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 748 v += 36; 749 } 750 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 751 x[i2] = xw[0]; 752 x[i2 + 1] = xw[1]; 753 x[i2 + 2] = xw[2]; 754 x[i2 + 3] = xw[3]; 755 x[i2 + 4] = xw[4]; 756 x[i2 + 5] = xw[5]; 757 idiag -= 36; 758 i2 -= 6; 759 } 760 break; 761 case 7: 762 s[0] = xb[i2]; 763 s[1] = xb[i2 + 1]; 764 s[2] = xb[i2 + 2]; 765 s[3] = xb[i2 + 3]; 766 s[4] = xb[i2 + 4]; 767 s[5] = xb[i2 + 5]; 768 s[6] = xb[i2 + 6]; 769 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 770 x[i2] = xw[0]; 771 x[i2 + 1] = xw[1]; 772 x[i2 + 2] = xw[2]; 773 x[i2 + 3] = xw[3]; 774 x[i2 + 4] = xw[4]; 775 x[i2 + 5] = xw[5]; 776 x[i2 + 6] = xw[6]; 777 i2 -= 7; 778 idiag -= 49; 779 for (i = m - 2; i >= 0; i--) { 780 v = aa + 49 * (diag[i] + 1); 781 vi = aj + diag[i] + 1; 782 nz = ai[i + 1] - diag[i] - 1; 783 s[0] = xb[i2]; 784 s[1] = xb[i2 + 1]; 785 s[2] = xb[i2 + 2]; 786 s[3] = xb[i2 + 3]; 787 s[4] = xb[i2 + 4]; 788 s[5] = xb[i2 + 5]; 789 s[6] = xb[i2 + 6]; 790 while (nz--) { 791 idx = 7 * (*vi++); 792 xw[0] = x[idx]; 793 xw[1] = x[1 + idx]; 794 xw[2] = x[2 + idx]; 795 xw[3] = x[3 + idx]; 796 xw[4] = x[4 + idx]; 797 xw[5] = x[5 + idx]; 798 xw[6] = x[6 + idx]; 799 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 800 v += 49; 801 } 802 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 803 x[i2] = xw[0]; 804 x[i2 + 1] = xw[1]; 805 x[i2 + 2] = xw[2]; 806 x[i2 + 3] = xw[3]; 807 x[i2 + 4] = xw[4]; 808 x[i2 + 5] = xw[5]; 809 x[i2 + 6] = xw[6]; 810 idiag -= 49; 811 i2 -= 7; 812 } 813 break; 814 default: 815 PetscCall(PetscArraycpy(w, xb + i2, bs)); 816 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 817 i2 -= bs; 818 idiag -= bs2; 819 for (i = m - 2; i >= 0; i--) { 820 v = aa + bs2 * (diag[i] + 1); 821 vi = aj + diag[i] + 1; 822 nz = ai[i + 1] - diag[i] - 1; 823 824 PetscCall(PetscArraycpy(w, xb + i2, bs)); 825 /* copy all rows of x that are needed into contiguous space */ 826 workt = work; 827 for (j = 0; j < nz; j++) { 828 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 829 workt += bs; 830 } 831 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 832 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 833 834 idiag -= bs2; 835 i2 -= bs; 836 } 837 break; 838 } 839 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 840 } 841 its--; 842 } 843 while (its--) { 844 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 845 idiag = a->idiag; 846 i2 = 0; 847 switch (bs) { 848 case 1: 849 for (i = 0; i < m; i++) { 850 v = aa + ai[i]; 851 vi = aj + ai[i]; 852 nz = ai[i + 1] - ai[i]; 853 s[0] = b[i2]; 854 for (j = 0; j < nz; j++) { 855 xw[0] = x[vi[j]]; 856 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 857 } 858 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 859 x[i2] += xw[0]; 860 idiag += 1; 861 i2 += 1; 862 } 863 break; 864 case 2: 865 for (i = 0; i < m; i++) { 866 v = aa + 4 * ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i + 1] - ai[i]; 869 s[0] = b[i2]; 870 s[1] = b[i2 + 1]; 871 for (j = 0; j < nz; j++) { 872 idx = 2 * vi[j]; 873 it = 4 * j; 874 xw[0] = x[idx]; 875 xw[1] = x[1 + idx]; 876 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 877 } 878 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 879 x[i2] += xw[0]; 880 x[i2 + 1] += xw[1]; 881 idiag += 4; 882 i2 += 2; 883 } 884 break; 885 case 3: 886 for (i = 0; i < m; i++) { 887 v = aa + 9 * ai[i]; 888 vi = aj + ai[i]; 889 nz = ai[i + 1] - ai[i]; 890 s[0] = b[i2]; 891 s[1] = b[i2 + 1]; 892 s[2] = b[i2 + 2]; 893 while (nz--) { 894 idx = 3 * (*vi++); 895 xw[0] = x[idx]; 896 xw[1] = x[1 + idx]; 897 xw[2] = x[2 + idx]; 898 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 899 v += 9; 900 } 901 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 902 x[i2] += xw[0]; 903 x[i2 + 1] += xw[1]; 904 x[i2 + 2] += xw[2]; 905 idiag += 9; 906 i2 += 3; 907 } 908 break; 909 case 4: 910 for (i = 0; i < m; i++) { 911 v = aa + 16 * ai[i]; 912 vi = aj + ai[i]; 913 nz = ai[i + 1] - ai[i]; 914 s[0] = b[i2]; 915 s[1] = b[i2 + 1]; 916 s[2] = b[i2 + 2]; 917 s[3] = b[i2 + 3]; 918 while (nz--) { 919 idx = 4 * (*vi++); 920 xw[0] = x[idx]; 921 xw[1] = x[1 + idx]; 922 xw[2] = x[2 + idx]; 923 xw[3] = x[3 + idx]; 924 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 925 v += 16; 926 } 927 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 928 x[i2] += xw[0]; 929 x[i2 + 1] += xw[1]; 930 x[i2 + 2] += xw[2]; 931 x[i2 + 3] += xw[3]; 932 idiag += 16; 933 i2 += 4; 934 } 935 break; 936 case 5: 937 for (i = 0; i < m; i++) { 938 v = aa + 25 * ai[i]; 939 vi = aj + ai[i]; 940 nz = ai[i + 1] - ai[i]; 941 s[0] = b[i2]; 942 s[1] = b[i2 + 1]; 943 s[2] = b[i2 + 2]; 944 s[3] = b[i2 + 3]; 945 s[4] = b[i2 + 4]; 946 while (nz--) { 947 idx = 5 * (*vi++); 948 xw[0] = x[idx]; 949 xw[1] = x[1 + idx]; 950 xw[2] = x[2 + idx]; 951 xw[3] = x[3 + idx]; 952 xw[4] = x[4 + idx]; 953 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 954 v += 25; 955 } 956 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 957 x[i2] += xw[0]; 958 x[i2 + 1] += xw[1]; 959 x[i2 + 2] += xw[2]; 960 x[i2 + 3] += xw[3]; 961 x[i2 + 4] += xw[4]; 962 idiag += 25; 963 i2 += 5; 964 } 965 break; 966 case 6: 967 for (i = 0; i < m; i++) { 968 v = aa + 36 * ai[i]; 969 vi = aj + ai[i]; 970 nz = ai[i + 1] - ai[i]; 971 s[0] = b[i2]; 972 s[1] = b[i2 + 1]; 973 s[2] = b[i2 + 2]; 974 s[3] = b[i2 + 3]; 975 s[4] = b[i2 + 4]; 976 s[5] = b[i2 + 5]; 977 while (nz--) { 978 idx = 6 * (*vi++); 979 xw[0] = x[idx]; 980 xw[1] = x[1 + idx]; 981 xw[2] = x[2 + idx]; 982 xw[3] = x[3 + idx]; 983 xw[4] = x[4 + idx]; 984 xw[5] = x[5 + idx]; 985 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 986 v += 36; 987 } 988 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 989 x[i2] += xw[0]; 990 x[i2 + 1] += xw[1]; 991 x[i2 + 2] += xw[2]; 992 x[i2 + 3] += xw[3]; 993 x[i2 + 4] += xw[4]; 994 x[i2 + 5] += xw[5]; 995 idiag += 36; 996 i2 += 6; 997 } 998 break; 999 case 7: 1000 for (i = 0; i < m; i++) { 1001 v = aa + 49 * ai[i]; 1002 vi = aj + ai[i]; 1003 nz = ai[i + 1] - ai[i]; 1004 s[0] = b[i2]; 1005 s[1] = b[i2 + 1]; 1006 s[2] = b[i2 + 2]; 1007 s[3] = b[i2 + 3]; 1008 s[4] = b[i2 + 4]; 1009 s[5] = b[i2 + 5]; 1010 s[6] = b[i2 + 6]; 1011 while (nz--) { 1012 idx = 7 * (*vi++); 1013 xw[0] = x[idx]; 1014 xw[1] = x[1 + idx]; 1015 xw[2] = x[2 + idx]; 1016 xw[3] = x[3 + idx]; 1017 xw[4] = x[4 + idx]; 1018 xw[5] = x[5 + idx]; 1019 xw[6] = x[6 + idx]; 1020 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1021 v += 49; 1022 } 1023 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1024 x[i2] += xw[0]; 1025 x[i2 + 1] += xw[1]; 1026 x[i2 + 2] += xw[2]; 1027 x[i2 + 3] += xw[3]; 1028 x[i2 + 4] += xw[4]; 1029 x[i2 + 5] += xw[5]; 1030 x[i2 + 6] += xw[6]; 1031 idiag += 49; 1032 i2 += 7; 1033 } 1034 break; 1035 default: 1036 for (i = 0; i < m; i++) { 1037 v = aa + bs2 * ai[i]; 1038 vi = aj + ai[i]; 1039 nz = ai[i + 1] - ai[i]; 1040 1041 PetscCall(PetscArraycpy(w, b + i2, bs)); 1042 /* copy all rows of x that are needed into contiguous space */ 1043 workt = work; 1044 for (j = 0; j < nz; j++) { 1045 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1046 workt += bs; 1047 } 1048 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1049 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1050 1051 idiag += bs2; 1052 i2 += bs; 1053 } 1054 break; 1055 } 1056 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1057 } 1058 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1059 idiag = a->idiag + bs2 * (a->mbs - 1); 1060 i2 = bs * (m - 1); 1061 switch (bs) { 1062 case 1: 1063 for (i = m - 1; i >= 0; i--) { 1064 v = aa + ai[i]; 1065 vi = aj + ai[i]; 1066 nz = ai[i + 1] - ai[i]; 1067 s[0] = b[i2]; 1068 for (j = 0; j < nz; j++) { 1069 xw[0] = x[vi[j]]; 1070 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1071 } 1072 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1073 x[i2] += xw[0]; 1074 idiag -= 1; 1075 i2 -= 1; 1076 } 1077 break; 1078 case 2: 1079 for (i = m - 1; i >= 0; i--) { 1080 v = aa + 4 * ai[i]; 1081 vi = aj + ai[i]; 1082 nz = ai[i + 1] - ai[i]; 1083 s[0] = b[i2]; 1084 s[1] = b[i2 + 1]; 1085 for (j = 0; j < nz; j++) { 1086 idx = 2 * vi[j]; 1087 it = 4 * j; 1088 xw[0] = x[idx]; 1089 xw[1] = x[1 + idx]; 1090 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1091 } 1092 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1093 x[i2] += xw[0]; 1094 x[i2 + 1] += xw[1]; 1095 idiag -= 4; 1096 i2 -= 2; 1097 } 1098 break; 1099 case 3: 1100 for (i = m - 1; i >= 0; i--) { 1101 v = aa + 9 * ai[i]; 1102 vi = aj + ai[i]; 1103 nz = ai[i + 1] - ai[i]; 1104 s[0] = b[i2]; 1105 s[1] = b[i2 + 1]; 1106 s[2] = b[i2 + 2]; 1107 while (nz--) { 1108 idx = 3 * (*vi++); 1109 xw[0] = x[idx]; 1110 xw[1] = x[1 + idx]; 1111 xw[2] = x[2 + idx]; 1112 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1113 v += 9; 1114 } 1115 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1116 x[i2] += xw[0]; 1117 x[i2 + 1] += xw[1]; 1118 x[i2 + 2] += xw[2]; 1119 idiag -= 9; 1120 i2 -= 3; 1121 } 1122 break; 1123 case 4: 1124 for (i = m - 1; i >= 0; i--) { 1125 v = aa + 16 * ai[i]; 1126 vi = aj + ai[i]; 1127 nz = ai[i + 1] - ai[i]; 1128 s[0] = b[i2]; 1129 s[1] = b[i2 + 1]; 1130 s[2] = b[i2 + 2]; 1131 s[3] = b[i2 + 3]; 1132 while (nz--) { 1133 idx = 4 * (*vi++); 1134 xw[0] = x[idx]; 1135 xw[1] = x[1 + idx]; 1136 xw[2] = x[2 + idx]; 1137 xw[3] = x[3 + idx]; 1138 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1139 v += 16; 1140 } 1141 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1142 x[i2] += xw[0]; 1143 x[i2 + 1] += xw[1]; 1144 x[i2 + 2] += xw[2]; 1145 x[i2 + 3] += xw[3]; 1146 idiag -= 16; 1147 i2 -= 4; 1148 } 1149 break; 1150 case 5: 1151 for (i = m - 1; i >= 0; i--) { 1152 v = aa + 25 * ai[i]; 1153 vi = aj + ai[i]; 1154 nz = ai[i + 1] - ai[i]; 1155 s[0] = b[i2]; 1156 s[1] = b[i2 + 1]; 1157 s[2] = b[i2 + 2]; 1158 s[3] = b[i2 + 3]; 1159 s[4] = b[i2 + 4]; 1160 while (nz--) { 1161 idx = 5 * (*vi++); 1162 xw[0] = x[idx]; 1163 xw[1] = x[1 + idx]; 1164 xw[2] = x[2 + idx]; 1165 xw[3] = x[3 + idx]; 1166 xw[4] = x[4 + idx]; 1167 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1168 v += 25; 1169 } 1170 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1171 x[i2] += xw[0]; 1172 x[i2 + 1] += xw[1]; 1173 x[i2 + 2] += xw[2]; 1174 x[i2 + 3] += xw[3]; 1175 x[i2 + 4] += xw[4]; 1176 idiag -= 25; 1177 i2 -= 5; 1178 } 1179 break; 1180 case 6: 1181 for (i = m - 1; i >= 0; i--) { 1182 v = aa + 36 * ai[i]; 1183 vi = aj + ai[i]; 1184 nz = ai[i + 1] - ai[i]; 1185 s[0] = b[i2]; 1186 s[1] = b[i2 + 1]; 1187 s[2] = b[i2 + 2]; 1188 s[3] = b[i2 + 3]; 1189 s[4] = b[i2 + 4]; 1190 s[5] = b[i2 + 5]; 1191 while (nz--) { 1192 idx = 6 * (*vi++); 1193 xw[0] = x[idx]; 1194 xw[1] = x[1 + idx]; 1195 xw[2] = x[2 + idx]; 1196 xw[3] = x[3 + idx]; 1197 xw[4] = x[4 + idx]; 1198 xw[5] = x[5 + idx]; 1199 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1200 v += 36; 1201 } 1202 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1203 x[i2] += xw[0]; 1204 x[i2 + 1] += xw[1]; 1205 x[i2 + 2] += xw[2]; 1206 x[i2 + 3] += xw[3]; 1207 x[i2 + 4] += xw[4]; 1208 x[i2 + 5] += xw[5]; 1209 idiag -= 36; 1210 i2 -= 6; 1211 } 1212 break; 1213 case 7: 1214 for (i = m - 1; i >= 0; i--) { 1215 v = aa + 49 * ai[i]; 1216 vi = aj + ai[i]; 1217 nz = ai[i + 1] - ai[i]; 1218 s[0] = b[i2]; 1219 s[1] = b[i2 + 1]; 1220 s[2] = b[i2 + 2]; 1221 s[3] = b[i2 + 3]; 1222 s[4] = b[i2 + 4]; 1223 s[5] = b[i2 + 5]; 1224 s[6] = b[i2 + 6]; 1225 while (nz--) { 1226 idx = 7 * (*vi++); 1227 xw[0] = x[idx]; 1228 xw[1] = x[1 + idx]; 1229 xw[2] = x[2 + idx]; 1230 xw[3] = x[3 + idx]; 1231 xw[4] = x[4 + idx]; 1232 xw[5] = x[5 + idx]; 1233 xw[6] = x[6 + idx]; 1234 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1235 v += 49; 1236 } 1237 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1238 x[i2] += xw[0]; 1239 x[i2 + 1] += xw[1]; 1240 x[i2 + 2] += xw[2]; 1241 x[i2 + 3] += xw[3]; 1242 x[i2 + 4] += xw[4]; 1243 x[i2 + 5] += xw[5]; 1244 x[i2 + 6] += xw[6]; 1245 idiag -= 49; 1246 i2 -= 7; 1247 } 1248 break; 1249 default: 1250 for (i = m - 1; i >= 0; i--) { 1251 v = aa + bs2 * ai[i]; 1252 vi = aj + ai[i]; 1253 nz = ai[i + 1] - ai[i]; 1254 1255 PetscCall(PetscArraycpy(w, b + i2, bs)); 1256 /* copy all rows of x that are needed into contiguous space */ 1257 workt = work; 1258 for (j = 0; j < nz; j++) { 1259 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1260 workt += bs; 1261 } 1262 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1263 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1264 1265 idiag -= bs2; 1266 i2 -= bs; 1267 } 1268 break; 1269 } 1270 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1271 } 1272 } 1273 PetscCall(VecRestoreArray(xx, &x)); 1274 PetscCall(VecRestoreArrayRead(bb, &b)); 1275 PetscFunctionReturn(PETSC_SUCCESS); 1276 } 1277 1278 /* 1279 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1280 */ 1281 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1282 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1283 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1284 #define matsetvaluesblocked4_ matsetvaluesblocked4 1285 #endif 1286 1287 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1288 { 1289 Mat A = *AA; 1290 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1291 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1292 PetscInt *ai = a->i, *ailen = a->ilen; 1293 PetscInt *aj = a->j, stepval, lastcol = -1; 1294 const PetscScalar *value = v; 1295 MatScalar *ap, *aa = a->a, *bap; 1296 1297 PetscFunctionBegin; 1298 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1299 stepval = (n - 1) * 4; 1300 for (k = 0; k < m; k++) { /* loop over added rows */ 1301 row = im[k]; 1302 rp = aj + ai[row]; 1303 ap = aa + 16 * ai[row]; 1304 nrow = ailen[row]; 1305 low = 0; 1306 high = nrow; 1307 for (l = 0; l < n; l++) { /* loop over added columns */ 1308 col = in[l]; 1309 if (col <= lastcol) low = 0; 1310 else high = nrow; 1311 lastcol = col; 1312 value = v + k * (stepval + 4 + l) * 4; 1313 while (high - low > 7) { 1314 t = (low + high) / 2; 1315 if (rp[t] > col) high = t; 1316 else low = t; 1317 } 1318 for (i = low; i < high; i++) { 1319 if (rp[i] > col) break; 1320 if (rp[i] == col) { 1321 bap = ap + 16 * i; 1322 for (ii = 0; ii < 4; ii++, value += stepval) { 1323 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1324 } 1325 goto noinsert2; 1326 } 1327 } 1328 N = nrow++ - 1; 1329 high++; /* added new column index thus must search to one higher than before */ 1330 /* shift up all the later entries in this row */ 1331 for (ii = N; ii >= i; ii--) { 1332 rp[ii + 1] = rp[ii]; 1333 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1334 } 1335 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1336 rp[i] = col; 1337 bap = ap + 16 * i; 1338 for (ii = 0; ii < 4; ii++, value += stepval) { 1339 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1340 } 1341 noinsert2:; 1342 low = i; 1343 } 1344 ailen[row] = nrow; 1345 } 1346 PetscFunctionReturnVoid(); 1347 } 1348 1349 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1350 #define matsetvalues4_ MATSETVALUES4 1351 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1352 #define matsetvalues4_ matsetvalues4 1353 #endif 1354 1355 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1356 { 1357 Mat A = *AA; 1358 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1359 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1360 PetscInt *ai = a->i, *ailen = a->ilen; 1361 PetscInt *aj = a->j, brow, bcol; 1362 PetscInt ridx, cidx, lastcol = -1; 1363 MatScalar *ap, value, *aa = a->a, *bap; 1364 1365 PetscFunctionBegin; 1366 for (k = 0; k < m; k++) { /* loop over added rows */ 1367 row = im[k]; 1368 brow = row / 4; 1369 rp = aj + ai[brow]; 1370 ap = aa + 16 * ai[brow]; 1371 nrow = ailen[brow]; 1372 low = 0; 1373 high = nrow; 1374 for (l = 0; l < n; l++) { /* loop over added columns */ 1375 col = in[l]; 1376 bcol = col / 4; 1377 ridx = row % 4; 1378 cidx = col % 4; 1379 value = v[l + k * n]; 1380 if (col <= lastcol) low = 0; 1381 else high = nrow; 1382 lastcol = col; 1383 while (high - low > 7) { 1384 t = (low + high) / 2; 1385 if (rp[t] > bcol) high = t; 1386 else low = t; 1387 } 1388 for (i = low; i < high; i++) { 1389 if (rp[i] > bcol) break; 1390 if (rp[i] == bcol) { 1391 bap = ap + 16 * i + 4 * cidx + ridx; 1392 *bap += value; 1393 goto noinsert1; 1394 } 1395 } 1396 N = nrow++ - 1; 1397 high++; /* added new column thus must search to one higher than before */ 1398 /* shift up all the later entries in this row */ 1399 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1400 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1401 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1402 rp[i] = bcol; 1403 ap[16 * i + 4 * cidx + ridx] = value; 1404 noinsert1:; 1405 low = i; 1406 } 1407 ailen[brow] = nrow; 1408 } 1409 PetscFunctionReturnVoid(); 1410 } 1411 1412 /* 1413 Checks for missing diagonals 1414 */ 1415 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1416 { 1417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1418 PetscInt *diag, *ii = a->i, i; 1419 1420 PetscFunctionBegin; 1421 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1422 *missing = PETSC_FALSE; 1423 if (A->rmap->n > 0 && !ii) { 1424 *missing = PETSC_TRUE; 1425 if (d) *d = 0; 1426 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1427 } else { 1428 PetscInt n; 1429 n = PetscMin(a->mbs, a->nbs); 1430 diag = a->diag; 1431 for (i = 0; i < n; i++) { 1432 if (diag[i] >= ii[i + 1]) { 1433 *missing = PETSC_TRUE; 1434 if (d) *d = i; 1435 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1436 break; 1437 } 1438 } 1439 } 1440 PetscFunctionReturn(PETSC_SUCCESS); 1441 } 1442 1443 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1444 { 1445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1446 PetscInt i, j, m = a->mbs; 1447 1448 PetscFunctionBegin; 1449 if (!a->diag) { 1450 PetscCall(PetscMalloc1(m, &a->diag)); 1451 a->free_diag = PETSC_TRUE; 1452 } 1453 for (i = 0; i < m; i++) { 1454 a->diag[i] = a->i[i + 1]; 1455 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1456 if (a->j[j] == i) { 1457 a->diag[i] = j; 1458 break; 1459 } 1460 } 1461 } 1462 PetscFunctionReturn(PETSC_SUCCESS); 1463 } 1464 1465 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1466 { 1467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1468 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1469 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1470 1471 PetscFunctionBegin; 1472 *nn = n; 1473 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1474 if (symmetric) { 1475 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1476 nz = tia[n]; 1477 } else { 1478 tia = a->i; 1479 tja = a->j; 1480 } 1481 1482 if (!blockcompressed && bs > 1) { 1483 (*nn) *= bs; 1484 /* malloc & create the natural set of indices */ 1485 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1486 if (n) { 1487 (*ia)[0] = oshift; 1488 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1489 } 1490 1491 for (i = 1; i < n; i++) { 1492 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1493 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1494 } 1495 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1496 1497 if (inja) { 1498 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1499 cnt = 0; 1500 for (i = 0; i < n; i++) { 1501 for (j = 0; j < bs; j++) { 1502 for (k = tia[i]; k < tia[i + 1]; k++) { 1503 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1504 } 1505 } 1506 } 1507 } 1508 1509 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1510 PetscCall(PetscFree(tia)); 1511 PetscCall(PetscFree(tja)); 1512 } 1513 } else if (oshift == 1) { 1514 if (symmetric) { 1515 nz = tia[A->rmap->n / bs]; 1516 /* add 1 to i and j indices */ 1517 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1518 *ia = tia; 1519 if (ja) { 1520 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1521 *ja = tja; 1522 } 1523 } else { 1524 nz = a->i[A->rmap->n / bs]; 1525 /* malloc space and add 1 to i and j indices */ 1526 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1527 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1528 if (ja) { 1529 PetscCall(PetscMalloc1(nz, ja)); 1530 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1531 } 1532 } 1533 } else { 1534 *ia = tia; 1535 if (ja) *ja = tja; 1536 } 1537 PetscFunctionReturn(PETSC_SUCCESS); 1538 } 1539 1540 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1541 { 1542 PetscFunctionBegin; 1543 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1544 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1545 PetscCall(PetscFree(*ia)); 1546 if (ja) PetscCall(PetscFree(*ja)); 1547 } 1548 PetscFunctionReturn(PETSC_SUCCESS); 1549 } 1550 1551 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1552 { 1553 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1554 1555 PetscFunctionBegin; 1556 if (A->hash_active) { 1557 PetscInt bs; 1558 A->ops[0] = a->cops; 1559 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1560 PetscCall(MatGetBlockSize(A, &bs)); 1561 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1562 PetscCall(PetscFree(a->dnz)); 1563 PetscCall(PetscFree(a->bdnz)); 1564 A->hash_active = PETSC_FALSE; 1565 } 1566 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1567 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1568 PetscCall(ISDestroy(&a->row)); 1569 PetscCall(ISDestroy(&a->col)); 1570 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1571 PetscCall(PetscFree(a->idiag)); 1572 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1573 PetscCall(PetscFree(a->solve_work)); 1574 PetscCall(PetscFree(a->mult_work)); 1575 PetscCall(PetscFree(a->sor_workt)); 1576 PetscCall(PetscFree(a->sor_work)); 1577 PetscCall(ISDestroy(&a->icol)); 1578 PetscCall(PetscFree(a->saved_values)); 1579 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1580 1581 PetscCall(MatDestroy(&a->sbaijMat)); 1582 PetscCall(MatDestroy(&a->parent)); 1583 PetscCall(PetscFree(A->data)); 1584 1585 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1586 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1596 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1597 #if defined(PETSC_HAVE_HYPRE) 1598 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1599 #endif 1600 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1601 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1602 PetscFunctionReturn(PETSC_SUCCESS); 1603 } 1604 1605 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1606 { 1607 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1608 1609 PetscFunctionBegin; 1610 switch (op) { 1611 case MAT_ROW_ORIENTED: 1612 a->roworiented = flg; 1613 break; 1614 case MAT_KEEP_NONZERO_PATTERN: 1615 a->keepnonzeropattern = flg; 1616 break; 1617 case MAT_NEW_NONZERO_LOCATIONS: 1618 a->nonew = (flg ? 0 : 1); 1619 break; 1620 case MAT_NEW_NONZERO_LOCATION_ERR: 1621 a->nonew = (flg ? -1 : 0); 1622 break; 1623 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1624 a->nonew = (flg ? -2 : 0); 1625 break; 1626 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1627 a->nounused = (flg ? -1 : 0); 1628 break; 1629 case MAT_FORCE_DIAGONAL_ENTRIES: 1630 case MAT_IGNORE_OFF_PROC_ENTRIES: 1631 case MAT_USE_HASH_TABLE: 1632 case MAT_SORTED_FULL: 1633 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1634 break; 1635 case MAT_SPD: 1636 case MAT_SYMMETRIC: 1637 case MAT_STRUCTURALLY_SYMMETRIC: 1638 case MAT_HERMITIAN: 1639 case MAT_SYMMETRY_ETERNAL: 1640 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1641 case MAT_SUBMAT_SINGLEIS: 1642 case MAT_STRUCTURE_ONLY: 1643 case MAT_SPD_ETERNAL: 1644 /* if the diagonal matrix is square it inherits some of the properties above */ 1645 break; 1646 default: 1647 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1648 } 1649 PetscFunctionReturn(PETSC_SUCCESS); 1650 } 1651 1652 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1653 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1654 { 1655 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1656 MatScalar *aa_i; 1657 PetscScalar *v_i; 1658 1659 PetscFunctionBegin; 1660 bs = A->rmap->bs; 1661 bs2 = bs * bs; 1662 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1663 1664 bn = row / bs; /* Block number */ 1665 bp = row % bs; /* Block Position */ 1666 M = ai[bn + 1] - ai[bn]; 1667 *nz = bs * M; 1668 1669 if (v) { 1670 *v = NULL; 1671 if (*nz) { 1672 PetscCall(PetscMalloc1(*nz, v)); 1673 for (i = 0; i < M; i++) { /* for each block in the block row */ 1674 v_i = *v + i * bs; 1675 aa_i = aa + bs2 * (ai[bn] + i); 1676 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1677 } 1678 } 1679 } 1680 1681 if (idx) { 1682 *idx = NULL; 1683 if (*nz) { 1684 PetscCall(PetscMalloc1(*nz, idx)); 1685 for (i = 0; i < M; i++) { /* for each block in the block row */ 1686 idx_i = *idx + i * bs; 1687 itmp = bs * aj[ai[bn] + i]; 1688 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1689 } 1690 } 1691 } 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1696 { 1697 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1698 1699 PetscFunctionBegin; 1700 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1701 PetscFunctionReturn(PETSC_SUCCESS); 1702 } 1703 1704 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1705 { 1706 PetscFunctionBegin; 1707 if (idx) PetscCall(PetscFree(*idx)); 1708 if (v) PetscCall(PetscFree(*v)); 1709 PetscFunctionReturn(PETSC_SUCCESS); 1710 } 1711 1712 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1713 { 1714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1715 Mat C; 1716 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1717 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1718 MatScalar *ata, *aa = a->a; 1719 1720 PetscFunctionBegin; 1721 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1722 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1723 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1724 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1725 1726 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1727 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1728 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1729 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1730 1731 at = (Mat_SeqBAIJ *)C->data; 1732 ati = at->i; 1733 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1734 } else { 1735 C = *B; 1736 at = (Mat_SeqBAIJ *)C->data; 1737 ati = at->i; 1738 } 1739 1740 atj = at->j; 1741 ata = at->a; 1742 1743 /* Copy ati into atfill so we have locations of the next free space in atj */ 1744 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1745 1746 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1747 for (i = 0; i < mbs; i++) { 1748 anzj = ai[i + 1] - ai[i]; 1749 for (j = 0; j < anzj; j++) { 1750 atj[atfill[*aj]] = i; 1751 for (kr = 0; kr < bs; kr++) { 1752 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1753 } 1754 atfill[*aj++] += 1; 1755 } 1756 } 1757 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1758 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1759 1760 /* Clean up temporary space and complete requests. */ 1761 PetscCall(PetscFree(atfill)); 1762 1763 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1764 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1765 *B = C; 1766 } else { 1767 PetscCall(MatHeaderMerge(A, &C)); 1768 } 1769 PetscFunctionReturn(PETSC_SUCCESS); 1770 } 1771 1772 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1773 { 1774 Mat Btrans; 1775 1776 PetscFunctionBegin; 1777 *f = PETSC_FALSE; 1778 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1779 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1780 PetscCall(MatDestroy(&Btrans)); 1781 PetscFunctionReturn(PETSC_SUCCESS); 1782 } 1783 1784 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1785 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1786 { 1787 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1788 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1789 PetscInt *rowlens, *colidxs; 1790 PetscScalar *matvals; 1791 1792 PetscFunctionBegin; 1793 PetscCall(PetscViewerSetUp(viewer)); 1794 1795 M = mat->rmap->N; 1796 N = mat->cmap->N; 1797 m = mat->rmap->n; 1798 bs = mat->rmap->bs; 1799 nz = bs * bs * A->nz; 1800 1801 /* write matrix header */ 1802 header[0] = MAT_FILE_CLASSID; 1803 header[1] = M; 1804 header[2] = N; 1805 header[3] = nz; 1806 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1807 1808 /* store row lengths */ 1809 PetscCall(PetscMalloc1(m, &rowlens)); 1810 for (cnt = 0, i = 0; i < A->mbs; i++) 1811 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1812 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1813 PetscCall(PetscFree(rowlens)); 1814 1815 /* store column indices */ 1816 PetscCall(PetscMalloc1(nz, &colidxs)); 1817 for (cnt = 0, i = 0; i < A->mbs; i++) 1818 for (k = 0; k < bs; k++) 1819 for (j = A->i[i]; j < A->i[i + 1]; j++) 1820 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1821 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1822 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1823 PetscCall(PetscFree(colidxs)); 1824 1825 /* store nonzero values */ 1826 PetscCall(PetscMalloc1(nz, &matvals)); 1827 for (cnt = 0, i = 0; i < A->mbs; i++) 1828 for (k = 0; k < bs; k++) 1829 for (j = A->i[i]; j < A->i[i + 1]; j++) 1830 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1831 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1832 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1833 PetscCall(PetscFree(matvals)); 1834 1835 /* write block size option to the viewer's .info file */ 1836 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1837 PetscFunctionReturn(PETSC_SUCCESS); 1838 } 1839 1840 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1841 { 1842 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1843 PetscInt i, bs = A->rmap->bs, k; 1844 1845 PetscFunctionBegin; 1846 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1847 for (i = 0; i < a->mbs; i++) { 1848 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1849 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1850 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1851 } 1852 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1853 PetscFunctionReturn(PETSC_SUCCESS); 1854 } 1855 1856 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1857 { 1858 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1859 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1860 PetscViewerFormat format; 1861 1862 PetscFunctionBegin; 1863 if (A->structure_only) { 1864 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1865 PetscFunctionReturn(PETSC_SUCCESS); 1866 } 1867 1868 PetscCall(PetscViewerGetFormat(viewer, &format)); 1869 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1870 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1871 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1872 const char *matname; 1873 Mat aij; 1874 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1875 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1876 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1877 PetscCall(MatView(aij, viewer)); 1878 PetscCall(MatDestroy(&aij)); 1879 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1882 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1883 for (i = 0; i < a->mbs; i++) { 1884 for (j = 0; j < bs; j++) { 1885 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1886 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1887 for (l = 0; l < bs; l++) { 1888 #if defined(PETSC_USE_COMPLEX) 1889 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1890 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1891 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1892 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1893 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1894 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1895 } 1896 #else 1897 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1898 #endif 1899 } 1900 } 1901 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1902 } 1903 } 1904 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1905 } else { 1906 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1907 for (i = 0; i < a->mbs; i++) { 1908 for (j = 0; j < bs; j++) { 1909 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1910 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1911 for (l = 0; l < bs; l++) { 1912 #if defined(PETSC_USE_COMPLEX) 1913 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1914 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1915 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1916 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1917 } else { 1918 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1919 } 1920 #else 1921 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1922 #endif 1923 } 1924 } 1925 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1926 } 1927 } 1928 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1929 } 1930 PetscCall(PetscViewerFlush(viewer)); 1931 PetscFunctionReturn(PETSC_SUCCESS); 1932 } 1933 1934 #include <petscdraw.h> 1935 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1936 { 1937 Mat A = (Mat)Aa; 1938 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1939 PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 1940 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1941 MatScalar *aa; 1942 PetscViewer viewer; 1943 PetscViewerFormat format; 1944 int color; 1945 1946 PetscFunctionBegin; 1947 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1948 PetscCall(PetscViewerGetFormat(viewer, &format)); 1949 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1950 1951 /* loop over matrix elements drawing boxes */ 1952 1953 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1954 PetscDrawCollectiveBegin(draw); 1955 /* Blue for negative, Cyan for zero and Red for positive */ 1956 color = PETSC_DRAW_BLUE; 1957 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1958 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1959 y_l = A->rmap->N - row - 1.0; 1960 y_r = y_l + 1.0; 1961 x_l = a->j[j] * bs; 1962 x_r = x_l + 1.0; 1963 aa = a->a + j * bs2; 1964 for (k = 0; k < bs; k++) { 1965 for (l = 0; l < bs; l++) { 1966 if (PetscRealPart(*aa++) >= 0.) continue; 1967 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1968 } 1969 } 1970 } 1971 } 1972 color = PETSC_DRAW_CYAN; 1973 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1974 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1975 y_l = A->rmap->N - row - 1.0; 1976 y_r = y_l + 1.0; 1977 x_l = a->j[j] * bs; 1978 x_r = x_l + 1.0; 1979 aa = a->a + j * bs2; 1980 for (k = 0; k < bs; k++) { 1981 for (l = 0; l < bs; l++) { 1982 if (PetscRealPart(*aa++) != 0.) continue; 1983 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1984 } 1985 } 1986 } 1987 } 1988 color = PETSC_DRAW_RED; 1989 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1990 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1991 y_l = A->rmap->N - row - 1.0; 1992 y_r = y_l + 1.0; 1993 x_l = a->j[j] * bs; 1994 x_r = x_l + 1.0; 1995 aa = a->a + j * bs2; 1996 for (k = 0; k < bs; k++) { 1997 for (l = 0; l < bs; l++) { 1998 if (PetscRealPart(*aa++) <= 0.) continue; 1999 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2000 } 2001 } 2002 } 2003 } 2004 PetscDrawCollectiveEnd(draw); 2005 } else { 2006 /* use contour shading to indicate magnitude of values */ 2007 /* first determine max of all nonzero values */ 2008 PetscReal minv = 0.0, maxv = 0.0; 2009 PetscDraw popup; 2010 2011 for (i = 0; i < a->nz * a->bs2; i++) { 2012 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2013 } 2014 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2015 PetscCall(PetscDrawGetPopup(draw, &popup)); 2016 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2017 2018 PetscDrawCollectiveBegin(draw); 2019 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2020 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2021 y_l = A->rmap->N - row - 1.0; 2022 y_r = y_l + 1.0; 2023 x_l = a->j[j] * bs; 2024 x_r = x_l + 1.0; 2025 aa = a->a + j * bs2; 2026 for (k = 0; k < bs; k++) { 2027 for (l = 0; l < bs; l++) { 2028 MatScalar v = *aa++; 2029 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2030 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2031 } 2032 } 2033 } 2034 } 2035 PetscDrawCollectiveEnd(draw); 2036 } 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2041 { 2042 PetscReal xl, yl, xr, yr, w, h; 2043 PetscDraw draw; 2044 PetscBool isnull; 2045 2046 PetscFunctionBegin; 2047 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2048 PetscCall(PetscDrawIsNull(draw, &isnull)); 2049 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2050 2051 xr = A->cmap->n; 2052 yr = A->rmap->N; 2053 h = yr / 10.0; 2054 w = xr / 10.0; 2055 xr += w; 2056 yr += h; 2057 xl = -w; 2058 yl = -h; 2059 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2060 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2061 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2062 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2063 PetscCall(PetscDrawSave(draw)); 2064 PetscFunctionReturn(PETSC_SUCCESS); 2065 } 2066 2067 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2068 { 2069 PetscBool iascii, isbinary, isdraw; 2070 2071 PetscFunctionBegin; 2072 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2073 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2074 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2075 if (iascii) { 2076 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2077 } else if (isbinary) { 2078 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2079 } else if (isdraw) { 2080 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2081 } else { 2082 Mat B; 2083 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2084 PetscCall(MatView(B, viewer)); 2085 PetscCall(MatDestroy(&B)); 2086 } 2087 PetscFunctionReturn(PETSC_SUCCESS); 2088 } 2089 2090 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2091 { 2092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2093 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2094 PetscInt *ai = a->i, *ailen = a->ilen; 2095 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2096 MatScalar *ap, *aa = a->a; 2097 2098 PetscFunctionBegin; 2099 for (k = 0; k < m; k++) { /* loop over rows */ 2100 row = im[k]; 2101 brow = row / bs; 2102 if (row < 0) { 2103 v += n; 2104 continue; 2105 } /* negative row */ 2106 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2107 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2108 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2109 nrow = ailen[brow]; 2110 for (l = 0; l < n; l++) { /* loop over columns */ 2111 if (in[l] < 0) { 2112 v++; 2113 continue; 2114 } /* negative column */ 2115 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2116 col = in[l]; 2117 bcol = col / bs; 2118 cidx = col % bs; 2119 ridx = row % bs; 2120 high = nrow; 2121 low = 0; /* assume unsorted */ 2122 while (high - low > 5) { 2123 t = (low + high) / 2; 2124 if (rp[t] > bcol) high = t; 2125 else low = t; 2126 } 2127 for (i = low; i < high; i++) { 2128 if (rp[i] > bcol) break; 2129 if (rp[i] == bcol) { 2130 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2131 goto finished; 2132 } 2133 } 2134 *v++ = 0.0; 2135 finished:; 2136 } 2137 } 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2142 { 2143 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2144 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2145 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2146 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2147 PetscBool roworiented = a->roworiented; 2148 const PetscScalar *value = v; 2149 MatScalar *ap = NULL, *aa = a->a, *bap; 2150 2151 PetscFunctionBegin; 2152 if (roworiented) { 2153 stepval = (n - 1) * bs; 2154 } else { 2155 stepval = (m - 1) * bs; 2156 } 2157 for (k = 0; k < m; k++) { /* loop over added rows */ 2158 row = im[k]; 2159 if (row < 0) continue; 2160 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2161 rp = aj + ai[row]; 2162 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2163 rmax = imax[row]; 2164 nrow = ailen[row]; 2165 low = 0; 2166 high = nrow; 2167 for (l = 0; l < n; l++) { /* loop over added columns */ 2168 if (in[l] < 0) continue; 2169 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2170 col = in[l]; 2171 if (!A->structure_only) { 2172 if (roworiented) { 2173 value = v + (k * (stepval + bs) + l) * bs; 2174 } else { 2175 value = v + (l * (stepval + bs) + k) * bs; 2176 } 2177 } 2178 if (col <= lastcol) low = 0; 2179 else high = nrow; 2180 lastcol = col; 2181 while (high - low > 7) { 2182 t = (low + high) / 2; 2183 if (rp[t] > col) high = t; 2184 else low = t; 2185 } 2186 for (i = low; i < high; i++) { 2187 if (rp[i] > col) break; 2188 if (rp[i] == col) { 2189 if (A->structure_only) goto noinsert2; 2190 bap = ap + bs2 * i; 2191 if (roworiented) { 2192 if (is == ADD_VALUES) { 2193 for (ii = 0; ii < bs; ii++, value += stepval) { 2194 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2195 } 2196 } else { 2197 for (ii = 0; ii < bs; ii++, value += stepval) { 2198 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2199 } 2200 } 2201 } else { 2202 if (is == ADD_VALUES) { 2203 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2204 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2205 bap += bs; 2206 } 2207 } else { 2208 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2209 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2210 bap += bs; 2211 } 2212 } 2213 } 2214 goto noinsert2; 2215 } 2216 } 2217 if (nonew == 1) goto noinsert2; 2218 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2219 if (A->structure_only) { 2220 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2221 } else { 2222 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2223 } 2224 N = nrow++ - 1; 2225 high++; 2226 /* shift up all the later entries in this row */ 2227 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2228 rp[i] = col; 2229 if (!A->structure_only) { 2230 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2231 bap = ap + bs2 * i; 2232 if (roworiented) { 2233 for (ii = 0; ii < bs; ii++, value += stepval) { 2234 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2235 } 2236 } else { 2237 for (ii = 0; ii < bs; ii++, value += stepval) { 2238 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2239 } 2240 } 2241 } 2242 noinsert2:; 2243 low = i; 2244 } 2245 ailen[row] = nrow; 2246 } 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2251 { 2252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2253 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2254 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2255 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2256 MatScalar *aa = a->a, *ap; 2257 PetscReal ratio = 0.6; 2258 2259 PetscFunctionBegin; 2260 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2261 2262 if (m) rmax = ailen[0]; 2263 for (i = 1; i < mbs; i++) { 2264 /* move each row back by the amount of empty slots (fshift) before it*/ 2265 fshift += imax[i - 1] - ailen[i - 1]; 2266 rmax = PetscMax(rmax, ailen[i]); 2267 if (fshift) { 2268 ip = aj + ai[i]; 2269 ap = aa + bs2 * ai[i]; 2270 N = ailen[i]; 2271 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2272 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2273 } 2274 ai[i] = ai[i - 1] + ailen[i - 1]; 2275 } 2276 if (mbs) { 2277 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2278 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2279 } 2280 2281 /* reset ilen and imax for each row */ 2282 a->nonzerorowcnt = 0; 2283 if (A->structure_only) { 2284 PetscCall(PetscFree2(a->imax, a->ilen)); 2285 } else { /* !A->structure_only */ 2286 for (i = 0; i < mbs; i++) { 2287 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2288 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2289 } 2290 } 2291 a->nz = ai[mbs]; 2292 2293 /* diagonals may have moved, so kill the diagonal pointers */ 2294 a->idiagvalid = PETSC_FALSE; 2295 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2296 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2297 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2298 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2299 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2300 2301 A->info.mallocs += a->reallocs; 2302 a->reallocs = 0; 2303 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2304 a->rmax = rmax; 2305 2306 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2307 PetscFunctionReturn(PETSC_SUCCESS); 2308 } 2309 2310 /* 2311 This function returns an array of flags which indicate the locations of contiguous 2312 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2313 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2314 Assume: sizes should be long enough to hold all the values. 2315 */ 2316 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2317 { 2318 PetscInt j = 0; 2319 2320 PetscFunctionBegin; 2321 for (PetscInt i = 0; i < n; j++) { 2322 PetscInt row = idx[i]; 2323 if (row % bs != 0) { /* Not the beginning of a block */ 2324 sizes[j] = 1; 2325 i++; 2326 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2327 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2328 i++; 2329 } else { /* Beginning of the block, so check if the complete block exists */ 2330 PetscBool flg = PETSC_TRUE; 2331 for (PetscInt k = 1; k < bs; k++) { 2332 if (row + k != idx[i + k]) { /* break in the block */ 2333 flg = PETSC_FALSE; 2334 break; 2335 } 2336 } 2337 if (flg) { /* No break in the bs */ 2338 sizes[j] = bs; 2339 i += bs; 2340 } else { 2341 sizes[j] = 1; 2342 i++; 2343 } 2344 } 2345 } 2346 *bs_max = j; 2347 PetscFunctionReturn(PETSC_SUCCESS); 2348 } 2349 2350 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2351 { 2352 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2353 PetscInt i, j, k, count, *rows; 2354 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2355 PetscScalar zero = 0.0; 2356 MatScalar *aa; 2357 const PetscScalar *xx; 2358 PetscScalar *bb; 2359 2360 PetscFunctionBegin; 2361 /* fix right-hand side if needed */ 2362 if (x && b) { 2363 PetscCall(VecGetArrayRead(x, &xx)); 2364 PetscCall(VecGetArray(b, &bb)); 2365 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2366 PetscCall(VecRestoreArrayRead(x, &xx)); 2367 PetscCall(VecRestoreArray(b, &bb)); 2368 } 2369 2370 /* Make a copy of the IS and sort it */ 2371 /* allocate memory for rows,sizes */ 2372 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2373 2374 /* copy IS values to rows, and sort them */ 2375 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2376 PetscCall(PetscSortInt(is_n, rows)); 2377 2378 if (baij->keepnonzeropattern) { 2379 for (i = 0; i < is_n; i++) sizes[i] = 1; 2380 bs_max = is_n; 2381 } else { 2382 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2383 A->nonzerostate++; 2384 } 2385 2386 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2387 row = rows[j]; 2388 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2389 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2390 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 2391 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2392 if (diag != (PetscScalar)0.0) { 2393 if (baij->ilen[row / bs] > 0) { 2394 baij->ilen[row / bs] = 1; 2395 baij->j[baij->i[row / bs]] = row / bs; 2396 2397 PetscCall(PetscArrayzero(aa, count * bs)); 2398 } 2399 /* Now insert all the diagonal values for this bs */ 2400 for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2401 } else { /* (diag == 0.0) */ 2402 baij->ilen[row / bs] = 0; 2403 } /* end (diag == 0.0) */ 2404 } else { /* (sizes[i] != bs) */ 2405 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2406 for (k = 0; k < count; k++) { 2407 aa[0] = zero; 2408 aa += bs; 2409 } 2410 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2411 } 2412 } 2413 2414 PetscCall(PetscFree2(rows, sizes)); 2415 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2416 PetscFunctionReturn(PETSC_SUCCESS); 2417 } 2418 2419 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2420 { 2421 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2422 PetscInt i, j, k, count; 2423 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2424 PetscScalar zero = 0.0; 2425 MatScalar *aa; 2426 const PetscScalar *xx; 2427 PetscScalar *bb; 2428 PetscBool *zeroed, vecs = PETSC_FALSE; 2429 2430 PetscFunctionBegin; 2431 /* fix right-hand side if needed */ 2432 if (x && b) { 2433 PetscCall(VecGetArrayRead(x, &xx)); 2434 PetscCall(VecGetArray(b, &bb)); 2435 vecs = PETSC_TRUE; 2436 } 2437 2438 /* zero the columns */ 2439 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2440 for (i = 0; i < is_n; i++) { 2441 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2442 zeroed[is_idx[i]] = PETSC_TRUE; 2443 } 2444 for (i = 0; i < A->rmap->N; i++) { 2445 if (!zeroed[i]) { 2446 row = i / bs; 2447 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2448 for (k = 0; k < bs; k++) { 2449 col = bs * baij->j[j] + k; 2450 if (zeroed[col]) { 2451 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 2452 if (vecs) bb[i] -= aa[0] * xx[col]; 2453 aa[0] = 0.0; 2454 } 2455 } 2456 } 2457 } else if (vecs) bb[i] = diag * xx[i]; 2458 } 2459 PetscCall(PetscFree(zeroed)); 2460 if (vecs) { 2461 PetscCall(VecRestoreArrayRead(x, &xx)); 2462 PetscCall(VecRestoreArray(b, &bb)); 2463 } 2464 2465 /* zero the rows */ 2466 for (i = 0; i < is_n; i++) { 2467 row = is_idx[i]; 2468 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2469 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 2470 for (k = 0; k < count; k++) { 2471 aa[0] = zero; 2472 aa += bs; 2473 } 2474 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2475 } 2476 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2477 PetscFunctionReturn(PETSC_SUCCESS); 2478 } 2479 2480 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2483 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2484 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2485 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2486 PetscInt ridx, cidx, bs2 = a->bs2; 2487 PetscBool roworiented = a->roworiented; 2488 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2489 2490 PetscFunctionBegin; 2491 for (k = 0; k < m; k++) { /* loop over added rows */ 2492 row = im[k]; 2493 brow = row / bs; 2494 if (row < 0) continue; 2495 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2496 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2497 if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2498 rmax = imax[brow]; 2499 nrow = ailen[brow]; 2500 low = 0; 2501 high = nrow; 2502 for (l = 0; l < n; l++) { /* loop over added columns */ 2503 if (in[l] < 0) continue; 2504 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2505 col = in[l]; 2506 bcol = col / bs; 2507 ridx = row % bs; 2508 cidx = col % bs; 2509 if (!A->structure_only) { 2510 if (roworiented) { 2511 value = v[l + k * n]; 2512 } else { 2513 value = v[k + l * m]; 2514 } 2515 } 2516 if (col <= lastcol) low = 0; 2517 else high = nrow; 2518 lastcol = col; 2519 while (high - low > 7) { 2520 t = (low + high) / 2; 2521 if (rp[t] > bcol) high = t; 2522 else low = t; 2523 } 2524 for (i = low; i < high; i++) { 2525 if (rp[i] > bcol) break; 2526 if (rp[i] == bcol) { 2527 bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2528 if (!A->structure_only) { 2529 if (is == ADD_VALUES) *bap += value; 2530 else *bap = value; 2531 } 2532 goto noinsert1; 2533 } 2534 } 2535 if (nonew == 1) goto noinsert1; 2536 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2537 if (A->structure_only) { 2538 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2539 } else { 2540 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2541 } 2542 N = nrow++ - 1; 2543 high++; 2544 /* shift up all the later entries in this row */ 2545 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2546 rp[i] = bcol; 2547 if (!A->structure_only) { 2548 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2549 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2550 ap[bs2 * i + bs * cidx + ridx] = value; 2551 } 2552 a->nz++; 2553 noinsert1:; 2554 low = i; 2555 } 2556 ailen[brow] = nrow; 2557 } 2558 PetscFunctionReturn(PETSC_SUCCESS); 2559 } 2560 2561 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2562 { 2563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2564 Mat outA; 2565 PetscBool row_identity, col_identity; 2566 2567 PetscFunctionBegin; 2568 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2569 PetscCall(ISIdentity(row, &row_identity)); 2570 PetscCall(ISIdentity(col, &col_identity)); 2571 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2572 2573 outA = inA; 2574 inA->factortype = MAT_FACTOR_LU; 2575 PetscCall(PetscFree(inA->solvertype)); 2576 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2577 2578 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2579 2580 PetscCall(PetscObjectReference((PetscObject)row)); 2581 PetscCall(ISDestroy(&a->row)); 2582 a->row = row; 2583 PetscCall(PetscObjectReference((PetscObject)col)); 2584 PetscCall(ISDestroy(&a->col)); 2585 a->col = col; 2586 2587 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2588 PetscCall(ISDestroy(&a->icol)); 2589 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2590 2591 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2592 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2593 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2598 { 2599 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2600 2601 PetscFunctionBegin; 2602 baij->nz = baij->maxnz; 2603 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2604 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2605 PetscFunctionReturn(PETSC_SUCCESS); 2606 } 2607 2608 /*@ 2609 MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 2610 2611 Input Parameters: 2612 + mat - the `MATSEQBAIJ` matrix 2613 - indices - the block column indices 2614 2615 Level: advanced 2616 2617 Notes: 2618 This can be called if you have precomputed the nonzero structure of the 2619 matrix and want to provide it to the matrix object to improve the performance 2620 of the `MatSetValues()` operation. 2621 2622 You MUST have set the correct numbers of nonzeros per row in the call to 2623 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2624 2625 MUST be called before any calls to `MatSetValues()` 2626 2627 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2628 @*/ 2629 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2630 { 2631 PetscFunctionBegin; 2632 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2633 PetscAssertPointer(indices, 2); 2634 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 2635 PetscFunctionReturn(PETSC_SUCCESS); 2636 } 2637 2638 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2639 { 2640 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2641 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2642 PetscReal atmp; 2643 PetscScalar *x, zero = 0.0; 2644 MatScalar *aa; 2645 PetscInt ncols, brow, krow, kcol; 2646 2647 PetscFunctionBegin; 2648 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2649 bs = A->rmap->bs; 2650 aa = a->a; 2651 ai = a->i; 2652 aj = a->j; 2653 mbs = a->mbs; 2654 2655 PetscCall(VecSet(v, zero)); 2656 PetscCall(VecGetArray(v, &x)); 2657 PetscCall(VecGetLocalSize(v, &n)); 2658 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2659 for (i = 0; i < mbs; i++) { 2660 ncols = ai[1] - ai[0]; 2661 ai++; 2662 brow = bs * i; 2663 for (j = 0; j < ncols; j++) { 2664 for (kcol = 0; kcol < bs; kcol++) { 2665 for (krow = 0; krow < bs; krow++) { 2666 atmp = PetscAbsScalar(*aa); 2667 aa++; 2668 row = brow + krow; /* row index */ 2669 if (PetscAbsScalar(x[row]) < atmp) { 2670 x[row] = atmp; 2671 if (idx) idx[row] = bs * (*aj) + kcol; 2672 } 2673 } 2674 } 2675 aj++; 2676 } 2677 } 2678 PetscCall(VecRestoreArray(v, &x)); 2679 PetscFunctionReturn(PETSC_SUCCESS); 2680 } 2681 2682 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2683 { 2684 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2685 PetscInt i, j, n, row, bs, *ai, mbs; 2686 PetscReal atmp; 2687 PetscScalar *x, zero = 0.0; 2688 MatScalar *aa; 2689 PetscInt ncols, brow, krow, kcol; 2690 2691 PetscFunctionBegin; 2692 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2693 bs = A->rmap->bs; 2694 aa = a->a; 2695 ai = a->i; 2696 mbs = a->mbs; 2697 2698 PetscCall(VecSet(v, zero)); 2699 PetscCall(VecGetArrayWrite(v, &x)); 2700 PetscCall(VecGetLocalSize(v, &n)); 2701 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2702 for (i = 0; i < mbs; i++) { 2703 ncols = ai[1] - ai[0]; 2704 ai++; 2705 brow = bs * i; 2706 for (j = 0; j < ncols; j++) { 2707 for (kcol = 0; kcol < bs; kcol++) { 2708 for (krow = 0; krow < bs; krow++) { 2709 atmp = PetscAbsScalar(*aa); 2710 aa++; 2711 row = brow + krow; /* row index */ 2712 x[row] += atmp; 2713 } 2714 } 2715 } 2716 } 2717 PetscCall(VecRestoreArrayWrite(v, &x)); 2718 PetscFunctionReturn(PETSC_SUCCESS); 2719 } 2720 2721 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2722 { 2723 PetscFunctionBegin; 2724 /* If the two matrices have the same copy implementation, use fast copy. */ 2725 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2726 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2727 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2728 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2729 2730 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2731 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2732 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2733 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2734 } else { 2735 PetscCall(MatCopy_Basic(A, B, str)); 2736 } 2737 PetscFunctionReturn(PETSC_SUCCESS); 2738 } 2739 2740 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2741 { 2742 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2743 2744 PetscFunctionBegin; 2745 *array = a->a; 2746 PetscFunctionReturn(PETSC_SUCCESS); 2747 } 2748 2749 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2750 { 2751 PetscFunctionBegin; 2752 *array = NULL; 2753 PetscFunctionReturn(PETSC_SUCCESS); 2754 } 2755 2756 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2757 { 2758 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2759 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2760 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2761 2762 PetscFunctionBegin; 2763 /* Set the number of nonzeros in the new matrix */ 2764 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2765 PetscFunctionReturn(PETSC_SUCCESS); 2766 } 2767 2768 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2769 { 2770 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2771 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2772 PetscBLASInt one = 1; 2773 2774 PetscFunctionBegin; 2775 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2776 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2777 if (e) { 2778 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2779 if (e) { 2780 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2781 if (e) str = SAME_NONZERO_PATTERN; 2782 } 2783 } 2784 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2785 } 2786 if (str == SAME_NONZERO_PATTERN) { 2787 PetscScalar alpha = a; 2788 PetscBLASInt bnz; 2789 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2790 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2791 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2792 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2793 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2794 } else { 2795 Mat B; 2796 PetscInt *nnz; 2797 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2798 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2799 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2800 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2801 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2802 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2803 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2804 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2805 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2806 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2807 PetscCall(MatHeaderMerge(Y, &B)); 2808 PetscCall(PetscFree(nnz)); 2809 } 2810 PetscFunctionReturn(PETSC_SUCCESS); 2811 } 2812 2813 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2814 { 2815 #if PetscDefined(USE_COMPLEX) 2816 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2817 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2818 MatScalar *aa = a->a; 2819 2820 PetscFunctionBegin; 2821 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2822 PetscFunctionReturn(PETSC_SUCCESS); 2823 #else 2824 (void)A; 2825 return PETSC_SUCCESS; 2826 #endif 2827 } 2828 2829 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2830 { 2831 #if PetscDefined(USE_COMPLEX) 2832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2833 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2834 MatScalar *aa = a->a; 2835 2836 PetscFunctionBegin; 2837 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2838 PetscFunctionReturn(PETSC_SUCCESS); 2839 #else 2840 (void)A; 2841 return PETSC_SUCCESS; 2842 #endif 2843 } 2844 2845 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2846 { 2847 #if PetscDefined(USE_COMPLEX) 2848 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2849 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2850 MatScalar *aa = a->a; 2851 2852 PetscFunctionBegin; 2853 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2854 PetscFunctionReturn(PETSC_SUCCESS); 2855 #else 2856 (void)A; 2857 return PETSC_SUCCESS; 2858 #endif 2859 } 2860 2861 /* 2862 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2863 */ 2864 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2865 { 2866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2867 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2868 PetscInt nz = a->i[m], row, *jj, mr, col; 2869 2870 PetscFunctionBegin; 2871 *nn = n; 2872 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2873 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2874 PetscCall(PetscCalloc1(n, &collengths)); 2875 PetscCall(PetscMalloc1(n + 1, &cia)); 2876 PetscCall(PetscMalloc1(nz, &cja)); 2877 jj = a->j; 2878 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2879 cia[0] = oshift; 2880 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2881 PetscCall(PetscArrayzero(collengths, n)); 2882 jj = a->j; 2883 for (row = 0; row < m; row++) { 2884 mr = a->i[row + 1] - a->i[row]; 2885 for (i = 0; i < mr; i++) { 2886 col = *jj++; 2887 2888 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2889 } 2890 } 2891 PetscCall(PetscFree(collengths)); 2892 *ia = cia; 2893 *ja = cja; 2894 PetscFunctionReturn(PETSC_SUCCESS); 2895 } 2896 2897 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2898 { 2899 PetscFunctionBegin; 2900 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2901 PetscCall(PetscFree(*ia)); 2902 PetscCall(PetscFree(*ja)); 2903 PetscFunctionReturn(PETSC_SUCCESS); 2904 } 2905 2906 /* 2907 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2908 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2909 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2910 */ 2911 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2912 { 2913 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2914 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2915 PetscInt nz = a->i[m], row, *jj, mr, col; 2916 PetscInt *cspidx; 2917 2918 PetscFunctionBegin; 2919 *nn = n; 2920 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2921 2922 PetscCall(PetscCalloc1(n, &collengths)); 2923 PetscCall(PetscMalloc1(n + 1, &cia)); 2924 PetscCall(PetscMalloc1(nz, &cja)); 2925 PetscCall(PetscMalloc1(nz, &cspidx)); 2926 jj = a->j; 2927 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2928 cia[0] = oshift; 2929 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2930 PetscCall(PetscArrayzero(collengths, n)); 2931 jj = a->j; 2932 for (row = 0; row < m; row++) { 2933 mr = a->i[row + 1] - a->i[row]; 2934 for (i = 0; i < mr; i++) { 2935 col = *jj++; 2936 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2937 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2938 } 2939 } 2940 PetscCall(PetscFree(collengths)); 2941 *ia = cia; 2942 *ja = cja; 2943 *spidx = cspidx; 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2948 { 2949 PetscFunctionBegin; 2950 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2951 PetscCall(PetscFree(*spidx)); 2952 PetscFunctionReturn(PETSC_SUCCESS); 2953 } 2954 2955 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2956 { 2957 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2958 2959 PetscFunctionBegin; 2960 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2961 PetscCall(MatShift_Basic(Y, a)); 2962 PetscFunctionReturn(PETSC_SUCCESS); 2963 } 2964 2965 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2966 { 2967 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2968 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2969 PetscInt m = A->rmap->N, *ailen = a->ilen; 2970 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2971 MatScalar *aa = a->a, *ap; 2972 PetscBool zero; 2973 2974 PetscFunctionBegin; 2975 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2976 if (m) rmax = ailen[0]; 2977 for (i = 1; i <= mbs; i++) { 2978 for (k = ai[i - 1]; k < ai[i]; k++) { 2979 zero = PETSC_TRUE; 2980 ap = aa + bs2 * k; 2981 for (j = 0; j < bs2 && zero; j++) { 2982 if (ap[j] != 0.0) zero = PETSC_FALSE; 2983 } 2984 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2985 else { 2986 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2987 aj[k - fshift] = aj[k]; 2988 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2989 } 2990 } 2991 ai[i - 1] -= fshift_prev; 2992 fshift_prev = fshift; 2993 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2994 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2995 rmax = PetscMax(rmax, ailen[i - 1]); 2996 } 2997 if (fshift) { 2998 if (mbs) { 2999 ai[mbs] -= fshift; 3000 a->nz = ai[mbs]; 3001 } 3002 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 3003 A->nonzerostate++; 3004 A->info.nz_unneeded += (PetscReal)fshift; 3005 a->rmax = rmax; 3006 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 3007 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 3008 } 3009 PetscFunctionReturn(PETSC_SUCCESS); 3010 } 3011 3012 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 3013 MatGetRow_SeqBAIJ, 3014 MatRestoreRow_SeqBAIJ, 3015 MatMult_SeqBAIJ_N, 3016 /* 4*/ MatMultAdd_SeqBAIJ_N, 3017 MatMultTranspose_SeqBAIJ, 3018 MatMultTransposeAdd_SeqBAIJ, 3019 NULL, 3020 NULL, 3021 NULL, 3022 /* 10*/ NULL, 3023 MatLUFactor_SeqBAIJ, 3024 NULL, 3025 NULL, 3026 MatTranspose_SeqBAIJ, 3027 /* 15*/ MatGetInfo_SeqBAIJ, 3028 MatEqual_SeqBAIJ, 3029 MatGetDiagonal_SeqBAIJ, 3030 MatDiagonalScale_SeqBAIJ, 3031 MatNorm_SeqBAIJ, 3032 /* 20*/ NULL, 3033 MatAssemblyEnd_SeqBAIJ, 3034 MatSetOption_SeqBAIJ, 3035 MatZeroEntries_SeqBAIJ, 3036 /* 24*/ MatZeroRows_SeqBAIJ, 3037 NULL, 3038 NULL, 3039 NULL, 3040 NULL, 3041 /* 29*/ MatSetUp_Seq_Hash, 3042 NULL, 3043 NULL, 3044 NULL, 3045 NULL, 3046 /* 34*/ MatDuplicate_SeqBAIJ, 3047 NULL, 3048 NULL, 3049 MatILUFactor_SeqBAIJ, 3050 NULL, 3051 /* 39*/ MatAXPY_SeqBAIJ, 3052 MatCreateSubMatrices_SeqBAIJ, 3053 MatIncreaseOverlap_SeqBAIJ, 3054 MatGetValues_SeqBAIJ, 3055 MatCopy_SeqBAIJ, 3056 /* 44*/ NULL, 3057 MatScale_SeqBAIJ, 3058 MatShift_SeqBAIJ, 3059 NULL, 3060 MatZeroRowsColumns_SeqBAIJ, 3061 /* 49*/ NULL, 3062 MatGetRowIJ_SeqBAIJ, 3063 MatRestoreRowIJ_SeqBAIJ, 3064 MatGetColumnIJ_SeqBAIJ, 3065 MatRestoreColumnIJ_SeqBAIJ, 3066 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3067 NULL, 3068 NULL, 3069 NULL, 3070 MatSetValuesBlocked_SeqBAIJ, 3071 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3072 MatDestroy_SeqBAIJ, 3073 MatView_SeqBAIJ, 3074 NULL, 3075 NULL, 3076 /* 64*/ NULL, 3077 NULL, 3078 NULL, 3079 NULL, 3080 NULL, 3081 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3082 NULL, 3083 MatConvert_Basic, 3084 NULL, 3085 NULL, 3086 /* 74*/ NULL, 3087 MatFDColoringApply_BAIJ, 3088 NULL, 3089 NULL, 3090 NULL, 3091 /* 79*/ NULL, 3092 NULL, 3093 NULL, 3094 NULL, 3095 MatLoad_SeqBAIJ, 3096 /* 84*/ NULL, 3097 NULL, 3098 NULL, 3099 NULL, 3100 NULL, 3101 /* 89*/ NULL, 3102 NULL, 3103 NULL, 3104 NULL, 3105 NULL, 3106 /* 94*/ NULL, 3107 NULL, 3108 NULL, 3109 NULL, 3110 NULL, 3111 /* 99*/ NULL, 3112 NULL, 3113 NULL, 3114 MatConjugate_SeqBAIJ, 3115 NULL, 3116 /*104*/ NULL, 3117 MatRealPart_SeqBAIJ, 3118 MatImaginaryPart_SeqBAIJ, 3119 NULL, 3120 NULL, 3121 /*109*/ NULL, 3122 NULL, 3123 NULL, 3124 NULL, 3125 MatMissingDiagonal_SeqBAIJ, 3126 /*114*/ NULL, 3127 NULL, 3128 NULL, 3129 NULL, 3130 NULL, 3131 /*119*/ NULL, 3132 NULL, 3133 MatMultHermitianTranspose_SeqBAIJ, 3134 MatMultHermitianTransposeAdd_SeqBAIJ, 3135 NULL, 3136 /*124*/ NULL, 3137 MatGetColumnReductions_SeqBAIJ, 3138 MatInvertBlockDiagonal_SeqBAIJ, 3139 NULL, 3140 NULL, 3141 /*129*/ NULL, 3142 NULL, 3143 NULL, 3144 NULL, 3145 NULL, 3146 /*134*/ NULL, 3147 NULL, 3148 NULL, 3149 NULL, 3150 NULL, 3151 /*139*/ MatSetBlockSizes_Default, 3152 NULL, 3153 NULL, 3154 MatFDColoringSetUp_SeqXAIJ, 3155 NULL, 3156 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3157 MatDestroySubMatrices_SeqBAIJ, 3158 NULL, 3159 NULL, 3160 NULL, 3161 NULL, 3162 /*150*/ NULL, 3163 MatEliminateZeros_SeqBAIJ, 3164 MatGetRowSumAbs_SeqBAIJ, 3165 NULL, 3166 NULL, 3167 NULL}; 3168 3169 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3170 { 3171 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3172 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3173 3174 PetscFunctionBegin; 3175 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3176 3177 /* allocate space for values if not already there */ 3178 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3179 3180 /* copy values over */ 3181 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3182 PetscFunctionReturn(PETSC_SUCCESS); 3183 } 3184 3185 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3186 { 3187 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3188 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3189 3190 PetscFunctionBegin; 3191 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3192 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3193 3194 /* copy values over */ 3195 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3196 PetscFunctionReturn(PETSC_SUCCESS); 3197 } 3198 3199 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3200 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3201 3202 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3203 { 3204 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3205 PetscInt i, mbs, nbs, bs2; 3206 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3207 3208 PetscFunctionBegin; 3209 if (B->hash_active) { 3210 PetscInt bs; 3211 B->ops[0] = b->cops; 3212 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3213 PetscCall(MatGetBlockSize(B, &bs)); 3214 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3215 PetscCall(PetscFree(b->dnz)); 3216 PetscCall(PetscFree(b->bdnz)); 3217 B->hash_active = PETSC_FALSE; 3218 } 3219 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3220 if (nz == MAT_SKIP_ALLOCATION) { 3221 skipallocation = PETSC_TRUE; 3222 nz = 0; 3223 } 3224 3225 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3226 PetscCall(PetscLayoutSetUp(B->rmap)); 3227 PetscCall(PetscLayoutSetUp(B->cmap)); 3228 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3229 3230 B->preallocated = PETSC_TRUE; 3231 3232 mbs = B->rmap->n / bs; 3233 nbs = B->cmap->n / bs; 3234 bs2 = bs * bs; 3235 3236 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3237 3238 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3239 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3240 if (nnz) { 3241 for (i = 0; i < mbs; i++) { 3242 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3243 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3244 } 3245 } 3246 3247 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3248 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3249 PetscOptionsEnd(); 3250 3251 if (!flg) { 3252 switch (bs) { 3253 case 1: 3254 B->ops->mult = MatMult_SeqBAIJ_1; 3255 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3256 break; 3257 case 2: 3258 B->ops->mult = MatMult_SeqBAIJ_2; 3259 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3260 break; 3261 case 3: 3262 B->ops->mult = MatMult_SeqBAIJ_3; 3263 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3264 break; 3265 case 4: 3266 B->ops->mult = MatMult_SeqBAIJ_4; 3267 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3268 break; 3269 case 5: 3270 B->ops->mult = MatMult_SeqBAIJ_5; 3271 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3272 break; 3273 case 6: 3274 B->ops->mult = MatMult_SeqBAIJ_6; 3275 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3276 break; 3277 case 7: 3278 B->ops->mult = MatMult_SeqBAIJ_7; 3279 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3280 break; 3281 case 9: { 3282 PetscInt version = 1; 3283 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3284 switch (version) { 3285 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3286 case 1: 3287 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3288 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3289 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3290 break; 3291 #endif 3292 default: 3293 B->ops->mult = MatMult_SeqBAIJ_N; 3294 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3295 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3296 break; 3297 } 3298 break; 3299 } 3300 case 11: 3301 B->ops->mult = MatMult_SeqBAIJ_11; 3302 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3303 break; 3304 case 12: { 3305 PetscInt version = 1; 3306 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3307 switch (version) { 3308 case 1: 3309 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3310 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3311 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3312 break; 3313 case 2: 3314 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3315 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3316 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3317 break; 3318 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3319 case 3: 3320 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3321 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3322 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3323 break; 3324 #endif 3325 default: 3326 B->ops->mult = MatMult_SeqBAIJ_N; 3327 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3328 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3329 break; 3330 } 3331 break; 3332 } 3333 case 15: { 3334 PetscInt version = 1; 3335 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3336 switch (version) { 3337 case 1: 3338 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3339 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3340 break; 3341 case 2: 3342 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3343 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3344 break; 3345 case 3: 3346 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3347 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3348 break; 3349 case 4: 3350 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3351 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3352 break; 3353 default: 3354 B->ops->mult = MatMult_SeqBAIJ_N; 3355 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3356 break; 3357 } 3358 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3359 break; 3360 } 3361 default: 3362 B->ops->mult = MatMult_SeqBAIJ_N; 3363 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3364 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3365 break; 3366 } 3367 } 3368 B->ops->sor = MatSOR_SeqBAIJ; 3369 b->mbs = mbs; 3370 b->nbs = nbs; 3371 if (!skipallocation) { 3372 if (!b->imax) { 3373 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3374 3375 b->free_imax_ilen = PETSC_TRUE; 3376 } 3377 /* b->ilen will count nonzeros in each block row so far. */ 3378 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3379 if (!nnz) { 3380 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3381 else if (nz < 0) nz = 1; 3382 nz = PetscMin(nz, nbs); 3383 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3384 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3385 } else { 3386 PetscInt64 nz64 = 0; 3387 for (i = 0; i < mbs; i++) { 3388 b->imax[i] = nnz[i]; 3389 nz64 += nnz[i]; 3390 } 3391 PetscCall(PetscIntCast(nz64, &nz)); 3392 } 3393 3394 /* allocate the matrix space */ 3395 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3396 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 3397 PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3398 if (B->structure_only) { 3399 b->free_a = PETSC_FALSE; 3400 } else { 3401 PetscInt nzbs2 = 0; 3402 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3403 PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 3404 b->free_a = PETSC_TRUE; 3405 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3406 } 3407 b->free_ij = PETSC_TRUE; 3408 PetscCall(PetscArrayzero(b->j, nz)); 3409 3410 b->i[0] = 0; 3411 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3412 } else { 3413 b->free_a = PETSC_FALSE; 3414 b->free_ij = PETSC_FALSE; 3415 } 3416 3417 b->bs2 = bs2; 3418 b->mbs = mbs; 3419 b->nz = 0; 3420 b->maxnz = nz; 3421 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3422 B->was_assembled = PETSC_FALSE; 3423 B->assembled = PETSC_FALSE; 3424 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3425 PetscFunctionReturn(PETSC_SUCCESS); 3426 } 3427 3428 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3429 { 3430 PetscInt i, m, nz, nz_max = 0, *nnz; 3431 PetscScalar *values = NULL; 3432 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3433 3434 PetscFunctionBegin; 3435 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3436 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3437 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3438 PetscCall(PetscLayoutSetUp(B->rmap)); 3439 PetscCall(PetscLayoutSetUp(B->cmap)); 3440 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3441 m = B->rmap->n / bs; 3442 3443 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3444 PetscCall(PetscMalloc1(m + 1, &nnz)); 3445 for (i = 0; i < m; i++) { 3446 nz = ii[i + 1] - ii[i]; 3447 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3448 nz_max = PetscMax(nz_max, nz); 3449 nnz[i] = nz; 3450 } 3451 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3452 PetscCall(PetscFree(nnz)); 3453 3454 values = (PetscScalar *)V; 3455 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3456 for (i = 0; i < m; i++) { 3457 PetscInt ncols = ii[i + 1] - ii[i]; 3458 const PetscInt *icols = jj + ii[i]; 3459 if (bs == 1 || !roworiented) { 3460 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3461 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3462 } else { 3463 PetscInt j; 3464 for (j = 0; j < ncols; j++) { 3465 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3466 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3467 } 3468 } 3469 } 3470 if (!V) PetscCall(PetscFree(values)); 3471 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3472 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3473 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3474 PetscFunctionReturn(PETSC_SUCCESS); 3475 } 3476 3477 /*@C 3478 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3479 3480 Not Collective 3481 3482 Input Parameter: 3483 . A - a `MATSEQBAIJ` matrix 3484 3485 Output Parameter: 3486 . array - pointer to the data 3487 3488 Level: intermediate 3489 3490 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3491 @*/ 3492 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3493 { 3494 PetscFunctionBegin; 3495 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3496 PetscFunctionReturn(PETSC_SUCCESS); 3497 } 3498 3499 /*@C 3500 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3501 3502 Not Collective 3503 3504 Input Parameters: 3505 + A - a `MATSEQBAIJ` matrix 3506 - array - pointer to the data 3507 3508 Level: intermediate 3509 3510 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3511 @*/ 3512 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3513 { 3514 PetscFunctionBegin; 3515 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3516 PetscFunctionReturn(PETSC_SUCCESS); 3517 } 3518 3519 /*MC 3520 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3521 block sparse compressed row format. 3522 3523 Options Database Keys: 3524 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3525 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3526 3527 Level: beginner 3528 3529 Notes: 3530 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3531 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3532 3533 Run with `-info` to see what version of the matrix-vector product is being used 3534 3535 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3536 M*/ 3537 3538 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3539 3540 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3541 { 3542 PetscMPIInt size; 3543 Mat_SeqBAIJ *b; 3544 3545 PetscFunctionBegin; 3546 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3547 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3548 3549 PetscCall(PetscNew(&b)); 3550 B->data = (void *)b; 3551 B->ops[0] = MatOps_Values; 3552 3553 b->row = NULL; 3554 b->col = NULL; 3555 b->icol = NULL; 3556 b->reallocs = 0; 3557 b->saved_values = NULL; 3558 3559 b->roworiented = PETSC_TRUE; 3560 b->nonew = 0; 3561 b->diag = NULL; 3562 B->spptr = NULL; 3563 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3564 b->keepnonzeropattern = PETSC_FALSE; 3565 3566 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3567 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3568 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3569 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3570 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3574 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3575 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3576 #if defined(PETSC_HAVE_HYPRE) 3577 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3578 #endif 3579 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3580 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3581 PetscFunctionReturn(PETSC_SUCCESS); 3582 } 3583 3584 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3585 { 3586 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3587 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3588 3589 PetscFunctionBegin; 3590 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3591 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3592 3593 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3594 c->imax = a->imax; 3595 c->ilen = a->ilen; 3596 c->free_imax_ilen = PETSC_FALSE; 3597 } else { 3598 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3599 for (i = 0; i < mbs; i++) { 3600 c->imax[i] = a->imax[i]; 3601 c->ilen[i] = a->ilen[i]; 3602 } 3603 c->free_imax_ilen = PETSC_TRUE; 3604 } 3605 3606 /* allocate the matrix space */ 3607 if (mallocmatspace) { 3608 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3609 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3610 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3611 c->free_a = PETSC_TRUE; 3612 c->i = a->i; 3613 c->j = a->j; 3614 c->free_ij = PETSC_FALSE; 3615 c->parent = A; 3616 C->preallocated = PETSC_TRUE; 3617 C->assembled = PETSC_TRUE; 3618 3619 PetscCall(PetscObjectReference((PetscObject)A)); 3620 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3621 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3622 } else { 3623 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3624 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 3625 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3626 c->free_a = PETSC_TRUE; 3627 c->free_ij = PETSC_TRUE; 3628 3629 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3630 if (mbs > 0) { 3631 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3632 if (cpvalues == MAT_COPY_VALUES) { 3633 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3634 } else { 3635 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3636 } 3637 } 3638 C->preallocated = PETSC_TRUE; 3639 C->assembled = PETSC_TRUE; 3640 } 3641 } 3642 3643 c->roworiented = a->roworiented; 3644 c->nonew = a->nonew; 3645 3646 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3647 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3648 3649 c->bs2 = a->bs2; 3650 c->mbs = a->mbs; 3651 c->nbs = a->nbs; 3652 3653 if (a->diag) { 3654 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3655 c->diag = a->diag; 3656 c->free_diag = PETSC_FALSE; 3657 } else { 3658 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3659 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3660 c->free_diag = PETSC_TRUE; 3661 } 3662 } else c->diag = NULL; 3663 3664 c->nz = a->nz; 3665 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3666 c->solve_work = NULL; 3667 c->mult_work = NULL; 3668 c->sor_workt = NULL; 3669 c->sor_work = NULL; 3670 3671 c->compressedrow.use = a->compressedrow.use; 3672 c->compressedrow.nrows = a->compressedrow.nrows; 3673 if (a->compressedrow.use) { 3674 i = a->compressedrow.nrows; 3675 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3676 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3677 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3678 } else { 3679 c->compressedrow.use = PETSC_FALSE; 3680 c->compressedrow.i = NULL; 3681 c->compressedrow.rindex = NULL; 3682 } 3683 c->nonzerorowcnt = a->nonzerorowcnt; 3684 C->nonzerostate = A->nonzerostate; 3685 3686 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3687 PetscFunctionReturn(PETSC_SUCCESS); 3688 } 3689 3690 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3691 { 3692 PetscFunctionBegin; 3693 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3694 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3695 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3696 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3697 PetscFunctionReturn(PETSC_SUCCESS); 3698 } 3699 3700 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3701 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3702 { 3703 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3704 PetscInt *rowidxs, *colidxs; 3705 PetscScalar *matvals; 3706 3707 PetscFunctionBegin; 3708 PetscCall(PetscViewerSetUp(viewer)); 3709 3710 /* read matrix header */ 3711 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3712 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3713 M = header[1]; 3714 N = header[2]; 3715 nz = header[3]; 3716 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3717 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3718 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3719 3720 /* set block sizes from the viewer's .info file */ 3721 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3722 /* set local and global sizes if not set already */ 3723 if (mat->rmap->n < 0) mat->rmap->n = M; 3724 if (mat->cmap->n < 0) mat->cmap->n = N; 3725 if (mat->rmap->N < 0) mat->rmap->N = M; 3726 if (mat->cmap->N < 0) mat->cmap->N = N; 3727 PetscCall(PetscLayoutSetUp(mat->rmap)); 3728 PetscCall(PetscLayoutSetUp(mat->cmap)); 3729 3730 /* check if the matrix sizes are correct */ 3731 PetscCall(MatGetSize(mat, &rows, &cols)); 3732 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3733 PetscCall(MatGetBlockSize(mat, &bs)); 3734 PetscCall(MatGetLocalSize(mat, &m, &n)); 3735 mbs = m / bs; 3736 nbs = n / bs; 3737 3738 /* read in row lengths, column indices and nonzero values */ 3739 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3740 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3741 rowidxs[0] = 0; 3742 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3743 sum = rowidxs[m]; 3744 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3745 3746 /* read in column indices and nonzero values */ 3747 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3748 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3749 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3750 3751 { /* preallocate matrix storage */ 3752 PetscBT bt; /* helper bit set to count nonzeros */ 3753 PetscInt *nnz; 3754 PetscBool sbaij; 3755 3756 PetscCall(PetscBTCreate(nbs, &bt)); 3757 PetscCall(PetscCalloc1(mbs, &nnz)); 3758 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3759 for (i = 0; i < mbs; i++) { 3760 PetscCall(PetscBTMemzero(nbs, bt)); 3761 for (k = 0; k < bs; k++) { 3762 PetscInt row = bs * i + k; 3763 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3764 PetscInt col = colidxs[j]; 3765 if (!sbaij || col >= row) 3766 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3767 } 3768 } 3769 } 3770 PetscCall(PetscBTDestroy(&bt)); 3771 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3772 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3773 PetscCall(PetscFree(nnz)); 3774 } 3775 3776 /* store matrix values */ 3777 for (i = 0; i < m; i++) { 3778 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3779 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3780 } 3781 3782 PetscCall(PetscFree(rowidxs)); 3783 PetscCall(PetscFree2(colidxs, matvals)); 3784 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3785 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3786 PetscFunctionReturn(PETSC_SUCCESS); 3787 } 3788 3789 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3790 { 3791 PetscBool isbinary; 3792 3793 PetscFunctionBegin; 3794 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3795 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3796 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3797 PetscFunctionReturn(PETSC_SUCCESS); 3798 } 3799 3800 /*@ 3801 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3802 compressed row) format. For good matrix assembly performance the 3803 user should preallocate the matrix storage by setting the parameter `nz` 3804 (or the array `nnz`). 3805 3806 Collective 3807 3808 Input Parameters: 3809 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3810 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3811 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3812 . m - number of rows 3813 . n - number of columns 3814 . nz - number of nonzero blocks per block row (same for all rows) 3815 - nnz - array containing the number of nonzero blocks in the various block rows 3816 (possibly different for each block row) or `NULL` 3817 3818 Output Parameter: 3819 . A - the matrix 3820 3821 Options Database Keys: 3822 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3823 - -mat_block_size - size of the blocks to use 3824 3825 Level: intermediate 3826 3827 Notes: 3828 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3829 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3830 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3831 3832 The number of rows and columns must be divisible by blocksize. 3833 3834 If the `nnz` parameter is given then the `nz` parameter is ignored 3835 3836 A nonzero block is any block that as 1 or more nonzeros in it 3837 3838 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3839 storage. That is, the stored row and column indices can begin at 3840 either one (as in Fortran) or zero. 3841 3842 Specify the preallocated storage with either `nz` or `nnz` (not both). 3843 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3844 allocation. See [Sparse Matrices](sec_matsparse) for details. 3845 matrices. 3846 3847 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3848 @*/ 3849 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3850 { 3851 PetscFunctionBegin; 3852 PetscCall(MatCreate(comm, A)); 3853 PetscCall(MatSetSizes(*A, m, n, m, n)); 3854 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3855 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3856 PetscFunctionReturn(PETSC_SUCCESS); 3857 } 3858 3859 /*@ 3860 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3861 per row in the matrix. For good matrix assembly performance the 3862 user should preallocate the matrix storage by setting the parameter `nz` 3863 (or the array `nnz`). 3864 3865 Collective 3866 3867 Input Parameters: 3868 + B - the matrix 3869 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3870 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3871 . nz - number of block nonzeros per block row (same for all rows) 3872 - nnz - array containing the number of block nonzeros in the various block rows 3873 (possibly different for each block row) or `NULL` 3874 3875 Options Database Keys: 3876 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3877 - -mat_block_size - size of the blocks to use 3878 3879 Level: intermediate 3880 3881 Notes: 3882 If the `nnz` parameter is given then the `nz` parameter is ignored 3883 3884 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3885 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3886 You can also run with the option `-info` and look for messages with the string 3887 malloc in them to see if additional memory allocation was needed. 3888 3889 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3890 storage. That is, the stored row and column indices can begin at 3891 either one (as in Fortran) or zero. 3892 3893 Specify the preallocated storage with either `nz` or `nnz` (not both). 3894 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3895 allocation. See [Sparse Matrices](sec_matsparse) for details. 3896 3897 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3898 @*/ 3899 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3900 { 3901 PetscFunctionBegin; 3902 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3903 PetscValidType(B, 1); 3904 PetscValidLogicalCollectiveInt(B, bs, 2); 3905 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3906 PetscFunctionReturn(PETSC_SUCCESS); 3907 } 3908 3909 /*@C 3910 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3911 3912 Collective 3913 3914 Input Parameters: 3915 + B - the matrix 3916 . bs - the blocksize 3917 . i - the indices into `j` for the start of each local row (indices start with zero) 3918 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 3919 - v - optional values in the matrix, use `NULL` if not provided 3920 3921 Level: advanced 3922 3923 Notes: 3924 The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3925 3926 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3927 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3928 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3929 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3930 block column and the second index is over columns within a block. 3931 3932 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3933 3934 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3935 @*/ 3936 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3937 { 3938 PetscFunctionBegin; 3939 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3940 PetscValidType(B, 1); 3941 PetscValidLogicalCollectiveInt(B, bs, 2); 3942 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3943 PetscFunctionReturn(PETSC_SUCCESS); 3944 } 3945 3946 /*@ 3947 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3948 3949 Collective 3950 3951 Input Parameters: 3952 + comm - must be an MPI communicator of size 1 3953 . bs - size of block 3954 . m - number of rows 3955 . n - number of columns 3956 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3957 . j - column indices 3958 - a - matrix values 3959 3960 Output Parameter: 3961 . mat - the matrix 3962 3963 Level: advanced 3964 3965 Notes: 3966 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3967 once the matrix is destroyed 3968 3969 You cannot set new nonzero locations into this matrix, that will generate an error. 3970 3971 The `i` and `j` indices are 0 based 3972 3973 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3974 3975 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3976 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3977 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3978 with column-major ordering within blocks. 3979 3980 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3981 @*/ 3982 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3983 { 3984 Mat_SeqBAIJ *baij; 3985 3986 PetscFunctionBegin; 3987 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3988 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3989 3990 PetscCall(MatCreate(comm, mat)); 3991 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3992 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3993 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3994 baij = (Mat_SeqBAIJ *)(*mat)->data; 3995 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3996 3997 baij->i = i; 3998 baij->j = j; 3999 baij->a = a; 4000 4001 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 4002 baij->free_a = PETSC_FALSE; 4003 baij->free_ij = PETSC_FALSE; 4004 baij->free_imax_ilen = PETSC_TRUE; 4005 4006 for (PetscInt ii = 0; ii < m; ii++) { 4007 const PetscInt row_len = i[ii + 1] - i[ii]; 4008 4009 baij->ilen[ii] = baij->imax[ii] = row_len; 4010 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 4011 } 4012 if (PetscDefined(USE_DEBUG)) { 4013 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 4014 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 4015 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 4016 } 4017 } 4018 4019 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 4020 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 4021 PetscFunctionReturn(PETSC_SUCCESS); 4022 } 4023 4024 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4025 { 4026 PetscFunctionBegin; 4027 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 4028 PetscFunctionReturn(PETSC_SUCCESS); 4029 } 4030