1 /* 2 This provides a matrix that consists of Mats 3 */ 4 5 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 7 8 typedef struct { 9 SEQAIJHEADER(Mat); 10 SEQBAIJHEADER; 11 Mat *diags; 12 13 Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */ 14 } Mat_BlockMat; 15 16 MatGetDiagonalMarkers(BlockMat, A->rmap->bs) 17 18 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 19 { 20 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 21 PetscScalar *x; 22 const Mat *v; 23 const PetscScalar *b; 24 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 25 const PetscInt *idx; 26 IS row, col; 27 MatFactorInfo info; 28 Vec left = a->left, right = a->right, middle = a->middle; 29 Mat *diag; 30 31 PetscFunctionBegin; 32 its = its * lits; 33 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 34 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 35 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 36 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 37 PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep"); 38 39 if (!a->diags) { 40 const PetscInt *adiag; 41 42 PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL)); 43 PetscCall(PetscMalloc1(mbs, &a->diags)); 44 PetscCall(MatFactorInfoInitialize(&info)); 45 for (i = 0; i < mbs; i++) { 46 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 47 PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[adiag[i]], row, &info)); 48 PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[adiag[i]], &info)); 49 PetscCall(ISDestroy(&row)); 50 PetscCall(ISDestroy(&col)); 51 } 52 PetscCall(VecDuplicate(bb, &a->workb)); 53 } 54 diag = a->diags; 55 56 PetscCall(VecSet(xx, 0.0)); 57 PetscCall(VecGetArray(xx, &x)); 58 /* copy right-hand side because it must be modified during iteration */ 59 PetscCall(VecCopy(bb, a->workb)); 60 PetscCall(VecGetArrayRead(a->workb, &b)); 61 62 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 63 while (its--) { 64 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 65 for (i = 0; i < mbs; i++) { 66 n = a->i[i + 1] - a->i[i] - 1; 67 idx = a->j + a->i[i] + 1; 68 v = a->a + a->i[i] + 1; 69 70 PetscCall(VecSet(left, 0.0)); 71 for (j = 0; j < n; j++) { 72 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 73 PetscCall(MatMultAdd(v[j], right, left, left)); 74 PetscCall(VecResetArray(right)); 75 } 76 PetscCall(VecPlaceArray(right, b + i * bs)); 77 PetscCall(VecAYPX(left, -1.0, right)); 78 PetscCall(VecResetArray(right)); 79 80 PetscCall(VecPlaceArray(right, x + i * bs)); 81 PetscCall(MatSolve(diag[i], left, right)); 82 83 /* now adjust right-hand side, see MatSOR_SeqSBAIJ */ 84 for (j = 0; j < n; j++) { 85 PetscCall(MatMultTranspose(v[j], right, left)); 86 PetscCall(VecPlaceArray(middle, b + idx[j] * bs)); 87 PetscCall(VecAXPY(middle, -1.0, left)); 88 PetscCall(VecResetArray(middle)); 89 } 90 PetscCall(VecResetArray(right)); 91 } 92 } 93 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 94 for (i = mbs - 1; i >= 0; i--) { 95 n = a->i[i + 1] - a->i[i] - 1; 96 idx = a->j + a->i[i] + 1; 97 v = a->a + a->i[i] + 1; 98 99 PetscCall(VecSet(left, 0.0)); 100 for (j = 0; j < n; j++) { 101 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 102 PetscCall(MatMultAdd(v[j], right, left, left)); 103 PetscCall(VecResetArray(right)); 104 } 105 PetscCall(VecPlaceArray(right, b + i * bs)); 106 PetscCall(VecAYPX(left, -1.0, right)); 107 PetscCall(VecResetArray(right)); 108 109 PetscCall(VecPlaceArray(right, x + i * bs)); 110 PetscCall(MatSolve(diag[i], left, right)); 111 PetscCall(VecResetArray(right)); 112 } 113 } 114 } 115 PetscCall(VecRestoreArray(xx, &x)); 116 PetscCall(VecRestoreArrayRead(a->workb, &b)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 121 { 122 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 123 PetscScalar *x; 124 const Mat *v; 125 const PetscScalar *b; 126 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 127 const PetscInt *idx; 128 IS row, col; 129 MatFactorInfo info; 130 Vec left = a->left, right = a->right; 131 Mat *diag; 132 133 PetscFunctionBegin; 134 its = its * lits; 135 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 136 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 137 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 138 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 139 140 if (!a->diags) { 141 const PetscInt *adiag; 142 143 PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL)); 144 PetscCall(PetscMalloc1(mbs, &a->diags)); 145 PetscCall(MatFactorInfoInitialize(&info)); 146 for (i = 0; i < mbs; i++) { 147 PetscCall(MatGetOrdering(a->a[adiag[i]], MATORDERINGND, &row, &col)); 148 PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[adiag[i]], row, col, &info)); 149 PetscCall(MatLUFactorNumeric(a->diags[i], a->a[adiag[i]], &info)); 150 PetscCall(ISDestroy(&row)); 151 PetscCall(ISDestroy(&col)); 152 } 153 } 154 diag = a->diags; 155 156 PetscCall(VecSet(xx, 0.0)); 157 PetscCall(VecGetArray(xx, &x)); 158 PetscCall(VecGetArrayRead(bb, &b)); 159 160 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 161 while (its--) { 162 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 163 for (i = 0; i < mbs; i++) { 164 n = a->i[i + 1] - a->i[i]; 165 idx = a->j + a->i[i]; 166 v = a->a + a->i[i]; 167 168 PetscCall(VecSet(left, 0.0)); 169 for (j = 0; j < n; j++) { 170 if (idx[j] != i) { 171 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 172 PetscCall(MatMultAdd(v[j], right, left, left)); 173 PetscCall(VecResetArray(right)); 174 } 175 } 176 PetscCall(VecPlaceArray(right, b + i * bs)); 177 PetscCall(VecAYPX(left, -1.0, right)); 178 PetscCall(VecResetArray(right)); 179 180 PetscCall(VecPlaceArray(right, x + i * bs)); 181 PetscCall(MatSolve(diag[i], left, right)); 182 PetscCall(VecResetArray(right)); 183 } 184 } 185 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 186 for (i = mbs - 1; i >= 0; i--) { 187 n = a->i[i + 1] - a->i[i]; 188 idx = a->j + a->i[i]; 189 v = a->a + a->i[i]; 190 191 PetscCall(VecSet(left, 0.0)); 192 for (j = 0; j < n; j++) { 193 if (idx[j] != i) { 194 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 195 PetscCall(MatMultAdd(v[j], right, left, left)); 196 PetscCall(VecResetArray(right)); 197 } 198 } 199 PetscCall(VecPlaceArray(right, b + i * bs)); 200 PetscCall(VecAYPX(left, -1.0, right)); 201 PetscCall(VecResetArray(right)); 202 203 PetscCall(VecPlaceArray(right, x + i * bs)); 204 PetscCall(MatSolve(diag[i], left, right)); 205 PetscCall(VecResetArray(right)); 206 } 207 } 208 } 209 PetscCall(VecRestoreArray(xx, &x)); 210 PetscCall(VecRestoreArrayRead(bb, &b)); 211 PetscFunctionReturn(PETSC_SUCCESS); 212 } 213 214 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 215 { 216 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 217 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1; 218 PetscInt *ai = a->i, *ailen = a->ilen; 219 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 220 PetscInt ridx, cidx; 221 PetscBool roworiented = a->roworiented; 222 MatScalar value; 223 Mat *ap, *aa = a->a; 224 225 PetscFunctionBegin; 226 for (k = 0; k < m; k++) { /* loop over added rows */ 227 row = im[k]; 228 brow = row / bs; 229 if (row < 0) continue; 230 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 231 rp = aj + ai[brow]; 232 ap = aa + ai[brow]; 233 nrow = ailen[brow]; 234 low = 0; 235 high = nrow; 236 for (l = 0; l < n; l++) { /* loop over added columns */ 237 if (in[l] < 0) continue; 238 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 239 col = in[l]; 240 bcol = col / bs; 241 if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue; 242 ridx = row % bs; 243 cidx = col % bs; 244 if (roworiented) value = v[l + k * n]; 245 else value = v[k + l * m]; 246 247 if (col <= lastcol) low = 0; 248 else high = nrow; 249 lastcol = col; 250 while (high - low > 7) { 251 t = (low + high) / 2; 252 if (rp[t] > bcol) high = t; 253 else low = t; 254 } 255 for (i = low; i < high; i++) { 256 if (rp[i] > bcol) break; 257 if (rp[i] == bcol) goto noinsert1; 258 } 259 if (nonew == 1) goto noinsert1; 260 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col); 261 #if 0 262 MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat); 263 N = nrow++ - 1; 264 high++; 265 /* shift up all the later entries in this row */ 266 for (ii = N; ii >= i; ii--) { 267 rp[ii + 1] = rp[ii]; 268 ap[ii + 1] = ap[ii]; 269 } 270 if (N >= i) ap[i] = NULL; 271 rp[i] = bcol; 272 a->nz++; 273 #endif 274 noinsert1:; 275 if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i)); 276 PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is)); 277 low = i; 278 } 279 ailen[brow] = nrow; 280 } 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) 285 { 286 Mat tmpA; 287 PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0; 288 const PetscInt *cols; 289 const PetscScalar *values; 290 PetscBool flg = PETSC_FALSE, notdone; 291 Mat_SeqAIJ *a; 292 Mat_BlockMat *amat; 293 294 PetscFunctionBegin; 295 /* force binary viewer to load .info file if it has not yet done so */ 296 PetscCall(PetscViewerSetUp(viewer)); 297 PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA)); 298 PetscCall(MatSetType(tmpA, MATSEQAIJ)); 299 PetscCall(MatLoad_SeqAIJ(tmpA, viewer)); 300 301 PetscCall(MatGetLocalSize(tmpA, &m, &n)); 302 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat"); 303 PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL)); 304 PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL)); 305 PetscOptionsEnd(); 306 307 /* Determine number of nonzero blocks for each block row */ 308 a = (Mat_SeqAIJ *)tmpA->data; 309 mbs = m / bs; 310 PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens)); 311 PetscCall(PetscArrayzero(lens, mbs)); 312 313 for (i = 0; i < mbs; i++) { 314 for (j = 0; j < bs; j++) { 315 ii[j] = a->j + a->i[i * bs + j]; 316 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 317 } 318 319 currentcol = -1; 320 while (PETSC_TRUE) { 321 notdone = PETSC_FALSE; 322 nextcol = 1000000000; 323 for (j = 0; j < bs; j++) { 324 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { 325 ii[j]++; 326 ilens[j]--; 327 } 328 if (ilens[j] > 0) { 329 notdone = PETSC_TRUE; 330 nextcol = PetscMin(nextcol, ii[j][0] / bs); 331 } 332 } 333 if (!notdone) break; 334 if (!flg || (nextcol >= i)) lens[i]++; 335 currentcol = nextcol; 336 } 337 } 338 339 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 340 PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens)); 341 if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE)); 342 amat = (Mat_BlockMat *)newmat->data; 343 344 /* preallocate the submatrices */ 345 PetscCall(PetscMalloc1(bs, &llens)); 346 for (i = 0; i < mbs; i++) { /* loops for block rows */ 347 for (j = 0; j < bs; j++) { 348 ii[j] = a->j + a->i[i * bs + j]; 349 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 350 } 351 352 currentcol = 1000000000; 353 for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */ 354 if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs); 355 } 356 357 while (PETSC_TRUE) { /* loops over blocks in block row */ 358 notdone = PETSC_FALSE; 359 nextcol = 1000000000; 360 PetscCall(PetscArrayzero(llens, bs)); 361 for (j = 0; j < bs; j++) { /* loop over rows in block */ 362 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */ 363 ii[j]++; 364 ilens[j]--; 365 llens[j]++; 366 } 367 if (ilens[j] > 0) { 368 notdone = PETSC_TRUE; 369 nextcol = PetscMin(nextcol, ii[j][0] / bs); 370 } 371 } 372 PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt); 373 if (!flg || currentcol >= i) { 374 amat->j[cnt] = currentcol; 375 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++)); 376 } 377 378 if (!notdone) break; 379 currentcol = nextcol; 380 } 381 amat->ilen[i] = lens[i]; 382 } 383 384 PetscCall(PetscFree3(lens, ii, ilens)); 385 PetscCall(PetscFree(llens)); 386 387 /* copy over the matrix, one row at a time */ 388 for (i = 0; i < m; i++) { 389 PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values)); 390 PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES)); 391 PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values)); 392 } 393 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 394 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer) 399 { 400 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 401 const char *name; 402 PetscViewerFormat format; 403 404 PetscFunctionBegin; 405 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 406 PetscCall(PetscViewerGetFormat(viewer, &format)); 407 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 408 PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz)); 409 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n")); 410 } 411 PetscFunctionReturn(PETSC_SUCCESS); 412 } 413 414 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 415 { 416 Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data; 417 PetscInt i; 418 419 PetscFunctionBegin; 420 PetscCall(VecDestroy(&bmat->right)); 421 PetscCall(VecDestroy(&bmat->left)); 422 PetscCall(VecDestroy(&bmat->middle)); 423 PetscCall(VecDestroy(&bmat->workb)); 424 if (bmat->diags) { 425 for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i])); 426 } 427 if (bmat->a) { 428 for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i])); 429 } 430 PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 431 PetscCall(PetscFree(mat->data)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y) 436 { 437 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 438 PetscScalar *xx, *yy; 439 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 440 Mat *aa; 441 442 PetscFunctionBegin; 443 /* 444 Standard CSR multiply except each entry is a Mat 445 */ 446 PetscCall(VecGetArray(x, &xx)); 447 448 PetscCall(VecSet(y, 0.0)); 449 PetscCall(VecGetArray(y, &yy)); 450 aj = bmat->j; 451 aa = bmat->a; 452 ii = bmat->i; 453 for (i = 0; i < m; i++) { 454 jrow = ii[i]; 455 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 456 n = ii[i + 1] - jrow; 457 for (j = 0; j < n; j++) { 458 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 459 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 460 PetscCall(VecResetArray(bmat->right)); 461 jrow++; 462 } 463 PetscCall(VecResetArray(bmat->left)); 464 } 465 PetscCall(VecRestoreArray(x, &xx)); 466 PetscCall(VecRestoreArray(y, &yy)); 467 PetscFunctionReturn(PETSC_SUCCESS); 468 } 469 470 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y) 471 { 472 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 473 PetscScalar *xx, *yy; 474 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 475 Mat *aa; 476 477 PetscFunctionBegin; 478 /* 479 Standard CSR multiply except each entry is a Mat 480 */ 481 PetscCall(VecGetArray(x, &xx)); 482 483 PetscCall(VecSet(y, 0.0)); 484 PetscCall(VecGetArray(y, &yy)); 485 aj = bmat->j; 486 aa = bmat->a; 487 ii = bmat->i; 488 for (i = 0; i < m; i++) { 489 jrow = ii[i]; 490 n = ii[i + 1] - jrow; 491 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 492 PetscCall(VecPlaceArray(bmat->middle, xx + bs * i)); 493 /* if we ALWAYS required a diagonal entry then could remove this if test */ 494 if (aj[jrow] == i) { 495 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 496 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 497 PetscCall(VecResetArray(bmat->right)); 498 jrow++; 499 n--; 500 } 501 for (j = 0; j < n; j++) { 502 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */ 503 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 504 PetscCall(VecResetArray(bmat->right)); 505 506 PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */ 507 PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right)); 508 PetscCall(VecResetArray(bmat->right)); 509 jrow++; 510 } 511 PetscCall(VecResetArray(bmat->left)); 512 PetscCall(VecResetArray(bmat->middle)); 513 } 514 PetscCall(VecRestoreArray(x, &xx)); 515 PetscCall(VecRestoreArray(y, &yy)); 516 PetscFunctionReturn(PETSC_SUCCESS); 517 } 518 519 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 520 { 521 PetscFunctionBegin; 522 PetscFunctionReturn(PETSC_SUCCESS); 523 } 524 525 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y) 526 { 527 PetscFunctionBegin; 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 532 { 533 PetscFunctionBegin; 534 PetscFunctionReturn(PETSC_SUCCESS); 535 } 536 537 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 538 { 539 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 540 Mat_SeqAIJ *c; 541 PetscInt i, k, first, step, lensi, nrows, ncols; 542 PetscInt *j_new, *i_new, *aj = a->j, *ailen = a->ilen; 543 PetscScalar *a_new; 544 Mat C, *aa = a->a; 545 PetscBool stride, equal; 546 547 PetscFunctionBegin; 548 PetscCall(ISEqual(isrow, iscol, &equal)); 549 PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices"); 550 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride)); 551 PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices"); 552 PetscCall(ISStrideGetInfo(iscol, &first, &step)); 553 PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block"); 554 555 PetscCall(ISGetLocalSize(isrow, &nrows)); 556 ncols = nrows; 557 558 /* create submatrix */ 559 if (scall == MAT_REUSE_MATRIX) { 560 PetscInt n_cols, n_rows; 561 C = *B; 562 PetscCall(MatGetSize(C, &n_rows, &n_cols)); 563 PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size"); 564 PetscCall(MatZeroEntries(C)); 565 } else { 566 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 567 PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE)); 568 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ)); 569 else PetscCall(MatSetType(C, MATSEQAIJ)); 570 PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen)); 571 PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen)); 572 } 573 c = (Mat_SeqAIJ *)C->data; 574 575 /* loop over rows inserting into submatrix */ 576 a_new = c->a; 577 j_new = c->j; 578 i_new = c->i; 579 580 for (i = 0; i < nrows; i++) { 581 lensi = ailen[i]; 582 for (k = 0; k < lensi; k++) { 583 *j_new++ = *aj++; 584 PetscCall(MatGetValue(*aa++, first, first, a_new++)); 585 } 586 i_new[i + 1] = i_new[i] + lensi; 587 c->ilen[i] = lensi; 588 } 589 590 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 591 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 592 *B = C; 593 PetscFunctionReturn(PETSC_SUCCESS); 594 } 595 596 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode) 597 { 598 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 599 PetscInt fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax; 600 PetscInt m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0; 601 Mat *aa = a->a, *ap; 602 603 PetscFunctionBegin; 604 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 605 606 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 607 for (i = 1; i < m; i++) { 608 /* move each row back by the amount of empty slots (fshift) before it*/ 609 fshift += imax[i - 1] - ailen[i - 1]; 610 rmax = PetscMax(rmax, ailen[i]); 611 if (fshift) { 612 ip = aj + ai[i]; 613 ap = aa + ai[i]; 614 N = ailen[i]; 615 for (j = 0; j < N; j++) { 616 ip[j - fshift] = ip[j]; 617 ap[j - fshift] = ap[j]; 618 } 619 } 620 ai[i] = ai[i - 1] + ailen[i - 1]; 621 } 622 if (m) { 623 fshift += imax[m - 1] - ailen[m - 1]; 624 ai[m] = ai[m - 1] + ailen[m - 1]; 625 } 626 /* reset ilen and imax for each row */ 627 for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i]; 628 a->nz = ai[m]; 629 for (i = 0; i < a->nz; i++) { 630 PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz); 631 PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY)); 632 PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY)); 633 } 634 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz)); 635 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs)); 636 PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax)); 637 638 A->info.mallocs += a->reallocs; 639 a->reallocs = 0; 640 A->info.nz_unneeded = (double)fshift; 641 a->rmax = rmax; 642 PetscFunctionReturn(PETSC_SUCCESS); 643 } 644 645 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg) 646 { 647 PetscFunctionBegin; 648 if (opt == MAT_SYMMETRIC && flg) { 649 A->ops->sor = MatSOR_BlockMat_Symmetric; 650 A->ops->mult = MatMult_BlockMat_Symmetric; 651 } else { 652 PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt])); 653 } 654 PetscFunctionReturn(PETSC_SUCCESS); 655 } 656 657 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat, 658 NULL, 659 NULL, 660 MatMult_BlockMat, 661 /* 4*/ MatMultAdd_BlockMat, 662 MatMultTranspose_BlockMat, 663 MatMultTransposeAdd_BlockMat, 664 NULL, 665 NULL, 666 NULL, 667 /* 10*/ NULL, 668 NULL, 669 NULL, 670 MatSOR_BlockMat, 671 NULL, 672 /* 15*/ NULL, 673 NULL, 674 NULL, 675 NULL, 676 NULL, 677 /* 20*/ NULL, 678 MatAssemblyEnd_BlockMat, 679 MatSetOption_BlockMat, 680 NULL, 681 /* 24*/ NULL, 682 NULL, 683 NULL, 684 NULL, 685 NULL, 686 /* 29*/ NULL, 687 NULL, 688 NULL, 689 NULL, 690 NULL, 691 /* 34*/ NULL, 692 NULL, 693 NULL, 694 NULL, 695 NULL, 696 /* 39*/ NULL, 697 NULL, 698 NULL, 699 NULL, 700 NULL, 701 /* 44*/ NULL, 702 NULL, 703 MatShift_Basic, 704 NULL, 705 NULL, 706 /* 49*/ NULL, 707 NULL, 708 NULL, 709 NULL, 710 NULL, 711 /* 54*/ NULL, 712 NULL, 713 NULL, 714 NULL, 715 NULL, 716 /* 59*/ MatCreateSubMatrix_BlockMat, 717 MatDestroy_BlockMat, 718 MatView_BlockMat, 719 NULL, 720 NULL, 721 /* 64*/ NULL, 722 NULL, 723 NULL, 724 NULL, 725 NULL, 726 /* 69*/ NULL, 727 NULL, 728 NULL, 729 NULL, 730 NULL, 731 /* 74*/ NULL, 732 NULL, 733 NULL, 734 NULL, 735 MatLoad_BlockMat, 736 /* 79*/ NULL, 737 NULL, 738 NULL, 739 NULL, 740 NULL, 741 /* 84*/ NULL, 742 NULL, 743 NULL, 744 NULL, 745 NULL, 746 /* 89*/ NULL, 747 NULL, 748 NULL, 749 NULL, 750 NULL, 751 /* 94*/ NULL, 752 NULL, 753 NULL, 754 NULL, 755 NULL, 756 /* 99*/ NULL, 757 NULL, 758 NULL, 759 NULL, 760 NULL, 761 /*104*/ NULL, 762 NULL, 763 NULL, 764 NULL, 765 NULL, 766 /*109*/ NULL, 767 NULL, 768 NULL, 769 NULL, 770 NULL, 771 /*114*/ NULL, 772 NULL, 773 NULL, 774 NULL, 775 NULL, 776 /*119*/ NULL, 777 NULL, 778 NULL, 779 NULL, 780 NULL, 781 /*124*/ NULL, 782 NULL, 783 NULL, 784 NULL, 785 NULL, 786 /*129*/ NULL, 787 NULL, 788 NULL, 789 NULL, 790 NULL, 791 /*134*/ NULL, 792 NULL, 793 NULL, 794 NULL, 795 NULL, 796 /*139*/ NULL, 797 NULL, 798 NULL, 799 NULL, 800 NULL}; 801 802 /*@C 803 MatBlockMatSetPreallocation - For good matrix assembly performance 804 the user should preallocate the matrix storage by setting the parameter nz 805 (or the array nnz). By setting these parameters accurately, performance 806 during matrix assembly can be increased by more than a factor of 50. 807 808 Collective 809 810 Input Parameters: 811 + B - The matrix 812 . bs - size of each block in matrix 813 . nz - number of nonzeros per block row (same for all rows) 814 - nnz - array containing the number of nonzeros in the various block rows 815 (possibly different for each row) or `NULL` 816 817 Level: intermediate 818 819 Notes: 820 If `nnz` is given then `nz` is ignored 821 822 Specify the preallocated storage with either `nz` or `nnz` (not both). 823 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 824 allocation. 825 826 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()` 827 @*/ 828 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 829 { 830 PetscFunctionBegin; 831 PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 832 PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz) 836 { 837 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 838 PetscInt i; 839 840 PetscFunctionBegin; 841 PetscCall(PetscLayoutSetBlockSize(A->rmap, bs)); 842 PetscCall(PetscLayoutSetBlockSize(A->cmap, bs)); 843 PetscCall(PetscLayoutSetUp(A->rmap)); 844 PetscCall(PetscLayoutSetUp(A->cmap)); 845 PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs)); 846 847 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 848 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 849 if (nnz) { 850 for (i = 0; i < A->rmap->n / bs; i++) { 851 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 852 PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs); 853 } 854 } 855 bmat->mbs = A->rmap->n / bs; 856 857 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right)); 858 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle)); 859 PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left)); 860 861 if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen)); 862 PetscCheck(PetscLikely(nnz), PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation"); 863 nz = 0; 864 for (i = 0; i < A->rmap->n / A->rmap->bs; i++) { 865 bmat->imax[i] = nnz[i]; 866 nz += nnz[i]; 867 } 868 869 /* bmat->ilen will count nonzeros in each row so far. */ 870 PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs)); 871 872 /* allocate the matrix space */ 873 PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 874 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a)); 875 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j)); 876 PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i)); 877 bmat->free_a = PETSC_TRUE; 878 bmat->free_ij = PETSC_TRUE; 879 880 bmat->i[0] = 0; 881 for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1]; 882 883 bmat->nz = 0; 884 bmat->maxnz = nz; 885 A->info.nz_unneeded = (double)bmat->maxnz; 886 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 887 PetscFunctionReturn(PETSC_SUCCESS); 888 } 889 890 /*MC 891 MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix 892 consisting of (usually) sparse blocks. 893 894 Level: advanced 895 896 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()` 897 M*/ 898 899 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) 900 { 901 Mat_BlockMat *b; 902 903 PetscFunctionBegin; 904 PetscCall(PetscNew(&b)); 905 A->data = (void *)b; 906 A->ops[0] = MatOps_Values; 907 A->assembled = PETSC_TRUE; 908 A->preallocated = PETSC_FALSE; 909 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT)); 910 911 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat)); 912 PetscFunctionReturn(PETSC_SUCCESS); 913 } 914 915 /*@C 916 MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object 917 918 Collective 919 920 Input Parameters: 921 + comm - MPI communicator 922 . m - number of rows 923 . n - number of columns 924 . bs - size of each submatrix 925 . nz - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known) 926 - nnz - expected number of nonzers per block row if known (use `NULL` otherwise) 927 928 Output Parameter: 929 . A - the matrix 930 931 Level: intermediate 932 933 Notes: 934 Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must 935 have the same size and be sequential. The local and global sizes must be compatible with this decomposition. 936 937 For matrices containing parallel submatrices and variable block sizes, see `MATNEST`. 938 939 Developer Notes: 940 I don't like the name, it is not `MATNESTMAT` 941 942 .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()` 943 @*/ 944 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A) 945 { 946 PetscFunctionBegin; 947 PetscCall(MatCreate(comm, A)); 948 PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 949 PetscCall(MatSetType(*A, MATBLOCKMAT)); 950 PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz)); 951 PetscFunctionReturn(PETSC_SUCCESS); 952 } 953