1 /* 2 This provides a matrix that consists of Mats 3 */ 4 5 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 7 8 typedef struct { 9 SEQAIJHEADER(Mat); 10 SEQBAIJHEADER; 11 Mat *diags; 12 13 Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */ 14 } Mat_BlockMat; 15 16 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 17 { 18 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 19 PetscScalar *x; 20 const Mat *v; 21 const PetscScalar *b; 22 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 23 const PetscInt *idx; 24 IS row, col; 25 MatFactorInfo info; 26 Vec left = a->left, right = a->right, middle = a->middle; 27 Mat *diag; 28 29 PetscFunctionBegin; 30 its = its * lits; 31 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 32 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 33 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 34 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 35 PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep"); 36 37 if (!a->diags) { 38 PetscCall(PetscMalloc1(mbs, &a->diags)); 39 PetscCall(MatFactorInfoInitialize(&info)); 40 for (i = 0; i < mbs; i++) { 41 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 42 PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info)); 43 PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 44 PetscCall(ISDestroy(&row)); 45 PetscCall(ISDestroy(&col)); 46 } 47 PetscCall(VecDuplicate(bb, &a->workb)); 48 } 49 diag = a->diags; 50 51 PetscCall(VecSet(xx, 0.0)); 52 PetscCall(VecGetArray(xx, &x)); 53 /* copy right-hand side because it must be modified during iteration */ 54 PetscCall(VecCopy(bb, a->workb)); 55 PetscCall(VecGetArrayRead(a->workb, &b)); 56 57 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 58 while (its--) { 59 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 60 for (i = 0; i < mbs; i++) { 61 n = a->i[i + 1] - a->i[i] - 1; 62 idx = a->j + a->i[i] + 1; 63 v = a->a + a->i[i] + 1; 64 65 PetscCall(VecSet(left, 0.0)); 66 for (j = 0; j < n; j++) { 67 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 68 PetscCall(MatMultAdd(v[j], right, left, left)); 69 PetscCall(VecResetArray(right)); 70 } 71 PetscCall(VecPlaceArray(right, b + i * bs)); 72 PetscCall(VecAYPX(left, -1.0, right)); 73 PetscCall(VecResetArray(right)); 74 75 PetscCall(VecPlaceArray(right, x + i * bs)); 76 PetscCall(MatSolve(diag[i], left, right)); 77 78 /* now adjust right-hand side, see MatSOR_SeqSBAIJ */ 79 for (j = 0; j < n; j++) { 80 PetscCall(MatMultTranspose(v[j], right, left)); 81 PetscCall(VecPlaceArray(middle, b + idx[j] * bs)); 82 PetscCall(VecAXPY(middle, -1.0, left)); 83 PetscCall(VecResetArray(middle)); 84 } 85 PetscCall(VecResetArray(right)); 86 } 87 } 88 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 89 for (i = mbs - 1; i >= 0; i--) { 90 n = a->i[i + 1] - a->i[i] - 1; 91 idx = a->j + a->i[i] + 1; 92 v = a->a + a->i[i] + 1; 93 94 PetscCall(VecSet(left, 0.0)); 95 for (j = 0; j < n; j++) { 96 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 97 PetscCall(MatMultAdd(v[j], right, left, left)); 98 PetscCall(VecResetArray(right)); 99 } 100 PetscCall(VecPlaceArray(right, b + i * bs)); 101 PetscCall(VecAYPX(left, -1.0, right)); 102 PetscCall(VecResetArray(right)); 103 104 PetscCall(VecPlaceArray(right, x + i * bs)); 105 PetscCall(MatSolve(diag[i], left, right)); 106 PetscCall(VecResetArray(right)); 107 } 108 } 109 } 110 PetscCall(VecRestoreArray(xx, &x)); 111 PetscCall(VecRestoreArrayRead(a->workb, &b)); 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 115 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 116 { 117 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 118 PetscScalar *x; 119 const Mat *v; 120 const PetscScalar *b; 121 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 122 const PetscInt *idx; 123 IS row, col; 124 MatFactorInfo info; 125 Vec left = a->left, right = a->right; 126 Mat *diag; 127 128 PetscFunctionBegin; 129 its = its * lits; 130 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 131 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 132 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 133 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 134 135 if (!a->diags) { 136 PetscCall(PetscMalloc1(mbs, &a->diags)); 137 PetscCall(MatFactorInfoInitialize(&info)); 138 for (i = 0; i < mbs; i++) { 139 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 140 PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info)); 141 PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 142 PetscCall(ISDestroy(&row)); 143 PetscCall(ISDestroy(&col)); 144 } 145 } 146 diag = a->diags; 147 148 PetscCall(VecSet(xx, 0.0)); 149 PetscCall(VecGetArray(xx, &x)); 150 PetscCall(VecGetArrayRead(bb, &b)); 151 152 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 153 while (its--) { 154 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 155 for (i = 0; i < mbs; i++) { 156 n = a->i[i + 1] - a->i[i]; 157 idx = a->j + a->i[i]; 158 v = a->a + a->i[i]; 159 160 PetscCall(VecSet(left, 0.0)); 161 for (j = 0; j < n; j++) { 162 if (idx[j] != i) { 163 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 164 PetscCall(MatMultAdd(v[j], right, left, left)); 165 PetscCall(VecResetArray(right)); 166 } 167 } 168 PetscCall(VecPlaceArray(right, b + i * bs)); 169 PetscCall(VecAYPX(left, -1.0, right)); 170 PetscCall(VecResetArray(right)); 171 172 PetscCall(VecPlaceArray(right, x + i * bs)); 173 PetscCall(MatSolve(diag[i], left, right)); 174 PetscCall(VecResetArray(right)); 175 } 176 } 177 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 178 for (i = mbs - 1; i >= 0; i--) { 179 n = a->i[i + 1] - a->i[i]; 180 idx = a->j + a->i[i]; 181 v = a->a + a->i[i]; 182 183 PetscCall(VecSet(left, 0.0)); 184 for (j = 0; j < n; j++) { 185 if (idx[j] != i) { 186 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 187 PetscCall(MatMultAdd(v[j], right, left, left)); 188 PetscCall(VecResetArray(right)); 189 } 190 } 191 PetscCall(VecPlaceArray(right, b + i * bs)); 192 PetscCall(VecAYPX(left, -1.0, right)); 193 PetscCall(VecResetArray(right)); 194 195 PetscCall(VecPlaceArray(right, x + i * bs)); 196 PetscCall(MatSolve(diag[i], left, right)); 197 PetscCall(VecResetArray(right)); 198 } 199 } 200 } 201 PetscCall(VecRestoreArray(xx, &x)); 202 PetscCall(VecRestoreArrayRead(bb, &b)); 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 207 { 208 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 209 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 210 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 211 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 212 PetscInt ridx, cidx; 213 PetscBool roworiented = a->roworiented; 214 MatScalar value; 215 Mat *ap, *aa = a->a; 216 217 PetscFunctionBegin; 218 for (k = 0; k < m; k++) { /* loop over added rows */ 219 row = im[k]; 220 brow = row / bs; 221 if (row < 0) continue; 222 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 223 rp = aj + ai[brow]; 224 ap = aa + ai[brow]; 225 rmax = imax[brow]; 226 nrow = ailen[brow]; 227 low = 0; 228 high = nrow; 229 for (l = 0; l < n; l++) { /* loop over added columns */ 230 if (in[l] < 0) continue; 231 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 232 col = in[l]; 233 bcol = col / bs; 234 if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue; 235 ridx = row % bs; 236 cidx = col % bs; 237 if (roworiented) value = v[l + k * n]; 238 else value = v[k + l * m]; 239 240 if (col <= lastcol) low = 0; 241 else high = nrow; 242 lastcol = col; 243 while (high - low > 7) { 244 t = (low + high) / 2; 245 if (rp[t] > bcol) high = t; 246 else low = t; 247 } 248 for (i = low; i < high; i++) { 249 if (rp[i] > bcol) break; 250 if (rp[i] == bcol) goto noinsert1; 251 } 252 if (nonew == 1) goto noinsert1; 253 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 254 MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat); 255 N = nrow++ - 1; 256 high++; 257 /* shift up all the later entries in this row */ 258 for (ii = N; ii >= i; ii--) { 259 rp[ii + 1] = rp[ii]; 260 ap[ii + 1] = ap[ii]; 261 } 262 if (N >= i) ap[i] = NULL; 263 rp[i] = bcol; 264 a->nz++; 265 A->nonzerostate++; 266 noinsert1:; 267 if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i)); 268 PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is)); 269 low = i; 270 } 271 ailen[brow] = nrow; 272 } 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) 277 { 278 Mat tmpA; 279 PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0; 280 const PetscInt *cols; 281 const PetscScalar *values; 282 PetscBool flg = PETSC_FALSE, notdone; 283 Mat_SeqAIJ *a; 284 Mat_BlockMat *amat; 285 286 PetscFunctionBegin; 287 /* force binary viewer to load .info file if it has not yet done so */ 288 PetscCall(PetscViewerSetUp(viewer)); 289 PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA)); 290 PetscCall(MatSetType(tmpA, MATSEQAIJ)); 291 PetscCall(MatLoad_SeqAIJ(tmpA, viewer)); 292 293 PetscCall(MatGetLocalSize(tmpA, &m, &n)); 294 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat"); 295 PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL)); 296 PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL)); 297 PetscOptionsEnd(); 298 299 /* Determine number of nonzero blocks for each block row */ 300 a = (Mat_SeqAIJ *)tmpA->data; 301 mbs = m / bs; 302 PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens)); 303 PetscCall(PetscArrayzero(lens, mbs)); 304 305 for (i = 0; i < mbs; i++) { 306 for (j = 0; j < bs; j++) { 307 ii[j] = a->j + a->i[i * bs + j]; 308 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 309 } 310 311 currentcol = -1; 312 while (PETSC_TRUE) { 313 notdone = PETSC_FALSE; 314 nextcol = 1000000000; 315 for (j = 0; j < bs; j++) { 316 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { 317 ii[j]++; 318 ilens[j]--; 319 } 320 if (ilens[j] > 0) { 321 notdone = PETSC_TRUE; 322 nextcol = PetscMin(nextcol, ii[j][0] / bs); 323 } 324 } 325 if (!notdone) break; 326 if (!flg || (nextcol >= i)) lens[i]++; 327 currentcol = nextcol; 328 } 329 } 330 331 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 332 PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens)); 333 if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE)); 334 amat = (Mat_BlockMat *)(newmat)->data; 335 336 /* preallocate the submatrices */ 337 PetscCall(PetscMalloc1(bs, &llens)); 338 for (i = 0; i < mbs; i++) { /* loops for block rows */ 339 for (j = 0; j < bs; j++) { 340 ii[j] = a->j + a->i[i * bs + j]; 341 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 342 } 343 344 currentcol = 1000000000; 345 for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */ 346 if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs); 347 } 348 349 while (PETSC_TRUE) { /* loops over blocks in block row */ 350 notdone = PETSC_FALSE; 351 nextcol = 1000000000; 352 PetscCall(PetscArrayzero(llens, bs)); 353 for (j = 0; j < bs; j++) { /* loop over rows in block */ 354 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */ 355 ii[j]++; 356 ilens[j]--; 357 llens[j]++; 358 } 359 if (ilens[j] > 0) { 360 notdone = PETSC_TRUE; 361 nextcol = PetscMin(nextcol, ii[j][0] / bs); 362 } 363 } 364 PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt); 365 if (!flg || currentcol >= i) { 366 amat->j[cnt] = currentcol; 367 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++)); 368 } 369 370 if (!notdone) break; 371 currentcol = nextcol; 372 } 373 amat->ilen[i] = lens[i]; 374 } 375 376 PetscCall(PetscFree3(lens, ii, ilens)); 377 PetscCall(PetscFree(llens)); 378 379 /* copy over the matrix, one row at a time */ 380 for (i = 0; i < m; i++) { 381 PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values)); 382 PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES)); 383 PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values)); 384 } 385 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 386 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 387 PetscFunctionReturn(PETSC_SUCCESS); 388 } 389 390 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer) 391 { 392 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 393 const char *name; 394 PetscViewerFormat format; 395 396 PetscFunctionBegin; 397 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 398 PetscCall(PetscViewerGetFormat(viewer, &format)); 399 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 400 PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz)); 401 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n")); 402 } 403 PetscFunctionReturn(PETSC_SUCCESS); 404 } 405 406 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 407 { 408 Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data; 409 PetscInt i; 410 411 PetscFunctionBegin; 412 PetscCall(VecDestroy(&bmat->right)); 413 PetscCall(VecDestroy(&bmat->left)); 414 PetscCall(VecDestroy(&bmat->middle)); 415 PetscCall(VecDestroy(&bmat->workb)); 416 if (bmat->diags) { 417 for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i])); 418 } 419 if (bmat->a) { 420 for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i])); 421 } 422 PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 423 PetscCall(PetscFree(mat->data)); 424 PetscFunctionReturn(PETSC_SUCCESS); 425 } 426 427 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y) 428 { 429 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 430 PetscScalar *xx, *yy; 431 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 432 Mat *aa; 433 434 PetscFunctionBegin; 435 /* 436 Standard CSR multiply except each entry is a Mat 437 */ 438 PetscCall(VecGetArray(x, &xx)); 439 440 PetscCall(VecSet(y, 0.0)); 441 PetscCall(VecGetArray(y, &yy)); 442 aj = bmat->j; 443 aa = bmat->a; 444 ii = bmat->i; 445 for (i = 0; i < m; i++) { 446 jrow = ii[i]; 447 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 448 n = ii[i + 1] - jrow; 449 for (j = 0; j < n; j++) { 450 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 451 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 452 PetscCall(VecResetArray(bmat->right)); 453 jrow++; 454 } 455 PetscCall(VecResetArray(bmat->left)); 456 } 457 PetscCall(VecRestoreArray(x, &xx)); 458 PetscCall(VecRestoreArray(y, &yy)); 459 PetscFunctionReturn(PETSC_SUCCESS); 460 } 461 462 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y) 463 { 464 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 465 PetscScalar *xx, *yy; 466 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 467 Mat *aa; 468 469 PetscFunctionBegin; 470 /* 471 Standard CSR multiply except each entry is a Mat 472 */ 473 PetscCall(VecGetArray(x, &xx)); 474 475 PetscCall(VecSet(y, 0.0)); 476 PetscCall(VecGetArray(y, &yy)); 477 aj = bmat->j; 478 aa = bmat->a; 479 ii = bmat->i; 480 for (i = 0; i < m; i++) { 481 jrow = ii[i]; 482 n = ii[i + 1] - jrow; 483 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 484 PetscCall(VecPlaceArray(bmat->middle, xx + bs * i)); 485 /* if we ALWAYS required a diagonal entry then could remove this if test */ 486 if (aj[jrow] == i) { 487 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 488 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 489 PetscCall(VecResetArray(bmat->right)); 490 jrow++; 491 n--; 492 } 493 for (j = 0; j < n; j++) { 494 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */ 495 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 496 PetscCall(VecResetArray(bmat->right)); 497 498 PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */ 499 PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right)); 500 PetscCall(VecResetArray(bmat->right)); 501 jrow++; 502 } 503 PetscCall(VecResetArray(bmat->left)); 504 PetscCall(VecResetArray(bmat->middle)); 505 } 506 PetscCall(VecRestoreArray(x, &xx)); 507 PetscCall(VecRestoreArray(y, &yy)); 508 PetscFunctionReturn(PETSC_SUCCESS); 509 } 510 511 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 512 { 513 PetscFunctionBegin; 514 PetscFunctionReturn(PETSC_SUCCESS); 515 } 516 517 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y) 518 { 519 PetscFunctionBegin; 520 PetscFunctionReturn(PETSC_SUCCESS); 521 } 522 523 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 524 { 525 PetscFunctionBegin; 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 /* 530 Adds diagonal pointers to sparse matrix structure. 531 */ 532 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) 533 { 534 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 535 PetscInt i, j, mbs = A->rmap->n / A->rmap->bs; 536 537 PetscFunctionBegin; 538 if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag)); 539 for (i = 0; i < mbs; i++) { 540 a->diag[i] = a->i[i + 1]; 541 for (j = a->i[i]; j < a->i[i + 1]; j++) { 542 if (a->j[j] == i) { 543 a->diag[i] = j; 544 break; 545 } 546 } 547 } 548 PetscFunctionReturn(PETSC_SUCCESS); 549 } 550 551 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 552 { 553 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 554 Mat_SeqAIJ *c; 555 PetscInt i, k, first, step, lensi, nrows, ncols; 556 PetscInt *j_new, *i_new, *aj = a->j, *ailen = a->ilen; 557 PetscScalar *a_new; 558 Mat C, *aa = a->a; 559 PetscBool stride, equal; 560 561 PetscFunctionBegin; 562 PetscCall(ISEqual(isrow, iscol, &equal)); 563 PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices"); 564 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride)); 565 PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices"); 566 PetscCall(ISStrideGetInfo(iscol, &first, &step)); 567 PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block"); 568 569 PetscCall(ISGetLocalSize(isrow, &nrows)); 570 ncols = nrows; 571 572 /* create submatrix */ 573 if (scall == MAT_REUSE_MATRIX) { 574 PetscInt n_cols, n_rows; 575 C = *B; 576 PetscCall(MatGetSize(C, &n_rows, &n_cols)); 577 PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size"); 578 PetscCall(MatZeroEntries(C)); 579 } else { 580 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 581 PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE)); 582 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ)); 583 else PetscCall(MatSetType(C, MATSEQAIJ)); 584 PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen)); 585 PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen)); 586 } 587 c = (Mat_SeqAIJ *)C->data; 588 589 /* loop over rows inserting into submatrix */ 590 a_new = c->a; 591 j_new = c->j; 592 i_new = c->i; 593 594 for (i = 0; i < nrows; i++) { 595 lensi = ailen[i]; 596 for (k = 0; k < lensi; k++) { 597 *j_new++ = *aj++; 598 PetscCall(MatGetValue(*aa++, first, first, a_new++)); 599 } 600 i_new[i + 1] = i_new[i] + lensi; 601 c->ilen[i] = lensi; 602 } 603 604 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 605 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 606 *B = C; 607 PetscFunctionReturn(PETSC_SUCCESS); 608 } 609 610 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode) 611 { 612 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 613 PetscInt fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax; 614 PetscInt m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0; 615 Mat *aa = a->a, *ap; 616 617 PetscFunctionBegin; 618 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 619 620 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 621 for (i = 1; i < m; i++) { 622 /* move each row back by the amount of empty slots (fshift) before it*/ 623 fshift += imax[i - 1] - ailen[i - 1]; 624 rmax = PetscMax(rmax, ailen[i]); 625 if (fshift) { 626 ip = aj + ai[i]; 627 ap = aa + ai[i]; 628 N = ailen[i]; 629 for (j = 0; j < N; j++) { 630 ip[j - fshift] = ip[j]; 631 ap[j - fshift] = ap[j]; 632 } 633 } 634 ai[i] = ai[i - 1] + ailen[i - 1]; 635 } 636 if (m) { 637 fshift += imax[m - 1] - ailen[m - 1]; 638 ai[m] = ai[m - 1] + ailen[m - 1]; 639 } 640 /* reset ilen and imax for each row */ 641 for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i]; 642 a->nz = ai[m]; 643 for (i = 0; i < a->nz; i++) { 644 PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz); 645 PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY)); 646 PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY)); 647 } 648 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz)); 649 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs)); 650 PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax)); 651 652 A->info.mallocs += a->reallocs; 653 a->reallocs = 0; 654 A->info.nz_unneeded = (double)fshift; 655 a->rmax = rmax; 656 PetscCall(MatMarkDiagonal_BlockMat(A)); 657 PetscFunctionReturn(PETSC_SUCCESS); 658 } 659 660 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg) 661 { 662 PetscFunctionBegin; 663 if (opt == MAT_SYMMETRIC && flg) { 664 A->ops->sor = MatSOR_BlockMat_Symmetric; 665 A->ops->mult = MatMult_BlockMat_Symmetric; 666 } else { 667 PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt])); 668 } 669 PetscFunctionReturn(PETSC_SUCCESS); 670 } 671 672 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat, 673 NULL, 674 NULL, 675 MatMult_BlockMat, 676 /* 4*/ MatMultAdd_BlockMat, 677 MatMultTranspose_BlockMat, 678 MatMultTransposeAdd_BlockMat, 679 NULL, 680 NULL, 681 NULL, 682 /* 10*/ NULL, 683 NULL, 684 NULL, 685 MatSOR_BlockMat, 686 NULL, 687 /* 15*/ NULL, 688 NULL, 689 NULL, 690 NULL, 691 NULL, 692 /* 20*/ NULL, 693 MatAssemblyEnd_BlockMat, 694 MatSetOption_BlockMat, 695 NULL, 696 /* 24*/ NULL, 697 NULL, 698 NULL, 699 NULL, 700 NULL, 701 /* 29*/ NULL, 702 NULL, 703 NULL, 704 NULL, 705 NULL, 706 /* 34*/ NULL, 707 NULL, 708 NULL, 709 NULL, 710 NULL, 711 /* 39*/ NULL, 712 NULL, 713 NULL, 714 NULL, 715 NULL, 716 /* 44*/ NULL, 717 NULL, 718 MatShift_Basic, 719 NULL, 720 NULL, 721 /* 49*/ NULL, 722 NULL, 723 NULL, 724 NULL, 725 NULL, 726 /* 54*/ NULL, 727 NULL, 728 NULL, 729 NULL, 730 NULL, 731 /* 59*/ MatCreateSubMatrix_BlockMat, 732 MatDestroy_BlockMat, 733 MatView_BlockMat, 734 NULL, 735 NULL, 736 /* 64*/ NULL, 737 NULL, 738 NULL, 739 NULL, 740 NULL, 741 /* 69*/ NULL, 742 NULL, 743 NULL, 744 NULL, 745 NULL, 746 /* 74*/ NULL, 747 NULL, 748 NULL, 749 NULL, 750 NULL, 751 /* 79*/ NULL, 752 NULL, 753 NULL, 754 NULL, 755 MatLoad_BlockMat, 756 /* 84*/ NULL, 757 NULL, 758 NULL, 759 NULL, 760 NULL, 761 /* 89*/ NULL, 762 NULL, 763 NULL, 764 NULL, 765 NULL, 766 /* 94*/ NULL, 767 NULL, 768 NULL, 769 NULL, 770 NULL, 771 /* 99*/ NULL, 772 NULL, 773 NULL, 774 NULL, 775 NULL, 776 /*104*/ NULL, 777 NULL, 778 NULL, 779 NULL, 780 NULL, 781 /*109*/ NULL, 782 NULL, 783 NULL, 784 NULL, 785 NULL, 786 /*114*/ NULL, 787 NULL, 788 NULL, 789 NULL, 790 NULL, 791 /*119*/ NULL, 792 NULL, 793 NULL, 794 NULL, 795 NULL, 796 /*124*/ NULL, 797 NULL, 798 NULL, 799 NULL, 800 NULL, 801 /*129*/ NULL, 802 NULL, 803 NULL, 804 NULL, 805 NULL, 806 /*134*/ NULL, 807 NULL, 808 NULL, 809 NULL, 810 NULL, 811 /*139*/ NULL, 812 NULL, 813 NULL, 814 NULL, 815 NULL, 816 /*144*/ NULL, 817 NULL, 818 NULL, 819 NULL, 820 NULL, 821 NULL, 822 /*150*/ NULL, 823 NULL, 824 NULL}; 825 826 /*@C 827 MatBlockMatSetPreallocation - For good matrix assembly performance 828 the user should preallocate the matrix storage by setting the parameter nz 829 (or the array nnz). By setting these parameters accurately, performance 830 during matrix assembly can be increased by more than a factor of 50. 831 832 Collective 833 834 Input Parameters: 835 + B - The matrix 836 . bs - size of each block in matrix 837 . nz - number of nonzeros per block row (same for all rows) 838 - nnz - array containing the number of nonzeros in the various block rows 839 (possibly different for each row) or `NULL` 840 841 Level: intermediate 842 843 Notes: 844 If `nnz` is given then `nz` is ignored 845 846 Specify the preallocated storage with either `nz` or `nnz` (not both). 847 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 848 allocation. 849 850 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()` 851 @*/ 852 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 853 { 854 PetscFunctionBegin; 855 PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 856 PetscFunctionReturn(PETSC_SUCCESS); 857 } 858 859 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz) 860 { 861 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 862 PetscInt i; 863 864 PetscFunctionBegin; 865 PetscCall(PetscLayoutSetBlockSize(A->rmap, bs)); 866 PetscCall(PetscLayoutSetBlockSize(A->cmap, bs)); 867 PetscCall(PetscLayoutSetUp(A->rmap)); 868 PetscCall(PetscLayoutSetUp(A->cmap)); 869 PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs)); 870 871 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 872 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 873 if (nnz) { 874 for (i = 0; i < A->rmap->n / bs; i++) { 875 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 876 PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs); 877 } 878 } 879 bmat->mbs = A->rmap->n / bs; 880 881 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right)); 882 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle)); 883 PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left)); 884 885 if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen)); 886 if (PetscLikely(nnz)) { 887 nz = 0; 888 for (i = 0; i < A->rmap->n / A->rmap->bs; i++) { 889 bmat->imax[i] = nnz[i]; 890 nz += nnz[i]; 891 } 892 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation"); 893 894 /* bmat->ilen will count nonzeros in each row so far. */ 895 PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs)); 896 897 /* allocate the matrix space */ 898 PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 899 PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i)); 900 bmat->i[0] = 0; 901 for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1]; 902 bmat->singlemalloc = PETSC_TRUE; 903 bmat->free_a = PETSC_TRUE; 904 bmat->free_ij = PETSC_TRUE; 905 906 bmat->nz = 0; 907 bmat->maxnz = nz; 908 A->info.nz_unneeded = (double)bmat->maxnz; 909 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 910 PetscFunctionReturn(PETSC_SUCCESS); 911 } 912 913 /*MC 914 MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix 915 consisting of (usually) sparse blocks. 916 917 Level: advanced 918 919 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()` 920 M*/ 921 922 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) 923 { 924 Mat_BlockMat *b; 925 926 PetscFunctionBegin; 927 PetscCall(PetscNew(&b)); 928 A->data = (void *)b; 929 A->ops[0] = MatOps_Values; 930 A->assembled = PETSC_TRUE; 931 A->preallocated = PETSC_FALSE; 932 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT)); 933 934 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat)); 935 PetscFunctionReturn(PETSC_SUCCESS); 936 } 937 938 /*@C 939 MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object 940 941 Collective 942 943 Input Parameters: 944 + comm - MPI communicator 945 . m - number of rows 946 . n - number of columns 947 . bs - size of each submatrix 948 . nz - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known) 949 - nnz - expected number of nonzers per block row if known (use `NULL` otherwise) 950 951 Output Parameter: 952 . A - the matrix 953 954 Level: intermediate 955 956 Notes: 957 Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must 958 have the same size and be sequential. The local and global sizes must be compatible with this decomposition. 959 960 For matrices containing parallel submatrices and variable block sizes, see `MATNEST`. 961 962 Developer Notes: 963 I don't like the name, it is not `MATNESTMAT` 964 965 .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()` 966 @*/ 967 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A) 968 { 969 PetscFunctionBegin; 970 PetscCall(MatCreate(comm, A)); 971 PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 972 PetscCall(MatSetType(*A, MATBLOCKMAT)); 973 PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz)); 974 PetscFunctionReturn(PETSC_SUCCESS); 975 } 976