1 /* 2 This provides a matrix that consists of Mats 3 */ 4 5 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 7 8 typedef struct { 9 SEQAIJHEADER(Mat); 10 SEQBAIJHEADER; 11 Mat *diags; 12 13 Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */ 14 } Mat_BlockMat; 15 16 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 17 { 18 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 19 PetscScalar *x; 20 const Mat *v; 21 const PetscScalar *b; 22 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 23 const PetscInt *idx; 24 IS row, col; 25 MatFactorInfo info; 26 Vec left = a->left, right = a->right, middle = a->middle; 27 Mat *diag; 28 29 PetscFunctionBegin; 30 its = its * lits; 31 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 32 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 33 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 34 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 35 PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep"); 36 37 if (!a->diags) { 38 PetscCall(PetscMalloc1(mbs, &a->diags)); 39 PetscCall(MatFactorInfoInitialize(&info)); 40 for (i = 0; i < mbs; i++) { 41 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 42 PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info)); 43 PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 44 PetscCall(ISDestroy(&row)); 45 PetscCall(ISDestroy(&col)); 46 } 47 PetscCall(VecDuplicate(bb, &a->workb)); 48 } 49 diag = a->diags; 50 51 PetscCall(VecSet(xx, 0.0)); 52 PetscCall(VecGetArray(xx, &x)); 53 /* copy right-hand side because it must be modified during iteration */ 54 PetscCall(VecCopy(bb, a->workb)); 55 PetscCall(VecGetArrayRead(a->workb, &b)); 56 57 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 58 while (its--) { 59 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 60 for (i = 0; i < mbs; i++) { 61 n = a->i[i + 1] - a->i[i] - 1; 62 idx = a->j + a->i[i] + 1; 63 v = a->a + a->i[i] + 1; 64 65 PetscCall(VecSet(left, 0.0)); 66 for (j = 0; j < n; j++) { 67 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 68 PetscCall(MatMultAdd(v[j], right, left, left)); 69 PetscCall(VecResetArray(right)); 70 } 71 PetscCall(VecPlaceArray(right, b + i * bs)); 72 PetscCall(VecAYPX(left, -1.0, right)); 73 PetscCall(VecResetArray(right)); 74 75 PetscCall(VecPlaceArray(right, x + i * bs)); 76 PetscCall(MatSolve(diag[i], left, right)); 77 78 /* now adjust right-hand side, see MatSOR_SeqSBAIJ */ 79 for (j = 0; j < n; j++) { 80 PetscCall(MatMultTranspose(v[j], right, left)); 81 PetscCall(VecPlaceArray(middle, b + idx[j] * bs)); 82 PetscCall(VecAXPY(middle, -1.0, left)); 83 PetscCall(VecResetArray(middle)); 84 } 85 PetscCall(VecResetArray(right)); 86 } 87 } 88 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 89 for (i = mbs - 1; i >= 0; i--) { 90 n = a->i[i + 1] - a->i[i] - 1; 91 idx = a->j + a->i[i] + 1; 92 v = a->a + a->i[i] + 1; 93 94 PetscCall(VecSet(left, 0.0)); 95 for (j = 0; j < n; j++) { 96 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 97 PetscCall(MatMultAdd(v[j], right, left, left)); 98 PetscCall(VecResetArray(right)); 99 } 100 PetscCall(VecPlaceArray(right, b + i * bs)); 101 PetscCall(VecAYPX(left, -1.0, right)); 102 PetscCall(VecResetArray(right)); 103 104 PetscCall(VecPlaceArray(right, x + i * bs)); 105 PetscCall(MatSolve(diag[i], left, right)); 106 PetscCall(VecResetArray(right)); 107 } 108 } 109 } 110 PetscCall(VecRestoreArray(xx, &x)); 111 PetscCall(VecRestoreArrayRead(a->workb, &b)); 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 115 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 116 { 117 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 118 PetscScalar *x; 119 const Mat *v; 120 const PetscScalar *b; 121 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 122 const PetscInt *idx; 123 IS row, col; 124 MatFactorInfo info; 125 Vec left = a->left, right = a->right; 126 Mat *diag; 127 128 PetscFunctionBegin; 129 its = its * lits; 130 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 131 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 132 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 133 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 134 135 if (!a->diags) { 136 PetscCall(PetscMalloc1(mbs, &a->diags)); 137 PetscCall(MatFactorInfoInitialize(&info)); 138 for (i = 0; i < mbs; i++) { 139 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 140 PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info)); 141 PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 142 PetscCall(ISDestroy(&row)); 143 PetscCall(ISDestroy(&col)); 144 } 145 } 146 diag = a->diags; 147 148 PetscCall(VecSet(xx, 0.0)); 149 PetscCall(VecGetArray(xx, &x)); 150 PetscCall(VecGetArrayRead(bb, &b)); 151 152 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 153 while (its--) { 154 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 155 for (i = 0; i < mbs; i++) { 156 n = a->i[i + 1] - a->i[i]; 157 idx = a->j + a->i[i]; 158 v = a->a + a->i[i]; 159 160 PetscCall(VecSet(left, 0.0)); 161 for (j = 0; j < n; j++) { 162 if (idx[j] != i) { 163 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 164 PetscCall(MatMultAdd(v[j], right, left, left)); 165 PetscCall(VecResetArray(right)); 166 } 167 } 168 PetscCall(VecPlaceArray(right, b + i * bs)); 169 PetscCall(VecAYPX(left, -1.0, right)); 170 PetscCall(VecResetArray(right)); 171 172 PetscCall(VecPlaceArray(right, x + i * bs)); 173 PetscCall(MatSolve(diag[i], left, right)); 174 PetscCall(VecResetArray(right)); 175 } 176 } 177 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 178 for (i = mbs - 1; i >= 0; i--) { 179 n = a->i[i + 1] - a->i[i]; 180 idx = a->j + a->i[i]; 181 v = a->a + a->i[i]; 182 183 PetscCall(VecSet(left, 0.0)); 184 for (j = 0; j < n; j++) { 185 if (idx[j] != i) { 186 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 187 PetscCall(MatMultAdd(v[j], right, left, left)); 188 PetscCall(VecResetArray(right)); 189 } 190 } 191 PetscCall(VecPlaceArray(right, b + i * bs)); 192 PetscCall(VecAYPX(left, -1.0, right)); 193 PetscCall(VecResetArray(right)); 194 195 PetscCall(VecPlaceArray(right, x + i * bs)); 196 PetscCall(MatSolve(diag[i], left, right)); 197 PetscCall(VecResetArray(right)); 198 } 199 } 200 } 201 PetscCall(VecRestoreArray(xx, &x)); 202 PetscCall(VecRestoreArrayRead(bb, &b)); 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 207 { 208 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 209 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 210 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 211 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 212 PetscInt ridx, cidx; 213 PetscBool roworiented = a->roworiented; 214 MatScalar value; 215 Mat *ap, *aa = a->a; 216 217 PetscFunctionBegin; 218 for (k = 0; k < m; k++) { /* loop over added rows */ 219 row = im[k]; 220 brow = row / bs; 221 if (row < 0) continue; 222 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 223 rp = aj + ai[brow]; 224 ap = aa + ai[brow]; 225 rmax = imax[brow]; 226 nrow = ailen[brow]; 227 low = 0; 228 high = nrow; 229 for (l = 0; l < n; l++) { /* loop over added columns */ 230 if (in[l] < 0) continue; 231 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 232 col = in[l]; 233 bcol = col / bs; 234 if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue; 235 ridx = row % bs; 236 cidx = col % bs; 237 if (roworiented) value = v[l + k * n]; 238 else value = v[k + l * m]; 239 240 if (col <= lastcol) low = 0; 241 else high = nrow; 242 lastcol = col; 243 while (high - low > 7) { 244 t = (low + high) / 2; 245 if (rp[t] > bcol) high = t; 246 else low = t; 247 } 248 for (i = low; i < high; i++) { 249 if (rp[i] > bcol) break; 250 if (rp[i] == bcol) goto noinsert1; 251 } 252 if (nonew == 1) goto noinsert1; 253 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 254 MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat); 255 N = nrow++ - 1; 256 high++; 257 /* shift up all the later entries in this row */ 258 for (ii = N; ii >= i; ii--) { 259 rp[ii + 1] = rp[ii]; 260 ap[ii + 1] = ap[ii]; 261 } 262 if (N >= i) ap[i] = NULL; 263 rp[i] = bcol; 264 a->nz++; 265 noinsert1:; 266 if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i)); 267 PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is)); 268 low = i; 269 } 270 ailen[brow] = nrow; 271 } 272 PetscFunctionReturn(PETSC_SUCCESS); 273 } 274 275 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) 276 { 277 Mat tmpA; 278 PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0; 279 const PetscInt *cols; 280 const PetscScalar *values; 281 PetscBool flg = PETSC_FALSE, notdone; 282 Mat_SeqAIJ *a; 283 Mat_BlockMat *amat; 284 285 PetscFunctionBegin; 286 /* force binary viewer to load .info file if it has not yet done so */ 287 PetscCall(PetscViewerSetUp(viewer)); 288 PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA)); 289 PetscCall(MatSetType(tmpA, MATSEQAIJ)); 290 PetscCall(MatLoad_SeqAIJ(tmpA, viewer)); 291 292 PetscCall(MatGetLocalSize(tmpA, &m, &n)); 293 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat"); 294 PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL)); 295 PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL)); 296 PetscOptionsEnd(); 297 298 /* Determine number of nonzero blocks for each block row */ 299 a = (Mat_SeqAIJ *)tmpA->data; 300 mbs = m / bs; 301 PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens)); 302 PetscCall(PetscArrayzero(lens, mbs)); 303 304 for (i = 0; i < mbs; i++) { 305 for (j = 0; j < bs; j++) { 306 ii[j] = a->j + a->i[i * bs + j]; 307 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 308 } 309 310 currentcol = -1; 311 while (PETSC_TRUE) { 312 notdone = PETSC_FALSE; 313 nextcol = 1000000000; 314 for (j = 0; j < bs; j++) { 315 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { 316 ii[j]++; 317 ilens[j]--; 318 } 319 if (ilens[j] > 0) { 320 notdone = PETSC_TRUE; 321 nextcol = PetscMin(nextcol, ii[j][0] / bs); 322 } 323 } 324 if (!notdone) break; 325 if (!flg || (nextcol >= i)) lens[i]++; 326 currentcol = nextcol; 327 } 328 } 329 330 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 331 PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens)); 332 if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE)); 333 amat = (Mat_BlockMat *)(newmat)->data; 334 335 /* preallocate the submatrices */ 336 PetscCall(PetscMalloc1(bs, &llens)); 337 for (i = 0; i < mbs; i++) { /* loops for block rows */ 338 for (j = 0; j < bs; j++) { 339 ii[j] = a->j + a->i[i * bs + j]; 340 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 341 } 342 343 currentcol = 1000000000; 344 for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */ 345 if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs); 346 } 347 348 while (PETSC_TRUE) { /* loops over blocks in block row */ 349 notdone = PETSC_FALSE; 350 nextcol = 1000000000; 351 PetscCall(PetscArrayzero(llens, bs)); 352 for (j = 0; j < bs; j++) { /* loop over rows in block */ 353 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */ 354 ii[j]++; 355 ilens[j]--; 356 llens[j]++; 357 } 358 if (ilens[j] > 0) { 359 notdone = PETSC_TRUE; 360 nextcol = PetscMin(nextcol, ii[j][0] / bs); 361 } 362 } 363 PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt); 364 if (!flg || currentcol >= i) { 365 amat->j[cnt] = currentcol; 366 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++)); 367 } 368 369 if (!notdone) break; 370 currentcol = nextcol; 371 } 372 amat->ilen[i] = lens[i]; 373 } 374 375 PetscCall(PetscFree3(lens, ii, ilens)); 376 PetscCall(PetscFree(llens)); 377 378 /* copy over the matrix, one row at a time */ 379 for (i = 0; i < m; i++) { 380 PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values)); 381 PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES)); 382 PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values)); 383 } 384 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 385 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 386 PetscFunctionReturn(PETSC_SUCCESS); 387 } 388 389 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer) 390 { 391 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 392 const char *name; 393 PetscViewerFormat format; 394 395 PetscFunctionBegin; 396 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 397 PetscCall(PetscViewerGetFormat(viewer, &format)); 398 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 399 PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz)); 400 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n")); 401 } 402 PetscFunctionReturn(PETSC_SUCCESS); 403 } 404 405 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 406 { 407 Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data; 408 PetscInt i; 409 410 PetscFunctionBegin; 411 PetscCall(VecDestroy(&bmat->right)); 412 PetscCall(VecDestroy(&bmat->left)); 413 PetscCall(VecDestroy(&bmat->middle)); 414 PetscCall(VecDestroy(&bmat->workb)); 415 if (bmat->diags) { 416 for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i])); 417 } 418 if (bmat->a) { 419 for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i])); 420 } 421 PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 422 PetscCall(PetscFree(mat->data)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y) 427 { 428 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 429 PetscScalar *xx, *yy; 430 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 431 Mat *aa; 432 433 PetscFunctionBegin; 434 /* 435 Standard CSR multiply except each entry is a Mat 436 */ 437 PetscCall(VecGetArray(x, &xx)); 438 439 PetscCall(VecSet(y, 0.0)); 440 PetscCall(VecGetArray(y, &yy)); 441 aj = bmat->j; 442 aa = bmat->a; 443 ii = bmat->i; 444 for (i = 0; i < m; i++) { 445 jrow = ii[i]; 446 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 447 n = ii[i + 1] - jrow; 448 for (j = 0; j < n; j++) { 449 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 450 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 451 PetscCall(VecResetArray(bmat->right)); 452 jrow++; 453 } 454 PetscCall(VecResetArray(bmat->left)); 455 } 456 PetscCall(VecRestoreArray(x, &xx)); 457 PetscCall(VecRestoreArray(y, &yy)); 458 PetscFunctionReturn(PETSC_SUCCESS); 459 } 460 461 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y) 462 { 463 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 464 PetscScalar *xx, *yy; 465 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 466 Mat *aa; 467 468 PetscFunctionBegin; 469 /* 470 Standard CSR multiply except each entry is a Mat 471 */ 472 PetscCall(VecGetArray(x, &xx)); 473 474 PetscCall(VecSet(y, 0.0)); 475 PetscCall(VecGetArray(y, &yy)); 476 aj = bmat->j; 477 aa = bmat->a; 478 ii = bmat->i; 479 for (i = 0; i < m; i++) { 480 jrow = ii[i]; 481 n = ii[i + 1] - jrow; 482 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 483 PetscCall(VecPlaceArray(bmat->middle, xx + bs * i)); 484 /* if we ALWAYS required a diagonal entry then could remove this if test */ 485 if (aj[jrow] == i) { 486 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 487 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 488 PetscCall(VecResetArray(bmat->right)); 489 jrow++; 490 n--; 491 } 492 for (j = 0; j < n; j++) { 493 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */ 494 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 495 PetscCall(VecResetArray(bmat->right)); 496 497 PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */ 498 PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right)); 499 PetscCall(VecResetArray(bmat->right)); 500 jrow++; 501 } 502 PetscCall(VecResetArray(bmat->left)); 503 PetscCall(VecResetArray(bmat->middle)); 504 } 505 PetscCall(VecRestoreArray(x, &xx)); 506 PetscCall(VecRestoreArray(y, &yy)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 511 { 512 PetscFunctionBegin; 513 PetscFunctionReturn(PETSC_SUCCESS); 514 } 515 516 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y) 517 { 518 PetscFunctionBegin; 519 PetscFunctionReturn(PETSC_SUCCESS); 520 } 521 522 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 523 { 524 PetscFunctionBegin; 525 PetscFunctionReturn(PETSC_SUCCESS); 526 } 527 528 /* 529 Adds diagonal pointers to sparse matrix structure. 530 */ 531 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) 532 { 533 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 534 PetscInt i, j, mbs = A->rmap->n / A->rmap->bs; 535 536 PetscFunctionBegin; 537 if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag)); 538 for (i = 0; i < mbs; i++) { 539 a->diag[i] = a->i[i + 1]; 540 for (j = a->i[i]; j < a->i[i + 1]; j++) { 541 if (a->j[j] == i) { 542 a->diag[i] = j; 543 break; 544 } 545 } 546 } 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 551 { 552 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 553 Mat_SeqAIJ *c; 554 PetscInt i, k, first, step, lensi, nrows, ncols; 555 PetscInt *j_new, *i_new, *aj = a->j, *ailen = a->ilen; 556 PetscScalar *a_new; 557 Mat C, *aa = a->a; 558 PetscBool stride, equal; 559 560 PetscFunctionBegin; 561 PetscCall(ISEqual(isrow, iscol, &equal)); 562 PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices"); 563 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride)); 564 PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices"); 565 PetscCall(ISStrideGetInfo(iscol, &first, &step)); 566 PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block"); 567 568 PetscCall(ISGetLocalSize(isrow, &nrows)); 569 ncols = nrows; 570 571 /* create submatrix */ 572 if (scall == MAT_REUSE_MATRIX) { 573 PetscInt n_cols, n_rows; 574 C = *B; 575 PetscCall(MatGetSize(C, &n_rows, &n_cols)); 576 PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size"); 577 PetscCall(MatZeroEntries(C)); 578 } else { 579 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 580 PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE)); 581 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ)); 582 else PetscCall(MatSetType(C, MATSEQAIJ)); 583 PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen)); 584 PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen)); 585 } 586 c = (Mat_SeqAIJ *)C->data; 587 588 /* loop over rows inserting into submatrix */ 589 a_new = c->a; 590 j_new = c->j; 591 i_new = c->i; 592 593 for (i = 0; i < nrows; i++) { 594 lensi = ailen[i]; 595 for (k = 0; k < lensi; k++) { 596 *j_new++ = *aj++; 597 PetscCall(MatGetValue(*aa++, first, first, a_new++)); 598 } 599 i_new[i + 1] = i_new[i] + lensi; 600 c->ilen[i] = lensi; 601 } 602 603 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 604 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 605 *B = C; 606 PetscFunctionReturn(PETSC_SUCCESS); 607 } 608 609 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode) 610 { 611 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 612 PetscInt fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax; 613 PetscInt m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0; 614 Mat *aa = a->a, *ap; 615 616 PetscFunctionBegin; 617 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 618 619 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 620 for (i = 1; i < m; i++) { 621 /* move each row back by the amount of empty slots (fshift) before it*/ 622 fshift += imax[i - 1] - ailen[i - 1]; 623 rmax = PetscMax(rmax, ailen[i]); 624 if (fshift) { 625 ip = aj + ai[i]; 626 ap = aa + ai[i]; 627 N = ailen[i]; 628 for (j = 0; j < N; j++) { 629 ip[j - fshift] = ip[j]; 630 ap[j - fshift] = ap[j]; 631 } 632 } 633 ai[i] = ai[i - 1] + ailen[i - 1]; 634 } 635 if (m) { 636 fshift += imax[m - 1] - ailen[m - 1]; 637 ai[m] = ai[m - 1] + ailen[m - 1]; 638 } 639 /* reset ilen and imax for each row */ 640 for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i]; 641 a->nz = ai[m]; 642 for (i = 0; i < a->nz; i++) { 643 PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz); 644 PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY)); 645 PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY)); 646 } 647 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz)); 648 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs)); 649 PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax)); 650 651 A->info.mallocs += a->reallocs; 652 a->reallocs = 0; 653 A->info.nz_unneeded = (double)fshift; 654 a->rmax = rmax; 655 PetscCall(MatMarkDiagonal_BlockMat(A)); 656 PetscFunctionReturn(PETSC_SUCCESS); 657 } 658 659 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg) 660 { 661 PetscFunctionBegin; 662 if (opt == MAT_SYMMETRIC && flg) { 663 A->ops->sor = MatSOR_BlockMat_Symmetric; 664 A->ops->mult = MatMult_BlockMat_Symmetric; 665 } else { 666 PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt])); 667 } 668 PetscFunctionReturn(PETSC_SUCCESS); 669 } 670 671 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat, 672 NULL, 673 NULL, 674 MatMult_BlockMat, 675 /* 4*/ MatMultAdd_BlockMat, 676 MatMultTranspose_BlockMat, 677 MatMultTransposeAdd_BlockMat, 678 NULL, 679 NULL, 680 NULL, 681 /* 10*/ NULL, 682 NULL, 683 NULL, 684 MatSOR_BlockMat, 685 NULL, 686 /* 15*/ NULL, 687 NULL, 688 NULL, 689 NULL, 690 NULL, 691 /* 20*/ NULL, 692 MatAssemblyEnd_BlockMat, 693 MatSetOption_BlockMat, 694 NULL, 695 /* 24*/ NULL, 696 NULL, 697 NULL, 698 NULL, 699 NULL, 700 /* 29*/ NULL, 701 NULL, 702 NULL, 703 NULL, 704 NULL, 705 /* 34*/ NULL, 706 NULL, 707 NULL, 708 NULL, 709 NULL, 710 /* 39*/ NULL, 711 NULL, 712 NULL, 713 NULL, 714 NULL, 715 /* 44*/ NULL, 716 NULL, 717 MatShift_Basic, 718 NULL, 719 NULL, 720 /* 49*/ NULL, 721 NULL, 722 NULL, 723 NULL, 724 NULL, 725 /* 54*/ NULL, 726 NULL, 727 NULL, 728 NULL, 729 NULL, 730 /* 59*/ MatCreateSubMatrix_BlockMat, 731 MatDestroy_BlockMat, 732 MatView_BlockMat, 733 NULL, 734 NULL, 735 /* 64*/ NULL, 736 NULL, 737 NULL, 738 NULL, 739 NULL, 740 /* 69*/ NULL, 741 NULL, 742 NULL, 743 NULL, 744 NULL, 745 /* 74*/ NULL, 746 NULL, 747 NULL, 748 NULL, 749 NULL, 750 /* 79*/ NULL, 751 NULL, 752 NULL, 753 NULL, 754 MatLoad_BlockMat, 755 /* 84*/ NULL, 756 NULL, 757 NULL, 758 NULL, 759 NULL, 760 /* 89*/ NULL, 761 NULL, 762 NULL, 763 NULL, 764 NULL, 765 /* 94*/ NULL, 766 NULL, 767 NULL, 768 NULL, 769 NULL, 770 /* 99*/ NULL, 771 NULL, 772 NULL, 773 NULL, 774 NULL, 775 /*104*/ NULL, 776 NULL, 777 NULL, 778 NULL, 779 NULL, 780 /*109*/ NULL, 781 NULL, 782 NULL, 783 NULL, 784 NULL, 785 /*114*/ NULL, 786 NULL, 787 NULL, 788 NULL, 789 NULL, 790 /*119*/ NULL, 791 NULL, 792 NULL, 793 NULL, 794 NULL, 795 /*124*/ NULL, 796 NULL, 797 NULL, 798 NULL, 799 NULL, 800 /*129*/ NULL, 801 NULL, 802 NULL, 803 NULL, 804 NULL, 805 /*134*/ NULL, 806 NULL, 807 NULL, 808 NULL, 809 NULL, 810 /*139*/ NULL, 811 NULL, 812 NULL, 813 NULL, 814 NULL, 815 /*144*/ NULL, 816 NULL, 817 NULL, 818 NULL, 819 NULL, 820 NULL, 821 /*150*/ NULL, 822 NULL, 823 NULL, 824 NULL, 825 NULL, 826 NULL}; 827 828 /*@C 829 MatBlockMatSetPreallocation - For good matrix assembly performance 830 the user should preallocate the matrix storage by setting the parameter nz 831 (or the array nnz). By setting these parameters accurately, performance 832 during matrix assembly can be increased by more than a factor of 50. 833 834 Collective 835 836 Input Parameters: 837 + B - The matrix 838 . bs - size of each block in matrix 839 . nz - number of nonzeros per block row (same for all rows) 840 - nnz - array containing the number of nonzeros in the various block rows 841 (possibly different for each row) or `NULL` 842 843 Level: intermediate 844 845 Notes: 846 If `nnz` is given then `nz` is ignored 847 848 Specify the preallocated storage with either `nz` or `nnz` (not both). 849 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 850 allocation. 851 852 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()` 853 @*/ 854 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 855 { 856 PetscFunctionBegin; 857 PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 858 PetscFunctionReturn(PETSC_SUCCESS); 859 } 860 861 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz) 862 { 863 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 864 PetscInt i; 865 866 PetscFunctionBegin; 867 PetscCall(PetscLayoutSetBlockSize(A->rmap, bs)); 868 PetscCall(PetscLayoutSetBlockSize(A->cmap, bs)); 869 PetscCall(PetscLayoutSetUp(A->rmap)); 870 PetscCall(PetscLayoutSetUp(A->cmap)); 871 PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs)); 872 873 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 874 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 875 if (nnz) { 876 for (i = 0; i < A->rmap->n / bs; i++) { 877 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 878 PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs); 879 } 880 } 881 bmat->mbs = A->rmap->n / bs; 882 883 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right)); 884 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle)); 885 PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left)); 886 887 if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen)); 888 if (PetscLikely(nnz)) { 889 nz = 0; 890 for (i = 0; i < A->rmap->n / A->rmap->bs; i++) { 891 bmat->imax[i] = nnz[i]; 892 nz += nnz[i]; 893 } 894 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation"); 895 896 /* bmat->ilen will count nonzeros in each row so far. */ 897 PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs)); 898 899 /* allocate the matrix space */ 900 PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 901 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a)); 902 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j)); 903 PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i)); 904 bmat->free_a = PETSC_TRUE; 905 bmat->free_ij = PETSC_TRUE; 906 907 bmat->i[0] = 0; 908 for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1]; 909 910 bmat->nz = 0; 911 bmat->maxnz = nz; 912 A->info.nz_unneeded = (double)bmat->maxnz; 913 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 914 PetscFunctionReturn(PETSC_SUCCESS); 915 } 916 917 /*MC 918 MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix 919 consisting of (usually) sparse blocks. 920 921 Level: advanced 922 923 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()` 924 M*/ 925 926 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) 927 { 928 Mat_BlockMat *b; 929 930 PetscFunctionBegin; 931 PetscCall(PetscNew(&b)); 932 A->data = (void *)b; 933 A->ops[0] = MatOps_Values; 934 A->assembled = PETSC_TRUE; 935 A->preallocated = PETSC_FALSE; 936 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT)); 937 938 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat)); 939 PetscFunctionReturn(PETSC_SUCCESS); 940 } 941 942 /*@C 943 MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object 944 945 Collective 946 947 Input Parameters: 948 + comm - MPI communicator 949 . m - number of rows 950 . n - number of columns 951 . bs - size of each submatrix 952 . nz - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known) 953 - nnz - expected number of nonzers per block row if known (use `NULL` otherwise) 954 955 Output Parameter: 956 . A - the matrix 957 958 Level: intermediate 959 960 Notes: 961 Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must 962 have the same size and be sequential. The local and global sizes must be compatible with this decomposition. 963 964 For matrices containing parallel submatrices and variable block sizes, see `MATNEST`. 965 966 Developer Notes: 967 I don't like the name, it is not `MATNESTMAT` 968 969 .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()` 970 @*/ 971 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A) 972 { 973 PetscFunctionBegin; 974 PetscCall(MatCreate(comm, A)); 975 PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 976 PetscCall(MatSetType(*A, MATBLOCKMAT)); 977 PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz)); 978 PetscFunctionReturn(PETSC_SUCCESS); 979 } 980