/*
   This provides a matrix that consists of Mats
*/

#include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */

typedef struct {
  SEQAIJHEADER(Mat);
  SEQBAIJHEADER;
  Mat *diags;

  Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
} Mat_BlockMat;

static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
  PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb, &a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  /* copy right hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb, a->workb));
  PetscCall(VecGetArrayRead(a->workb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));

        /* now adjust right hand side, see MatSOR_SeqSBAIJ */
        for (j = 0; j < n; j++) {
          PetscCall(MatMultTranspose(v[j], right, left));
          PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
          PetscCall(VecAXPY(middle, -1.0, left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(a->workb, &b));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
      PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
  PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
  PetscInt      ridx, cidx;
  PetscBool     roworiented = a->roworiented;
  MatScalar     value;
  Mat          *ap, *aa = a->a;

  PetscFunctionBegin;
  for (k = 0; k < m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row / bs;
    if (row < 0) continue;
    PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    rp   = aj + ai[brow];
    ap   = aa + ai[brow];
    rmax = imax[brow];
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l = 0; l < n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
      col  = in[l];
      bcol = col / bs;
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs;
      cidx = col % bs;
      if (roworiented) value = v[l + k * n];
      else value = v[k + l * m];

      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      while (high - low > 7) {
        t = (low + high) / 2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }
      for (i = low; i < high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;
      }
      if (nonew == 1) goto noinsert1;
      PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
      N = nrow++ - 1;
      high++;
      /* shift up all the later entries in this row */
      for (ii = N; ii >= i; ii--) {
        rp[ii + 1] = rp[ii];
        ap[ii + 1] = ap[ii];
      }
      if (N >= i) ap[i] = NULL;
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
    noinsert1:;
      if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
      PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
      low = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) {
  Mat                tmpA;
  PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscBool          flg = PETSC_FALSE, notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
  PetscCall(MatSetType(tmpA, MATSEQAIJ));
  PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
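
  /* The matrix is first loaded as an ordinary MATSEQAIJ matrix; the code below counts the nonzero blocks in
     each block row, preallocates the MATBLOCKMAT accordingly, and then copies the entries over row by row. */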
  PetscCall(MatGetLocalSize(tmpA, &m, &n));
  PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
  PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
  PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
  PetscOptionsEnd();

  /* Determine number of nonzero blocks for each block row */
  a   = (Mat_SeqAIJ *)tmpA->data;
  mbs = m / bs;
  PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
  PetscCall(PetscArrayzero(lens, mbs));

  for (i = 0; i < mbs; i++) {
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = -1;
    while (PETSC_TRUE) {
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      for (j = 0; j < bs; j++) {
        while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) {
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      if (!notdone) break;
      if (!flg || (nextcol >= i)) lens[i]++;
      currentcol = nextcol;
    }
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
  if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
  amat = (Mat_BlockMat *)(newmat)->data;

  /* preallocate the submatrices */
  PetscCall(PetscMalloc1(bs, &llens));
  for (i = 0; i < mbs; i++) { /* loops for block rows */
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = 1000000000;
    for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
      if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
    }

    while (PETSC_TRUE) { /* loops over blocks in block row */
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      PetscCall(PetscArrayzero(llens, bs));
      for (j = 0; j < bs; j++) {                                /* loop over rows in block */
        while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { /* loop over columns in row */
          ii[j]++;
          ilens[j]--;
          llens[j]++;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
      if (!flg || currentcol >= i) {
        amat->j[cnt] = currentcol;
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
      }

      if (!notdone) break;
      currentcol = nextcol;
    }
    amat->ilen[i] = lens[i];
  }

  PetscCall(PetscFree3(lens, ii, ilens));
  PetscCall(PetscFree(llens));

  /* copy over the matrix, one row at a time */
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
    PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
    PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
  }
  PetscCall(MatDestroy(&tmpA)); /* the temporary MATSEQAIJ copy is no longer needed */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer) {
  Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
  const char       *name;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetName((PetscObject)A, &name));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDestroy_BlockMat(Mat mat) {
  Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(VecDestroy(&bmat->right));
  PetscCall(VecDestroy(&bmat->left));
  PetscCall(VecDestroy(&bmat->middle));
  PetscCall(VecDestroy(&bmat->workb));
  if (bmat->diags) {
    for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
  }
  PetscCall(PetscFree(bmat->diags));
  if (bmat->a) {
    for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
  }
  PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscFree2(bmat->imax, bmat->ilen));
  PetscCall(PetscFree(mat->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y) {
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    n = ii[i + 1] - jrow;
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y) {
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
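
      /* because only the upper triangular part is stored, the same block also contributes its transpose
         applied to x_i to block row aj[jrow] of y */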
      PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) {
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y) {
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) {
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

/*
   Adds diagonal pointers to sparse matrix structure.
*/
static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) {
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

  PetscFunctionBegin;
  if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
  for (i = 0; i < mbs; i++) {
    a->diag[i] = a->i[i + 1];
    for (j = a->i[i]; j < a->i[i + 1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  Mat_SeqAIJ   *c;
  PetscInt      i, k, first, step, lensi, nrows, ncols;
  PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
  PetscScalar  *a_new;
  Mat           C, *aa = a->a;
  PetscBool     stride, equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow, iscol, &equal));
  PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
  PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol, &first, &step));
  PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow, &nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    C = *B;
    PetscCall(MatGetSize(C, &n_rows, &n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
    else PetscCall(MatSetType(C, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
  }
  c = (Mat_SeqAIJ *)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i = 0; i < nrows; i++) {
    lensi = ailen[i];
    for (k = 0; k < lensi; k++) {
      *j_new++ = *aj++;
      PetscCall(MatGetValue(*aa++, first, first, a_new++));
    }
    i_new[i + 1] = i_new[i] + lensi;
    c->ilen[i]   = lensi;
  }

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode) {
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
  PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
  Mat          *aa = a->a, *ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i = 1; i < m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it */
    fshift += imax[i - 1] - ailen[i - 1];
    rmax = PetscMax(rmax, ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j = 0; j < N; j++) {
        ip[j - fshift] = ip[j];
        ap[j - fshift] = ap[j];
      }
    }
    ai[i] = ai[i - 1] + ailen[i - 1];
  }
  if (m) {
    fshift += imax[m - 1] - ailen[m - 1];
    ai[m] = ai[m - 1] + ailen[m - 1];
  }
  /* reset ilen and imax for each row */
  for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
  a->nz = ai[m];
  for (i = 0; i < a->nz; i++) {
    PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
    PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
  PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
  PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

  A->info.mallocs += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg) {
  PetscFunctionBegin;
  if (opt == MAT_SYMMETRIC && flg) {
    A->ops->sor  = MatSOR_BlockMat_Symmetric;
    A->ops->mult = MatMult_BlockMat_Symmetric;
  } else {
    PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
  }
  PetscFunctionReturn(0);
}

static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                                       /* 4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                                       /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

/*@C
  MatBlockMatSetPreallocation - For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameter nz
  (or the array nnz).  By setting these parameters accurately, performance
  during matrix assembly can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B   - The matrix
. bs  - size of each block in the matrix
. nz  - number of nonzero blocks per block row (same for all block rows)
- nnz - array containing the number of nonzero blocks in the various block rows
        (possibly different for each block row) or NULL

  Notes:
  If nnz is given then nz is ignored

  Specify the preallocated storage with either nz or nnz (not both).
  Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
  allocation.  For large problems you MUST preallocate memory or you
  will get TERRIBLE performance, see the users' manual chapter on matrices.
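
  Example Usage:
  A minimal sketch (the block size and per-block-row counts below are purely illustrative); note that the
  current implementation requires the nnz argument to be provided.
.vb
  Mat      A;
  PetscInt nnz[2] = {2, 1}; /* expected nonzero blocks in each of the two block rows */

  MatCreate(PETSC_COMM_SELF, &A);
  MatSetSizes(A, 4, 4, PETSC_DETERMINE, PETSC_DETERMINE);
  MatSetType(A, MATBLOCKMAT);
  MatBlockMatSetPreallocation(A, 2, 0, nnz); /* bs = 2; nz is ignored because nnz is given */
.ve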

  Level: intermediate

.seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) {
  PetscFunctionBegin;
  PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz) {
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
  if (nnz) {
    for (i = 0; i < A->rmap->n / bs; i++) {
      PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
    }
  }
  bmat->mbs = A->rmap->n / bs;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

  if (!bmat->imax) {
    PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
    PetscCall(PetscLogObjectMemory((PetscObject)A, 2 * A->rmap->n * sizeof(PetscInt)));
  }
  if (PetscLikely(nnz)) {
    nz = 0;
    for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz += nnz[i];
    }
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
  PetscCall(PetscLogObjectMemory((PetscObject)A, (A->rmap->n + 1) * sizeof(PetscInt) + nz * (sizeof(PetscScalar) + sizeof(PetscInt))));
  bmat->i[0] = 0;
  for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
  bmat->singlemalloc = PETSC_TRUE;
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*MC
  MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
  consisting of (usually) sparse blocks.
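
  Notes:
  `MatLoad()` for this type first reads the matrix from the viewer as a `MATSEQAIJ` matrix and then repacks it
  into blocks; the block size and symmetric storage are selected at load time with the options database keys
  `-matload_block_size <bs>` and `-matload_symmetric`.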

  Level: advanced

.seealso: `MatCreateBlockMat()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) {
  Mat_BlockMat *b;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(A, &b));
  A->data = (void *)b;
  PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));

  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(0);
}

/*@C
  MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of rows
. n    - number of columns
. bs   - size of each submatrix
. nz   - expected maximum number of nonzero blocks in a row (use `PETSC_DEFAULT` if not known)
- nnz  - expected number of nonzero blocks per block row if known (use NULL otherwise)

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
  have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

  For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

  Developer Note:
  I don't like the name, it is not `MATNESTMAT`

.seealso: `MATBLOCKMAT`, `MatCreateNest()`
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(0);
}
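
/*
   Usage sketch for MatCreateBlockMat(): the calls below are illustrative only; the sizes, block size, and
   fill pattern are made up and not taken from any PETSc example.  Scalar entries set with MatSetValue() are
   routed into the matching bs x bs block.

     Mat      A;
     Vec      x, y;
     PetscInt nnz[2] = {2, 2};

     PetscCall(MatCreateBlockMat(PETSC_COMM_SELF, 4, 4, 2, 0, nnz, &A));
     PetscCall(MatSetValue(A, 0, 0, 1.0, INSERT_VALUES));
     PetscCall(MatSetValue(A, 1, 3, 2.0, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

     PetscCall(MatCreateVecs(A, &x, &y));
     PetscCall(VecSet(x, 1.0));
     PetscCall(MatMult(A, x, y));

     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
     PetscCall(MatDestroy(&A));
*/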