1 2 /* 3 This provides a matrix that consists of Mats 4 */ 5 6 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 8 9 typedef struct { 10 SEQAIJHEADER(Mat); 11 SEQBAIJHEADER; 12 Mat *diags; 13 14 Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */ 15 } Mat_BlockMat; 16 17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 18 { 19 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 20 PetscScalar *x; 21 const Mat *v; 22 const PetscScalar *b; 23 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 24 const PetscInt *idx; 25 IS row, col; 26 MatFactorInfo info; 27 Vec left = a->left, right = a->right, middle = a->middle; 28 Mat *diag; 29 30 PetscFunctionBegin; 31 its = its * lits; 32 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 33 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 34 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 35 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 36 PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep"); 37 38 if (!a->diags) { 39 PetscCall(PetscMalloc1(mbs, &a->diags)); 40 PetscCall(MatFactorInfoInitialize(&info)); 41 for (i = 0; i < mbs; i++) { 42 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 43 PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info)); 44 PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 
45 PetscCall(ISDestroy(&row)); 46 PetscCall(ISDestroy(&col)); 47 } 48 PetscCall(VecDuplicate(bb, &a->workb)); 49 } 50 diag = a->diags; 51 52 PetscCall(VecSet(xx, 0.0)); 53 PetscCall(VecGetArray(xx, &x)); 54 /* copy right hand side because it must be modified during iteration */ 55 PetscCall(VecCopy(bb, a->workb)); 56 PetscCall(VecGetArrayRead(a->workb, &b)); 57 58 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 59 while (its--) { 60 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 61 for (i = 0; i < mbs; i++) { 62 n = a->i[i + 1] - a->i[i] - 1; 63 idx = a->j + a->i[i] + 1; 64 v = a->a + a->i[i] + 1; 65 66 PetscCall(VecSet(left, 0.0)); 67 for (j = 0; j < n; j++) { 68 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 69 PetscCall(MatMultAdd(v[j], right, left, left)); 70 PetscCall(VecResetArray(right)); 71 } 72 PetscCall(VecPlaceArray(right, b + i * bs)); 73 PetscCall(VecAYPX(left, -1.0, right)); 74 PetscCall(VecResetArray(right)); 75 76 PetscCall(VecPlaceArray(right, x + i * bs)); 77 PetscCall(MatSolve(diag[i], left, right)); 78 79 /* now adjust right hand side, see MatSOR_SeqSBAIJ */ 80 for (j = 0; j < n; j++) { 81 PetscCall(MatMultTranspose(v[j], right, left)); 82 PetscCall(VecPlaceArray(middle, b + idx[j] * bs)); 83 PetscCall(VecAXPY(middle, -1.0, left)); 84 PetscCall(VecResetArray(middle)); 85 } 86 PetscCall(VecResetArray(right)); 87 } 88 } 89 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 90 for (i = mbs - 1; i >= 0; i--) { 91 n = a->i[i + 1] - a->i[i] - 1; 92 idx = a->j + a->i[i] + 1; 93 v = a->a + a->i[i] + 1; 94 95 PetscCall(VecSet(left, 0.0)); 96 for (j = 0; j < n; j++) { 97 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 98 PetscCall(MatMultAdd(v[j], right, left, left)); 99 PetscCall(VecResetArray(right)); 100 } 101 PetscCall(VecPlaceArray(right, b + i * bs)); 102 PetscCall(VecAYPX(left, -1.0, right)); 103 PetscCall(VecResetArray(right)); 104 105 PetscCall(VecPlaceArray(right, x + i * bs)); 
106 PetscCall(MatSolve(diag[i], left, right)); 107 PetscCall(VecResetArray(right)); 108 } 109 } 110 } 111 PetscCall(VecRestoreArray(xx, &x)); 112 PetscCall(VecRestoreArrayRead(a->workb, &b)); 113 PetscFunctionReturn(0); 114 } 115 116 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 117 { 118 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 119 PetscScalar *x; 120 const Mat *v; 121 const PetscScalar *b; 122 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 123 const PetscInt *idx; 124 IS row, col; 125 MatFactorInfo info; 126 Vec left = a->left, right = a->right; 127 Mat *diag; 128 129 PetscFunctionBegin; 130 its = its * lits; 131 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 132 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 133 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 134 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 135 136 if (!a->diags) { 137 PetscCall(PetscMalloc1(mbs, &a->diags)); 138 PetscCall(MatFactorInfoInitialize(&info)); 139 for (i = 0; i < mbs; i++) { 140 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 141 PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info)); 142 PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 143 PetscCall(ISDestroy(&row)); 144 PetscCall(ISDestroy(&col)); 145 } 146 } 147 diag = a->diags; 148 149 PetscCall(VecSet(xx, 0.0)); 150 PetscCall(VecGetArray(xx, &x)); 151 PetscCall(VecGetArrayRead(bb, &b)); 152 153 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 154 while (its--) { 155 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 156 for (i = 0; i < mbs; 
i++) { 157 n = a->i[i + 1] - a->i[i]; 158 idx = a->j + a->i[i]; 159 v = a->a + a->i[i]; 160 161 PetscCall(VecSet(left, 0.0)); 162 for (j = 0; j < n; j++) { 163 if (idx[j] != i) { 164 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 165 PetscCall(MatMultAdd(v[j], right, left, left)); 166 PetscCall(VecResetArray(right)); 167 } 168 } 169 PetscCall(VecPlaceArray(right, b + i * bs)); 170 PetscCall(VecAYPX(left, -1.0, right)); 171 PetscCall(VecResetArray(right)); 172 173 PetscCall(VecPlaceArray(right, x + i * bs)); 174 PetscCall(MatSolve(diag[i], left, right)); 175 PetscCall(VecResetArray(right)); 176 } 177 } 178 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 179 for (i = mbs - 1; i >= 0; i--) { 180 n = a->i[i + 1] - a->i[i]; 181 idx = a->j + a->i[i]; 182 v = a->a + a->i[i]; 183 184 PetscCall(VecSet(left, 0.0)); 185 for (j = 0; j < n; j++) { 186 if (idx[j] != i) { 187 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 188 PetscCall(MatMultAdd(v[j], right, left, left)); 189 PetscCall(VecResetArray(right)); 190 } 191 } 192 PetscCall(VecPlaceArray(right, b + i * bs)); 193 PetscCall(VecAYPX(left, -1.0, right)); 194 PetscCall(VecResetArray(right)); 195 196 PetscCall(VecPlaceArray(right, x + i * bs)); 197 PetscCall(MatSolve(diag[i], left, right)); 198 PetscCall(VecResetArray(right)); 199 } 200 } 201 } 202 PetscCall(VecRestoreArray(xx, &x)); 203 PetscCall(VecRestoreArrayRead(bb, &b)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 208 { 209 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 210 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 211 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 212 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 213 PetscInt ridx, cidx; 214 PetscBool roworiented = a->roworiented; 215 MatScalar value; 216 Mat *ap, *aa = a->a; 
217 218 PetscFunctionBegin; 219 for (k = 0; k < m; k++) { /* loop over added rows */ 220 row = im[k]; 221 brow = row / bs; 222 if (row < 0) continue; 223 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 224 rp = aj + ai[brow]; 225 ap = aa + ai[brow]; 226 rmax = imax[brow]; 227 nrow = ailen[brow]; 228 low = 0; 229 high = nrow; 230 for (l = 0; l < n; l++) { /* loop over added columns */ 231 if (in[l] < 0) continue; 232 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 233 col = in[l]; 234 bcol = col / bs; 235 if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue; 236 ridx = row % bs; 237 cidx = col % bs; 238 if (roworiented) value = v[l + k * n]; 239 else value = v[k + l * m]; 240 241 if (col <= lastcol) low = 0; 242 else high = nrow; 243 lastcol = col; 244 while (high - low > 7) { 245 t = (low + high) / 2; 246 if (rp[t] > bcol) high = t; 247 else low = t; 248 } 249 for (i = low; i < high; i++) { 250 if (rp[i] > bcol) break; 251 if (rp[i] == bcol) goto noinsert1; 252 } 253 if (nonew == 1) goto noinsert1; 254 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 255 MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat); 256 N = nrow++ - 1; 257 high++; 258 /* shift up all the later entries in this row */ 259 for (ii = N; ii >= i; ii--) { 260 rp[ii + 1] = rp[ii]; 261 ap[ii + 1] = ap[ii]; 262 } 263 if (N >= i) ap[i] = NULL; 264 rp[i] = bcol; 265 a->nz++; 266 A->nonzerostate++; 267 noinsert1:; 268 if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i)); 269 PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is)); 270 low = i; 271 } 272 ailen[brow] = nrow; 273 } 274 
PetscFunctionReturn(0); 275 } 276 277 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) 278 { 279 Mat tmpA; 280 PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0; 281 const PetscInt *cols; 282 const PetscScalar *values; 283 PetscBool flg = PETSC_FALSE, notdone; 284 Mat_SeqAIJ *a; 285 Mat_BlockMat *amat; 286 287 PetscFunctionBegin; 288 /* force binary viewer to load .info file if it has not yet done so */ 289 PetscCall(PetscViewerSetUp(viewer)); 290 PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA)); 291 PetscCall(MatSetType(tmpA, MATSEQAIJ)); 292 PetscCall(MatLoad_SeqAIJ(tmpA, viewer)); 293 294 PetscCall(MatGetLocalSize(tmpA, &m, &n)); 295 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat"); 296 PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL)); 297 PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL)); 298 PetscOptionsEnd(); 299 300 /* Determine number of nonzero blocks for each block row */ 301 a = (Mat_SeqAIJ *)tmpA->data; 302 mbs = m / bs; 303 PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens)); 304 PetscCall(PetscArrayzero(lens, mbs)); 305 306 for (i = 0; i < mbs; i++) { 307 for (j = 0; j < bs; j++) { 308 ii[j] = a->j + a->i[i * bs + j]; 309 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 310 } 311 312 currentcol = -1; 313 while (PETSC_TRUE) { 314 notdone = PETSC_FALSE; 315 nextcol = 1000000000; 316 for (j = 0; j < bs; j++) { 317 while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { 318 ii[j]++; 319 ilens[j]--; 320 } 321 if (ilens[j] > 0) { 322 notdone = PETSC_TRUE; 323 nextcol = PetscMin(nextcol, ii[j][0] / bs); 324 } 325 } 326 if (!notdone) break; 327 if (!flg || (nextcol >= i)) lens[i]++; 328 currentcol = nextcol; 329 } 330 } 331 332 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N 
< 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 333 PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens)); 334 if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE)); 335 amat = (Mat_BlockMat *)(newmat)->data; 336 337 /* preallocate the submatrices */ 338 PetscCall(PetscMalloc1(bs, &llens)); 339 for (i = 0; i < mbs; i++) { /* loops for block rows */ 340 for (j = 0; j < bs; j++) { 341 ii[j] = a->j + a->i[i * bs + j]; 342 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 343 } 344 345 currentcol = 1000000000; 346 for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */ 347 if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs); 348 } 349 350 while (PETSC_TRUE) { /* loops over blocks in block row */ 351 notdone = PETSC_FALSE; 352 nextcol = 1000000000; 353 PetscCall(PetscArrayzero(llens, bs)); 354 for (j = 0; j < bs; j++) { /* loop over rows in block */ 355 while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { /* loop over columns in row */ 356 ii[j]++; 357 ilens[j]--; 358 llens[j]++; 359 } 360 if (ilens[j] > 0) { 361 notdone = PETSC_TRUE; 362 nextcol = PetscMin(nextcol, ii[j][0] / bs); 363 } 364 } 365 PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt); 366 if (!flg || currentcol >= i) { 367 amat->j[cnt] = currentcol; 368 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++)); 369 } 370 371 if (!notdone) break; 372 currentcol = nextcol; 373 } 374 amat->ilen[i] = lens[i]; 375 } 376 377 PetscCall(PetscFree3(lens, ii, ilens)); 378 PetscCall(PetscFree(llens)); 379 380 /* copy over the matrix, one row at a time */ 381 for (i = 0; i < m; i++) { 382 PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values)); 383 PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES)); 384 PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values)); 385 } 386 
PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 387 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 388 PetscFunctionReturn(0); 389 } 390 391 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer) 392 { 393 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 394 const char *name; 395 PetscViewerFormat format; 396 397 PetscFunctionBegin; 398 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 399 PetscCall(PetscViewerGetFormat(viewer, &format)); 400 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 401 PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz)); 402 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n")); 403 } 404 PetscFunctionReturn(0); 405 } 406 407 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 408 { 409 Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data; 410 PetscInt i; 411 412 PetscFunctionBegin; 413 PetscCall(VecDestroy(&bmat->right)); 414 PetscCall(VecDestroy(&bmat->left)); 415 PetscCall(VecDestroy(&bmat->middle)); 416 PetscCall(VecDestroy(&bmat->workb)); 417 if (bmat->diags) { 418 for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i])); 419 } 420 if (bmat->a) { 421 for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i])); 422 } 423 PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 424 PetscCall(PetscFree(mat->data)); 425 PetscFunctionReturn(0); 426 } 427 428 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y) 429 { 430 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 431 PetscScalar *xx, *yy; 432 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 433 Mat *aa; 434 435 PetscFunctionBegin; 436 /* 437 Standard CSR multiply except each entry is a Mat 438 */ 439 PetscCall(VecGetArray(x, &xx)); 440 441 PetscCall(VecSet(y, 0.0)); 442 PetscCall(VecGetArray(y, &yy)); 
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) { /* block row i: y_i += sum_j A[i][j] * x_j */
    jrow = ii[i];
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    n = ii[i + 1] - jrow;
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(0);
}

/*
   y = A*x when only the upper triangular blocks are stored: each stored block
   A[i][j] (j > i) contributes both A[i][j]*x_j to y_i and A[i][j]^T * x_i to y_j,
   so the lower triangular part is applied via MatMultTransposeAdd().
*/
PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));   /* left   <-> y_i */
    PetscCall(VecPlaceArray(bmat->middle, xx + bs * i)); /* middle <-> x_i */
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      /* diagonal block contributes only once (no transpose term) */
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(0);
}

/* NOTE(review): unimplemented stub — returns success without computing anything.
   Callers receive an unchanged z; confirm whether this should instead raise PETSC_ERR_SUP. */
static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

/* NOTE(review): unimplemented stub — silently does nothing (see MatMultAdd_BlockMat). */
static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

/* NOTE(review): unimplemented stub — silently does nothing (see MatMultAdd_BlockMat). */
static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

/*
   Adds diagonal pointers to sparse matrix structure.
*/
static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

  PetscFunctionBegin;
  if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
  for (i = 0; i < mbs; i++) {
    /* default to one-past-the-end of the row when no diagonal block is present */
    a->diag[i] = a->i[i + 1];
    for (j = a->i[i]; j < a->i[i + 1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Extracts one scalar entry from each block to form a scalar submatrix.
   Restricted to isrow == iscol and stride index sets whose step equals the
   block size (one entry selected per block).
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  Mat_SeqAIJ   *c;
  PetscInt      i, k, first, step, lensi, nrows, ncols;
  PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
  PetscScalar  *a_new;
  Mat           C, *aa = a->a;
  PetscBool     stride, equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow, iscol, &equal));
  PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
  PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol, &first, &step));
  PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow, &nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    C = *B;
    PetscCall(MatGetSize(C, &n_rows, &n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
    else PetscCall(MatSetType(C, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
  }
  c = (Mat_SeqAIJ *)C->data;

  /* loop over rows inserting into submatrix */
  /* directly fills the SeqAIJ internals of C; the block column pattern of A
     becomes the scalar column pattern of C */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i = 0; i < nrows; i++) {
    lensi = ailen[i];
    for (k = 0; k < lensi; k++) {
      *j_new++ = *aj++;
      /* NOTE(review): entry (first,first) is read from every block, which assumes
         first < bs (the stride's offset lies inside the first block) — confirm */
      PetscCall(MatGetValue(*aa++, first, first, a_new++));
    }
    i_new[i + 1] = i_new[i] + lensi;
    c->ilen[i]   = lensi;
  }

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(0);
}

/*
   Final assembly: compresses out the unused preallocated slots in each block row,
   rebuilds the row offsets, and assembles every stored submatrix.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
  PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
  Mat          *aa = a->a, *ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i = 1; i < m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i - 1] - ailen[i - 1];
    rmax = PetscMax(rmax, ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j = 0; j < N; j++) {
        ip[j - fshift] = ip[j];
        ap[j - fshift] = ap[j];
      }
    }
    ai[i] = ai[i - 1] + ailen[i - 1];
  }
  if (m) {
    fshift += imax[m - 1] - ailen[m - 1];
    ai[m] = ai[m - 1] + ailen[m - 1];
  }
  /* reset ilen and imax for each row */
  for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
  a->nz = ai[m];
  /* every surviving slot must hold a block; assemble each one */
  for (i = 0; i < a->nz; i++) {
    PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
    PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
  PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
  PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

  A->info.mallocs += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}

/*
   Only MAT_SYMMETRIC is acted on: it swaps in the symmetric-storage SOR and
   MatMult implementations.  NOTE(review): passing flg == PETSC_FALSE does not
   restore the nonsymmetric implementations — confirm whether that is intended.
*/
static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
{
  PetscFunctionBegin;
  if (opt == MAT_SYMMETRIC && flg) {
    A->ops->sor  = MatSOR_BlockMat_Symmetric;
    A->ops->mult = MatMult_BlockMat_Symmetric;
  } else {
    PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
  }
  PetscFunctionReturn(0);
}

/* function table for MATBLOCKMAT; entries are positional (see struct _MatOps) */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                                       /* 4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                                       /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

/*@C
  MatBlockMatSetPreallocation - For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameter nz
  (or the
  array nnz). By setting these parameters accurately, performance
  during matrix assembly can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B   - The matrix
. bs  - size of each block in matrix
. nz  - number of nonzeros per block row (same for all rows)
- nnz - array containing the number of nonzeros in the various block rows
        (possibly different for each row) or NULL

  Notes:
  If nnz is given then nz is ignored

  Specify the preallocated storage with either nz or nnz (not both).
  Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
  allocation. For large problems you MUST preallocate memory or you
  will get TERRIBLE performance, see the users' manual chapter on matrices.

  Level: intermediate

.seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(0);
}

/*
   Type-specific implementation: sets up the layouts, creates the block-sized
   work vectors, and allocates the AIJ-like block index structure (a, j, i).
   Currently requires per-block-row counts in nnz (see the SETERRQ below).
*/
static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* layout may have adjusted the block size; re-read it */
  PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
  if (nnz) {
    for (i = 0; i < A->rmap->n / bs; i++) {
      PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
    }
  }
  bmat->mbs = A->rmap->n / bs; /* number of block rows */

  /* bs-long dummy vectors used to window into larger arrays (right/middle get
     their arrays via VecPlaceArray) */
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

  /* NOTE(review): imax/ilen are sized A->rmap->n although only mbs = A->rmap->n/bs
     entries are used — appears to be an over-allocation; confirm */
  if (!bmat->imax) { PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen)); }
  if (PetscLikely(nnz)) {
    nz = 0;
    for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz += nnz[i];
    }
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

  /* allocate the matrix space */
  /* NOTE(review): bmat->i is sized A->rmap->n + 1 though only mbs + 1 entries
     are initialized/used below — confirm */
  PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
  bmat->i[0] = 0;
  for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
  bmat->singlemalloc = PETSC_TRUE;
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*MC
   MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
                 consisting of (usually) sparse blocks.

  Level: advanced

.seealso: `MatCreateBlockMat()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat *b;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;
  PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));

  /* NOTE(review): marked assembled on creation, presumably so the empty matrix is
     usable before the first assembly — confirm */
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(0);
}

/*@C
  MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of rows
. n    - number of columns
. bs   - size of each submatrix
. nz   - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
- nnz  - expected number of nonzeros per block row if known (use NULL otherwise)

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must
  have the same size and be sequential. The local and global sizes must be compatible with this decomposition.

  For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

  Developer Note:
  I don't like the name, it is not `MATNESTMAT`

.seealso: `MATBLOCKMAT`, `MatCreateNest()`
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(0);
}