/*
   This provides a matrix that consists of Mats
*/

#include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */

typedef struct {
  SEQAIJHEADER(Mat);
  SEQBAIJHEADER;
  Mat *diags;

  Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
} Mat_BlockMat;

static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
  PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb, &a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  /* copy right-hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb, a->workb));
  PetscCall(VecGetArrayRead(a->workb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));

        /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
        for (j = 0; j < n; j++) {
          PetscCall(MatMultTranspose(v[j], right, left));
          PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
          PetscCall(VecAXPY(middle, -1.0, left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(a->workb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
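
/*
   MatSOR_BlockMat_Symmetric() above and MatSOR_BlockMat() below perform block Gauss-Seidel
   sweeps (SOR with omega = 1) on the CSR-of-Mats storage: for each block row i the update is

       x_i <- D_ii^{-1} ( b_i - sum_{j != i} A_ij x_j )

   where the diagonal blocks D_ii are factored once (Cholesky above, LU below) and the factors
   are reused for every sweep. Because the symmetric variant stores only the upper triangle,
   after solving for x_i it also subtracts A_ij^T x_i from the work copy of b_j for the later
   block rows j, in the same spirit as MatSOR_SeqSBAIJ.
*/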

static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
      PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
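
/*
   MatSetValues_BlockMat() receives scalar (point) indices and maps them onto the block
   storage: global entry (row, col) belongs to block (row/bs, col/bs) and to local position
   (row%bs, col%bs) inside that block's SeqAIJ matrix. For example, with bs = 2 the entry
   (5, 3) is stored at position (1, 1) of block (2, 1). A value aimed at a block position
   that is not part of the preallocated block structure generates an error.
*/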

static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt     *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1;
  PetscInt     *ai = a->i, *ailen = a->ilen;
  PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
  PetscInt      ridx, cidx;
  PetscBool     roworiented = a->roworiented;
  MatScalar     value;
  Mat          *ap, *aa = a->a;

  PetscFunctionBegin;
  for (k = 0; k < m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row / bs;
    if (row < 0) continue;
    PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    rp   = aj + ai[brow];
    ap   = aa + ai[brow];
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l = 0; l < n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
      col  = in[l];
      bcol = col / bs;
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs;
      cidx = col % bs;
      if (roworiented) value = v[l + k * n];
      else value = v[k + l * m];

      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      while (high - low > 7) {
        t = (low + high) / 2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }
      for (i = low; i < high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;
      }
      if (nonew == 1) goto noinsert1;
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col);
#if 0
      MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
      N = nrow++ - 1;
      high++;
      /* shift up all the later entries in this row */
      for (ii = N; ii >= i; ii--) {
        rp[ii + 1] = rp[ii];
        ap[ii + 1] = ap[ii];
      }
      if (N >= i) ap[i] = NULL;
      rp[i] = bcol;
      a->nz++;
#endif
    noinsert1:;
      if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
      PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
      low = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
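
/*
   MatLoad_BlockMat() first reads the matrix from the binary viewer as a scalar MATSEQAIJ and
   then repacks it into bs x bs blocks: one pass over the point CSR pattern counts the nonzero
   blocks in each block row (for the block-level preallocation), a second pass preallocates the
   individual SeqAIJ blocks, and finally the values are copied row by row with MatSetValues().
   The block size and symmetric storage are selected with -matload_block_size and
   -matload_symmetric.
*/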

static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
{
  Mat                tmpA;
  PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscBool          flg = PETSC_FALSE, notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
  PetscCall(MatSetType(tmpA, MATSEQAIJ));
  PetscCall(MatLoad_SeqAIJ(tmpA, viewer));

  PetscCall(MatGetLocalSize(tmpA, &m, &n));
  PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
  PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
  PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
  PetscOptionsEnd();

  /* Determine number of nonzero blocks for each block row */
  a   = (Mat_SeqAIJ *)tmpA->data;
  mbs = m / bs;
  PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
  PetscCall(PetscArrayzero(lens, mbs));

  for (i = 0; i < mbs; i++) {
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = -1;
    while (PETSC_TRUE) {
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      for (j = 0; j < bs; j++) {
        while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      if (!notdone) break;
      if (!flg || (nextcol >= i)) lens[i]++;
      currentcol = nextcol;
    }
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
  if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
  amat = (Mat_BlockMat *)newmat->data;

  /* preallocate the submatrices */
  PetscCall(PetscMalloc1(bs, &llens));
  for (i = 0; i < mbs; i++) { /* loops for block rows */
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = 1000000000;
    for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
      if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
    }

    while (PETSC_TRUE) { /* loops over blocks in block row */
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      PetscCall(PetscArrayzero(llens, bs));
      for (j = 0; j < bs; j++) {                            /* loop over rows in block */
        while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
          ii[j]++;
          ilens[j]--;
          llens[j]++;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
      if (!flg || currentcol >= i) {
        amat->j[cnt] = currentcol;
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
      }

      if (!notdone) break;
      currentcol = nextcol;
    }
    amat->ilen[i] = lens[i];
  }

  PetscCall(PetscFree3(lens, ii, ilens));
  PetscCall(PetscFree(llens));

  /* copy over the matrix, one row at a time */
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
    PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
    PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
  }
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
{
  Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
  const char       *name;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetName((PetscObject)A, &name));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_BlockMat(Mat mat)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(VecDestroy(&bmat->right));
  PetscCall(VecDestroy(&bmat->left));
  PetscCall(VecDestroy(&bmat->middle));
  PetscCall(VecDestroy(&bmat->workb));
  if (bmat->diags) {
    for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
  }
  if (bmat->a) {
    for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
  }
  PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscFree(mat->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
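
/*
   The multiply routines below walk the block CSR structure exactly like a scalar CSR multiply,
   except that each "entry" is a Mat. The work vectors bmat->left, bmat->right, and bmat->middle
   are aliased onto bs-sized segments of x and y with VecPlaceArray(), so each block
   contribution is computed with MatMultAdd() without copying any data.
*/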

static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    n = ii[i + 1] - jrow;
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
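
/*
   Symmetric variant: only the upper triangular blocks are stored, so every off-diagonal block
   A_ij (j > i) contributes twice, A_ij * x_j to y_i and A_ij^T * x_i to y_j (the latter via
   MatMultTransposeAdd). The diagonal block, when present, is handled once before the loop over
   the remaining blocks in the row.
*/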

static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Adds diagonal pointers to sparse matrix nonzero structure.
*/
static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

  PetscFunctionBegin;
  if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
  for (i = 0; i < mbs; i++) {
    a->diag[i] = a->i[i + 1];
    for (j = a->i[i]; j < a->i[i + 1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
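
/*
   MatCreateSubMatrix_BlockMat() extracts a scalar matrix by taking entry (first, first) from
   every stored block, where first is the offset of the stride index set; the stride step must
   equal the block size so that exactly one entry is selected per block. The result is a SeqAIJ
   (or SeqSBAIJ when the matrix is marked symmetric) matrix with the same block-level nonzero
   pattern.
*/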

static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  Mat_SeqAIJ   *c;
  PetscInt      i, k, first, step, lensi, nrows, ncols;
  PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
  PetscScalar  *a_new;
  Mat           C, *aa = a->a;
  PetscBool     stride, equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow, iscol, &equal));
  PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
  PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol, &first, &step));
  PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow, &nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    C = *B;
    PetscCall(MatGetSize(C, &n_rows, &n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
    else PetscCall(MatSetType(C, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
  }
  c = (Mat_SeqAIJ *)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i = 0; i < nrows; i++) {
    lensi = ailen[i];
    for (k = 0; k < lensi; k++) {
      *j_new++ = *aj++;
      PetscCall(MatGetValue(*aa++, first, first, a_new++));
    }
    i_new[i + 1] = i_new[i] + lensi;
    c->ilen[i]   = lensi;
  }

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
  PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
  Mat          *aa = a->a, *ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i = 1; i < m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it */
    fshift += imax[i - 1] - ailen[i - 1];
    rmax = PetscMax(rmax, ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j = 0; j < N; j++) {
        ip[j - fshift] = ip[j];
        ap[j - fshift] = ap[j];
      }
    }
    ai[i] = ai[i - 1] + ailen[i - 1];
  }
  if (m) {
    fshift += imax[m - 1] - ailen[m - 1];
    ai[m] = ai[m - 1] + ailen[m - 1];
  }
  /* reset ilen and imax for each row */
  for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
  a->nz = ai[m];
  for (i = 0; i < a->nz; i++) {
    PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
    PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
  PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
  PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

  A->info.mallocs += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
{
  PetscFunctionBegin;
  if (opt == MAT_SYMMETRIC && flg) {
    A->ops->sor  = MatSOR_BlockMat_Symmetric;
    A->ops->mult = MatMult_BlockMat_Symmetric;
  } else {
    PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
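
/*
   Operation (dispatch) table for MATBLOCKMAT: the numeric comments give the MatOperation index
   of the following slot, and only the operations implemented in this file are filled in.
*/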

static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                                       /* 4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                                       /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL};

/*@C
  MatBlockMatSetPreallocation - For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameter nz
  (or the array nnz). By setting these parameters accurately, performance
  during matrix assembly can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B   - The matrix
. bs  - size of each block in matrix
. nz  - number of nonzero blocks per block row (same for all block rows)
- nnz - array containing the number of nonzero blocks in the various block rows
        (possibly different for each block row) or `NULL`

  Level: intermediate

  Notes:
  If `nnz` is given then `nz` is ignored

  Specify the preallocated storage with either `nz` or `nnz` (not both).
  Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
  allocation.
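
  Example Usage:
  A minimal sketch (error checking omitted; the sizes and counts are purely illustrative),
.vb
  Mat      B;
  PetscInt nnz[3] = {2, 1, 2}; // expected nonzero blocks in each of the three block rows

  MatCreate(PETSC_COMM_SELF, &B);
  MatSetSizes(B, 6, 6, 6, 6);  // three block rows/columns with bs = 2
  MatSetType(B, MATBLOCKMAT);
  MatBlockMatSetPreallocation(B, 2, 0, nnz);
.ve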

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
  if (nnz) {
    for (i = 0; i < A->rmap->n / bs; i++) {
      PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
    }
  }
  bmat->mbs = A->rmap->n / bs;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

  if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
  PetscCheck(PetscLikely(nnz), PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
  nz = 0;
  for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
    bmat->imax[i] = nnz[i];
    nz += nnz[i];
  }

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j));
  PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i));
  bmat->free_a  = PETSC_TRUE;
  bmat->free_ij = PETSC_TRUE;

  bmat->i[0] = 0;
  for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
  consisting of (usually) sparse blocks.

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat *b;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data         = (void *)b;
  A->ops[0]       = MatOps_Values;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateBlockMat - Creates a new matrix in which each block is a uniformly sized sequential `Mat` object

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of rows
. n    - number of columns
. bs   - size of each submatrix
. nz   - expected maximum number of nonzero blocks in a row (use `PETSC_DEFAULT` if not known)
- nnz  - expected number of nonzero blocks per block row if known (use `NULL` otherwise)

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must
  have the same size and be sequential. The local and global sizes must be compatible with this decomposition.

  For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
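
  Example Usage:
  A minimal sketch (error checking omitted; the sizes and counts are purely illustrative),
.vb
  Mat      A;
  PetscInt nnz[2] = {2, 2}; // expected nonzero blocks in each of the two block rows

  MatCreateBlockMat(PETSC_COMM_SELF, 4, 4, 2, 0, nnz, &A);
.ve
  Alternatively the matrix can be created with `MatCreate()` and `MatSetType()` and then filled
  from a binary `MATSEQAIJ` file with `MatLoad()`, which honors the options -matload_block_size
  and -matload_symmetric.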

  Developer Notes:
  I don't like the name, it is not `MATNESTMAT`

.seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}