1 /* 2 This provides a matrix that consists of Mats 3 */ 4 5 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 7 8 typedef struct { 9 SEQAIJHEADER(Mat); 10 SEQBAIJHEADER; 11 Mat *diags; 12 13 Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */ 14 } Mat_BlockMat; 15 16 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 17 { 18 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 19 PetscScalar *x; 20 const Mat *v; 21 const PetscScalar *b; 22 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 23 const PetscInt *idx; 24 IS row, col; 25 MatFactorInfo info; 26 Vec left = a->left, right = a->right, middle = a->middle; 27 Mat *diag; 28 29 PetscFunctionBegin; 30 its = its * lits; 31 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 32 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 33 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 34 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 35 PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep"); 36 37 if (!a->diags) { 38 PetscCall(PetscMalloc1(mbs, &a->diags)); 39 PetscCall(MatFactorInfoInitialize(&info)); 40 for (i = 0; i < mbs; i++) { 41 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 42 PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info)); 43 PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 44 PetscCall(ISDestroy(&row)); 45 PetscCall(ISDestroy(&col)); 46 } 47 PetscCall(VecDuplicate(bb, &a->workb)); 48 } 49 diag = a->diags; 50 51 PetscCall(VecSet(xx, 0.0)); 52 PetscCall(VecGetArray(xx, &x)); 53 /* copy right-hand side because it must be modified during iteration */ 54 PetscCall(VecCopy(bb, a->workb)); 55 PetscCall(VecGetArrayRead(a->workb, &b)); 56 57 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 58 while (its--) { 59 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 60 for (i = 0; i < mbs; i++) { 61 n = a->i[i + 1] - a->i[i] - 1; 62 idx = a->j + a->i[i] + 1; 63 v = a->a + a->i[i] + 1; 64 65 PetscCall(VecSet(left, 0.0)); 66 for (j = 0; j < n; j++) { 67 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 68 PetscCall(MatMultAdd(v[j], right, left, left)); 69 PetscCall(VecResetArray(right)); 70 } 71 PetscCall(VecPlaceArray(right, b + i * bs)); 72 PetscCall(VecAYPX(left, -1.0, right)); 73 PetscCall(VecResetArray(right)); 74 75 PetscCall(VecPlaceArray(right, x + i * bs)); 76 PetscCall(MatSolve(diag[i], left, right)); 77 78 /* now adjust right-hand side, see MatSOR_SeqSBAIJ */ 79 for (j = 0; j < n; j++) { 80 PetscCall(MatMultTranspose(v[j], right, left)); 81 PetscCall(VecPlaceArray(middle, b + idx[j] * bs)); 82 PetscCall(VecAXPY(middle, -1.0, left)); 83 PetscCall(VecResetArray(middle)); 84 } 85 PetscCall(VecResetArray(right)); 86 } 87 } 88 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 89 for (i = mbs - 1; i >= 0; i--) { 90 n = a->i[i + 1] - a->i[i] - 1; 91 idx = a->j + a->i[i] + 1; 92 v = a->a + a->i[i] + 1; 93 94 PetscCall(VecSet(left, 0.0)); 95 for (j = 0; j < n; j++) { 96 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 97 PetscCall(MatMultAdd(v[j], right, left, left)); 98 PetscCall(VecResetArray(right)); 99 } 100 PetscCall(VecPlaceArray(right, b + i * bs)); 101 PetscCall(VecAYPX(left, -1.0, right)); 102 PetscCall(VecResetArray(right)); 103 104 PetscCall(VecPlaceArray(right, x + i * bs)); 105 PetscCall(MatSolve(diag[i], left, right)); 106 PetscCall(VecResetArray(right)); 107 } 108 } 109 } 110 PetscCall(VecRestoreArray(xx, &x)); 111 PetscCall(VecRestoreArrayRead(a->workb, &b)); 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 115 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 116 { 117 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 118 PetscScalar *x; 119 const Mat *v; 120 const PetscScalar *b; 121 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs; 122 const PetscInt *idx; 123 IS row, col; 124 MatFactorInfo info; 125 Vec left = a->left, right = a->right; 126 Mat *diag; 127 128 PetscFunctionBegin; 129 its = its * lits; 130 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 131 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 132 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0"); 133 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift"); 134 135 if (!a->diags) { 136 PetscCall(PetscMalloc1(mbs, &a->diags)); 137 PetscCall(MatFactorInfoInitialize(&info)); 138 for (i = 0; i < mbs; i++) { 139 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col)); 140 PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info)); 141 PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info)); 142 PetscCall(ISDestroy(&row)); 143 PetscCall(ISDestroy(&col)); 144 } 145 } 146 diag = a->diags; 147 148 PetscCall(VecSet(xx, 0.0)); 149 PetscCall(VecGetArray(xx, &x)); 150 PetscCall(VecGetArrayRead(bb, &b)); 151 152 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 153 while (its--) { 154 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 155 for (i = 0; i < mbs; i++) { 156 n = a->i[i + 1] - a->i[i]; 157 idx = a->j + a->i[i]; 158 v = a->a + a->i[i]; 159 160 PetscCall(VecSet(left, 0.0)); 161 for (j = 0; j < n; j++) { 162 if (idx[j] != i) { 163 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 164 PetscCall(MatMultAdd(v[j], right, left, left)); 165 PetscCall(VecResetArray(right)); 166 } 167 } 168 PetscCall(VecPlaceArray(right, b + i * bs)); 169 PetscCall(VecAYPX(left, -1.0, right)); 170 PetscCall(VecResetArray(right)); 171 172 PetscCall(VecPlaceArray(right, x + i * bs)); 173 PetscCall(MatSolve(diag[i], left, right)); 174 PetscCall(VecResetArray(right)); 175 } 176 } 177 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 178 for (i = mbs - 1; i >= 0; i--) { 179 n = a->i[i + 1] - a->i[i]; 180 idx = a->j + a->i[i]; 181 v = a->a + a->i[i]; 182 183 PetscCall(VecSet(left, 0.0)); 184 for (j = 0; j < n; j++) { 185 if (idx[j] != i) { 186 PetscCall(VecPlaceArray(right, x + idx[j] * bs)); 187 PetscCall(MatMultAdd(v[j], right, left, left)); 188 PetscCall(VecResetArray(right)); 189 } 190 } 191 PetscCall(VecPlaceArray(right, b + i * bs)); 192 PetscCall(VecAYPX(left, -1.0, right)); 193 PetscCall(VecResetArray(right)); 194 195 PetscCall(VecPlaceArray(right, x + i * bs)); 196 PetscCall(MatSolve(diag[i], left, right)); 197 PetscCall(VecResetArray(right)); 198 } 199 } 200 } 201 PetscCall(VecRestoreArray(xx, &x)); 202 PetscCall(VecRestoreArrayRead(bb, &b)); 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 207 { 208 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 209 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1; 210 PetscInt *ai = a->i, *ailen = a->ilen; 211 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 212 PetscInt ridx, cidx; 213 PetscBool roworiented = a->roworiented; 214 MatScalar value; 215 Mat *ap, *aa = a->a; 216 217 PetscFunctionBegin; 218 for (k = 0; k < m; k++) { /* loop over added rows */ 219 row = im[k]; 220 brow = row / bs; 221 if (row < 0) continue; 222 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 223 rp = aj + ai[brow]; 224 ap = aa + ai[brow]; 225 nrow = ailen[brow]; 226 low = 0; 227 high = nrow; 228 for (l = 0; l < n; l++) { /* loop over added columns */ 229 if (in[l] < 0) continue; 230 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 231 col = in[l]; 232 bcol = col / bs; 233 if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue; 234 ridx = row % bs; 235 cidx = col % bs; 236 if (roworiented) value = v[l + k * n]; 237 else value = v[k + l * m]; 238 239 if (col <= lastcol) low = 0; 240 else high = nrow; 241 lastcol = col; 242 while (high - low > 7) { 243 t = (low + high) / 2; 244 if (rp[t] > bcol) high = t; 245 else low = t; 246 } 247 for (i = low; i < high; i++) { 248 if (rp[i] > bcol) break; 249 if (rp[i] == bcol) goto noinsert1; 250 } 251 if (nonew == 1) goto noinsert1; 252 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col); 253 #if 0 254 MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat); 255 N = nrow++ - 1; 256 high++; 257 /* shift up all the later entries in this row */ 258 for (ii = N; ii >= i; ii--) { 259 rp[ii + 1] = rp[ii]; 260 ap[ii + 1] = ap[ii]; 261 } 262 if (N >= i) ap[i] = NULL; 263 rp[i] = bcol; 264 a->nz++; 265 #endif 266 noinsert1:; 267 if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i)); 268 PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is)); 269 low = i; 270 } 271 ailen[brow] = nrow; 272 } 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) 277 { 278 Mat tmpA; 279 PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0; 280 const PetscInt *cols; 281 const PetscScalar *values; 282 PetscBool flg = PETSC_FALSE, notdone; 283 Mat_SeqAIJ *a; 284 Mat_BlockMat *amat; 285 286 PetscFunctionBegin; 287 /* force binary viewer to load .info file if it has not yet done so */ 288 PetscCall(PetscViewerSetUp(viewer)); 289 PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA)); 290 PetscCall(MatSetType(tmpA, MATSEQAIJ)); 291 PetscCall(MatLoad_SeqAIJ(tmpA, viewer)); 292 293 PetscCall(MatGetLocalSize(tmpA, &m, &n)); 294 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat"); 295 PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL)); 296 PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL)); 297 PetscOptionsEnd(); 298 299 /* Determine number of nonzero blocks for each block row */ 300 a = (Mat_SeqAIJ *)tmpA->data; 301 mbs = m / bs; 302 PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens)); 303 PetscCall(PetscArrayzero(lens, mbs)); 304 305 for (i = 0; i < mbs; i++) { 306 for (j = 0; j < bs; j++) { 307 ii[j] = a->j + a->i[i * bs + j]; 308 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 309 } 310 311 currentcol = -1; 312 while (PETSC_TRUE) { 313 notdone = PETSC_FALSE; 314 nextcol = 1000000000; 315 for (j = 0; j < bs; j++) { 316 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { 317 ii[j]++; 318 ilens[j]--; 319 } 320 if (ilens[j] > 0) { 321 notdone = PETSC_TRUE; 322 nextcol = PetscMin(nextcol, ii[j][0] / bs); 323 } 324 } 325 if (!notdone) break; 326 if (!flg || (nextcol >= i)) lens[i]++; 327 currentcol = nextcol; 328 } 329 } 330 331 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 332 PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens)); 333 if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE)); 334 amat = (Mat_BlockMat *)newmat->data; 335 336 /* preallocate the submatrices */ 337 PetscCall(PetscMalloc1(bs, &llens)); 338 for (i = 0; i < mbs; i++) { /* loops for block rows */ 339 for (j = 0; j < bs; j++) { 340 ii[j] = a->j + a->i[i * bs + j]; 341 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j]; 342 } 343 344 currentcol = 1000000000; 345 for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */ 346 if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs); 347 } 348 349 while (PETSC_TRUE) { /* loops over blocks in block row */ 350 notdone = PETSC_FALSE; 351 nextcol = 1000000000; 352 PetscCall(PetscArrayzero(llens, bs)); 353 for (j = 0; j < bs; j++) { /* loop over rows in block */ 354 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */ 355 ii[j]++; 356 ilens[j]--; 357 llens[j]++; 358 } 359 if (ilens[j] > 0) { 360 notdone = PETSC_TRUE; 361 nextcol = PetscMin(nextcol, ii[j][0] / bs); 362 } 363 } 364 PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt); 365 if (!flg || currentcol >= i) { 366 amat->j[cnt] = currentcol; 367 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++)); 368 } 369 370 if (!notdone) break; 371 currentcol = nextcol; 372 } 373 amat->ilen[i] = lens[i]; 374 } 375 376 PetscCall(PetscFree3(lens, ii, ilens)); 377 PetscCall(PetscFree(llens)); 378 379 /* copy over the matrix, one row at a time */ 380 for (i = 0; i < m; i++) { 381 PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values)); 382 PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES)); 383 PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values)); 384 } 385 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 386 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 387 PetscFunctionReturn(PETSC_SUCCESS); 388 } 389 390 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer) 391 { 392 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 393 const char *name; 394 PetscViewerFormat format; 395 396 PetscFunctionBegin; 397 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 398 PetscCall(PetscViewerGetFormat(viewer, &format)); 399 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 400 PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz)); 401 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n")); 402 } 403 PetscFunctionReturn(PETSC_SUCCESS); 404 } 405 406 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 407 { 408 Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data; 409 PetscInt i; 410 411 PetscFunctionBegin; 412 PetscCall(VecDestroy(&bmat->right)); 413 PetscCall(VecDestroy(&bmat->left)); 414 PetscCall(VecDestroy(&bmat->middle)); 415 PetscCall(VecDestroy(&bmat->workb)); 416 if (bmat->diags) { 417 for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i])); 418 } 419 if (bmat->a) { 420 for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i])); 421 } 422 PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 423 PetscCall(PetscFree(mat->data)); 424 PetscFunctionReturn(PETSC_SUCCESS); 425 } 426 427 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y) 428 { 429 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 430 PetscScalar *xx, *yy; 431 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 432 Mat *aa; 433 434 PetscFunctionBegin; 435 /* 436 Standard CSR multiply except each entry is a Mat 437 */ 438 PetscCall(VecGetArray(x, &xx)); 439 440 PetscCall(VecSet(y, 0.0)); 441 PetscCall(VecGetArray(y, &yy)); 442 aj = bmat->j; 443 aa = bmat->a; 444 ii = bmat->i; 445 for (i = 0; i < m; i++) { 446 jrow = ii[i]; 447 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 448 n = ii[i + 1] - jrow; 449 for (j = 0; j < n; j++) { 450 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 451 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 452 PetscCall(VecResetArray(bmat->right)); 453 jrow++; 454 } 455 PetscCall(VecResetArray(bmat->left)); 456 } 457 PetscCall(VecRestoreArray(x, &xx)); 458 PetscCall(VecRestoreArray(y, &yy)); 459 PetscFunctionReturn(PETSC_SUCCESS); 460 } 461 462 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y) 463 { 464 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 465 PetscScalar *xx, *yy; 466 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j; 467 Mat *aa; 468 469 PetscFunctionBegin; 470 /* 471 Standard CSR multiply except each entry is a Mat 472 */ 473 PetscCall(VecGetArray(x, &xx)); 474 475 PetscCall(VecSet(y, 0.0)); 476 PetscCall(VecGetArray(y, &yy)); 477 aj = bmat->j; 478 aa = bmat->a; 479 ii = bmat->i; 480 for (i = 0; i < m; i++) { 481 jrow = ii[i]; 482 n = ii[i + 1] - jrow; 483 PetscCall(VecPlaceArray(bmat->left, yy + bs * i)); 484 PetscCall(VecPlaceArray(bmat->middle, xx + bs * i)); 485 /* if we ALWAYS required a diagonal entry then could remove this if test */ 486 if (aj[jrow] == i) { 487 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); 488 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 489 PetscCall(VecResetArray(bmat->right)); 490 jrow++; 491 n--; 492 } 493 for (j = 0; j < n; j++) { 494 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */ 495 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left)); 496 PetscCall(VecResetArray(bmat->right)); 497 498 PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */ 499 PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right)); 500 PetscCall(VecResetArray(bmat->right)); 501 jrow++; 502 } 503 PetscCall(VecResetArray(bmat->left)); 504 PetscCall(VecResetArray(bmat->middle)); 505 } 506 PetscCall(VecRestoreArray(x, &xx)); 507 PetscCall(VecRestoreArray(y, &yy)); 508 PetscFunctionReturn(PETSC_SUCCESS); 509 } 510 511 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 512 { 513 PetscFunctionBegin; 514 PetscFunctionReturn(PETSC_SUCCESS); 515 } 516 517 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y) 518 { 519 PetscFunctionBegin; 520 PetscFunctionReturn(PETSC_SUCCESS); 521 } 522 523 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) 524 { 525 PetscFunctionBegin; 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 /* 530 Adds diagonal pointers to sparse matrix nonzero structure. 531 */ 532 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) 533 { 534 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 535 PetscInt i, j, mbs = A->rmap->n / A->rmap->bs; 536 537 PetscFunctionBegin; 538 if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag)); 539 for (i = 0; i < mbs; i++) { 540 a->diag[i] = a->i[i + 1]; 541 for (j = a->i[i]; j < a->i[i + 1]; j++) { 542 if (a->j[j] == i) { 543 a->diag[i] = j; 544 break; 545 } 546 } 547 } 548 PetscFunctionReturn(PETSC_SUCCESS); 549 } 550 551 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 552 { 553 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 554 Mat_SeqAIJ *c; 555 PetscInt i, k, first, step, lensi, nrows, ncols; 556 PetscInt *j_new, *i_new, *aj = a->j, *ailen = a->ilen; 557 PetscScalar *a_new; 558 Mat C, *aa = a->a; 559 PetscBool stride, equal; 560 561 PetscFunctionBegin; 562 PetscCall(ISEqual(isrow, iscol, &equal)); 563 PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices"); 564 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride)); 565 PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices"); 566 PetscCall(ISStrideGetInfo(iscol, &first, &step)); 567 PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block"); 568 569 PetscCall(ISGetLocalSize(isrow, &nrows)); 570 ncols = nrows; 571 572 /* create submatrix */ 573 if (scall == MAT_REUSE_MATRIX) { 574 PetscInt n_cols, n_rows; 575 C = *B; 576 PetscCall(MatGetSize(C, &n_rows, &n_cols)); 577 PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size"); 578 PetscCall(MatZeroEntries(C)); 579 } else { 580 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 581 PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE)); 582 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ)); 583 else PetscCall(MatSetType(C, MATSEQAIJ)); 584 PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen)); 585 PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen)); 586 } 587 c = (Mat_SeqAIJ *)C->data; 588 589 /* loop over rows inserting into submatrix */ 590 a_new = c->a; 591 j_new = c->j; 592 i_new = c->i; 593 594 for (i = 0; i < nrows; i++) { 595 lensi = ailen[i]; 596 for (k = 0; k < lensi; k++) { 597 *j_new++ = *aj++; 598 PetscCall(MatGetValue(*aa++, first, first, a_new++)); 599 } 600 i_new[i + 1] = i_new[i] + lensi; 601 c->ilen[i] = lensi; 602 } 603 604 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 605 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 606 *B = C; 607 PetscFunctionReturn(PETSC_SUCCESS); 608 } 609 610 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode) 611 { 612 Mat_BlockMat *a = (Mat_BlockMat *)A->data; 613 PetscInt fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax; 614 PetscInt m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0; 615 Mat *aa = a->a, *ap; 616 617 PetscFunctionBegin; 618 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 619 620 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 621 for (i = 1; i < m; i++) { 622 /* move each row back by the amount of empty slots (fshift) before it*/ 623 fshift += imax[i - 1] - ailen[i - 1]; 624 rmax = PetscMax(rmax, ailen[i]); 625 if (fshift) { 626 ip = aj + ai[i]; 627 ap = aa + ai[i]; 628 N = ailen[i]; 629 for (j = 0; j < N; j++) { 630 ip[j - fshift] = ip[j]; 631 ap[j - fshift] = ap[j]; 632 } 633 } 634 ai[i] = ai[i - 1] + ailen[i - 1]; 635 } 636 if (m) { 637 fshift += imax[m - 1] - ailen[m - 1]; 638 ai[m] = ai[m - 1] + ailen[m - 1]; 639 } 640 /* reset ilen and imax for each row */ 641 for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i]; 642 a->nz = ai[m]; 643 for (i = 0; i < a->nz; i++) { 644 PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz); 645 PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY)); 646 PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY)); 647 } 648 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz)); 649 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs)); 650 PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax)); 651 652 A->info.mallocs += a->reallocs; 653 a->reallocs = 0; 654 A->info.nz_unneeded = (double)fshift; 655 a->rmax = rmax; 656 PetscCall(MatMarkDiagonal_BlockMat(A)); 657 PetscFunctionReturn(PETSC_SUCCESS); 658 } 659 660 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg) 661 { 662 PetscFunctionBegin; 663 if (opt == MAT_SYMMETRIC && flg) { 664 A->ops->sor = MatSOR_BlockMat_Symmetric; 665 A->ops->mult = MatMult_BlockMat_Symmetric; 666 } else { 667 PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt])); 668 } 669 PetscFunctionReturn(PETSC_SUCCESS); 670 } 671 672 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat, 673 NULL, 674 NULL, 675 MatMult_BlockMat, 676 /* 4*/ MatMultAdd_BlockMat, 677 MatMultTranspose_BlockMat, 678 MatMultTransposeAdd_BlockMat, 679 NULL, 680 NULL, 681 NULL, 682 /* 10*/ NULL, 683 NULL, 684 NULL, 685 MatSOR_BlockMat, 686 NULL, 687 /* 15*/ NULL, 688 NULL, 689 NULL, 690 NULL, 691 NULL, 692 /* 20*/ NULL, 693 MatAssemblyEnd_BlockMat, 694 MatSetOption_BlockMat, 695 NULL, 696 /* 24*/ NULL, 697 NULL, 698 NULL, 699 NULL, 700 NULL, 701 /* 29*/ NULL, 702 NULL, 703 NULL, 704 NULL, 705 NULL, 706 /* 34*/ NULL, 707 NULL, 708 NULL, 709 NULL, 710 NULL, 711 /* 39*/ NULL, 712 NULL, 713 NULL, 714 NULL, 715 NULL, 716 /* 44*/ NULL, 717 NULL, 718 MatShift_Basic, 719 NULL, 720 NULL, 721 /* 49*/ NULL, 722 NULL, 723 NULL, 724 NULL, 725 NULL, 726 /* 54*/ NULL, 727 NULL, 728 NULL, 729 NULL, 730 NULL, 731 /* 59*/ MatCreateSubMatrix_BlockMat, 732 MatDestroy_BlockMat, 733 MatView_BlockMat, 734 NULL, 735 NULL, 736 /* 64*/ NULL, 737 NULL, 738 NULL, 739 NULL, 740 NULL, 741 /* 69*/ NULL, 742 NULL, 743 NULL, 744 NULL, 745 NULL, 746 /* 74*/ NULL, 747 NULL, 748 NULL, 749 NULL, 750 NULL, 751 /* 79*/ NULL, 752 NULL, 753 NULL, 754 NULL, 755 MatLoad_BlockMat, 756 /* 84*/ NULL, 757 NULL, 758 NULL, 759 NULL, 760 NULL, 761 /* 89*/ NULL, 762 NULL, 763 NULL, 764 NULL, 765 NULL, 766 /* 94*/ NULL, 767 NULL, 768 NULL, 769 NULL, 770 NULL, 771 /* 99*/ NULL, 772 NULL, 773 NULL, 774 NULL, 775 NULL, 776 /*104*/ NULL, 777 NULL, 778 NULL, 779 NULL, 780 NULL, 781 /*109*/ NULL, 782 NULL, 783 NULL, 784 NULL, 785 NULL, 786 /*114*/ NULL, 787 NULL, 788 NULL, 789 NULL, 790 NULL, 791 /*119*/ NULL, 792 NULL, 793 NULL, 794 NULL, 795 NULL, 796 /*124*/ NULL, 797 NULL, 798 NULL, 799 NULL, 800 NULL, 801 /*129*/ NULL, 802 NULL, 803 NULL, 804 NULL, 805 NULL, 806 /*134*/ NULL, 807 NULL, 808 NULL, 809 NULL, 810 NULL, 811 /*139*/ NULL, 812 NULL, 813 NULL, 814 NULL, 815 NULL, 816 /*144*/ NULL, 817 NULL, 818 NULL, 819 NULL, 820 NULL, 821 NULL, 822 /*150*/ NULL, 823 NULL, 824 NULL, 825 NULL, 826 NULL, 827 /*155*/ NULL, 828 NULL}; 829 830 /*@C 831 MatBlockMatSetPreallocation - For good matrix assembly performance 832 the user should preallocate the matrix storage by setting the parameter nz 833 (or the array nnz). By setting these parameters accurately, performance 834 during matrix assembly can be increased by more than a factor of 50. 835 836 Collective 837 838 Input Parameters: 839 + B - The matrix 840 . bs - size of each block in matrix 841 . nz - number of nonzeros per block row (same for all rows) 842 - nnz - array containing the number of nonzeros in the various block rows 843 (possibly different for each row) or `NULL` 844 845 Level: intermediate 846 847 Notes: 848 If `nnz` is given then `nz` is ignored 849 850 Specify the preallocated storage with either `nz` or `nnz` (not both). 851 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 852 allocation. 853 854 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()` 855 @*/ 856 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 857 { 858 PetscFunctionBegin; 859 PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz) 864 { 865 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data; 866 PetscInt i; 867 868 PetscFunctionBegin; 869 PetscCall(PetscLayoutSetBlockSize(A->rmap, bs)); 870 PetscCall(PetscLayoutSetBlockSize(A->cmap, bs)); 871 PetscCall(PetscLayoutSetUp(A->rmap)); 872 PetscCall(PetscLayoutSetUp(A->cmap)); 873 PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs)); 874 875 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 876 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 877 if (nnz) { 878 for (i = 0; i < A->rmap->n / bs; i++) { 879 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 880 PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs); 881 } 882 } 883 bmat->mbs = A->rmap->n / bs; 884 885 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right)); 886 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle)); 887 PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left)); 888 889 if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen)); 890 if (PetscLikely(nnz)) { 891 nz = 0; 892 for (i = 0; i < A->rmap->n / A->rmap->bs; i++) { 893 bmat->imax[i] = nnz[i]; 894 nz += nnz[i]; 895 } 896 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation"); 897 898 /* bmat->ilen will count nonzeros in each row so far. */ 899 PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs)); 900 901 /* allocate the matrix space */ 902 PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i)); 903 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a)); 904 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j)); 905 PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i)); 906 bmat->free_a = PETSC_TRUE; 907 bmat->free_ij = PETSC_TRUE; 908 909 bmat->i[0] = 0; 910 for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1]; 911 912 bmat->nz = 0; 913 bmat->maxnz = nz; 914 A->info.nz_unneeded = (double)bmat->maxnz; 915 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } 918 919 /*MC 920 MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix 921 consisting of (usually) sparse blocks. 922 923 Level: advanced 924 925 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()` 926 M*/ 927 928 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) 929 { 930 Mat_BlockMat *b; 931 932 PetscFunctionBegin; 933 PetscCall(PetscNew(&b)); 934 A->data = (void *)b; 935 A->ops[0] = MatOps_Values; 936 A->assembled = PETSC_TRUE; 937 A->preallocated = PETSC_FALSE; 938 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT)); 939 940 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat)); 941 PetscFunctionReturn(PETSC_SUCCESS); 942 } 943 944 /*@C 945 MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object 946 947 Collective 948 949 Input Parameters: 950 + comm - MPI communicator 951 . m - number of rows 952 . n - number of columns 953 . bs - size of each submatrix 954 . nz - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known) 955 - nnz - expected number of nonzers per block row if known (use `NULL` otherwise) 956 957 Output Parameter: 958 . A - the matrix 959 960 Level: intermediate 961 962 Notes: 963 Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must 964 have the same size and be sequential. The local and global sizes must be compatible with this decomposition. 965 966 For matrices containing parallel submatrices and variable block sizes, see `MATNEST`. 967 968 Developer Notes: 969 I don't like the name, it is not `MATNESTMAT` 970 971 .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()` 972 @*/ 973 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A) 974 { 975 PetscFunctionBegin; 976 PetscCall(MatCreate(comm, A)); 977 PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 978 PetscCall(MatSetType(*A, MATBLOCKMAT)); 979 PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz)); 980 PetscFunctionReturn(PETSC_SUCCESS); 981 } 982