/* Defines the basic matrix operations for sequential dense. Portions of this code are under: Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. */ #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ #include <../src/mat/impls/dense/mpi/mpidense.h> #include #include <../src/mat/impls/aij/seq/aij.h> #include PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt j, k, n = A->rmap->n; PetscScalar *v; PetscFunctionBegin; PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); PetscCall(MatDenseGetArray(A, &v)); if (!hermitian) { for (k = 0; k < n; k++) { for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; } } else { for (k = 0; k < n; k++) { for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); } } PetscCall(MatDenseRestoreArray(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt info, n; PetscFunctionBegin; if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(PetscBLASIntCast(A->cmap->n, &n)); if (A->factortype == MAT_FACTOR_LU) { PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); if (!mat->fwork) { mat->lfwork = n; PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); } PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); } else if (A->factortype == MAT_FACTOR_CHOLESKY) { if (A->spd == PETSC_BOOL3_TRUE) { PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); PetscCall(PetscFPTrapPop()); PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); #if defined(PETSC_USE_COMPLEX) } else if (A->hermitian == PETSC_BOOL3_TRUE) { PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); PetscCall(PetscFPTrapPop()); PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); #endif } else { /* symmetric case */ PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); PetscCall(PetscFPTrapPop()); PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); } PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1); PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); A->ops->solve = NULL; A->ops->matsolve = NULL; A->ops->solvetranspose = NULL; A->ops->matsolvetranspose = NULL; A->ops->solveadd = NULL; A->ops->solvetransposeadd = NULL; A->factortype = MAT_FACTOR_NONE; PetscCall(PetscFree(A->solvertype)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { Mat_SeqDense *l = (Mat_SeqDense *)A->data; PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; PetscScalar *slot, *bb, *v; const PetscScalar *xx; PetscFunctionBegin; if (PetscDefined(USE_DEBUG)) { for (i = 0; i < N; i++) { PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); } } if (!N) PetscFunctionReturn(PETSC_SUCCESS); /* fix right-hand side if needed */ if (x && b) { Vec xt; PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); PetscCall(VecDuplicate(x, &xt)); PetscCall(VecCopy(x, xt)); PetscCall(VecScale(xt, -1.0)); PetscCall(MatMultAdd(A, xt, b, b)); PetscCall(VecDestroy(&xt)); PetscCall(VecGetArrayRead(x, &xx)); PetscCall(VecGetArray(b, &bb)); for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; PetscCall(VecRestoreArrayRead(x, &xx)); PetscCall(VecRestoreArray(b, &bb)); } PetscCall(MatDenseGetArray(A, &v)); for (i = 0; i < N; i++) { slot = v + rows[i] * m; PetscCall(PetscArrayzero(slot, r)); } for (i = 0; i < N; i++) { slot = v + rows[i]; for (j = 0; j < n; j++) { *slot = 0.0; slot += m; } } if (diag != 0.0) { PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); for (i = 0; i < N; i++) { slot = v + (m + 1) * rows[i]; *slot = diag; } } PetscCall(MatDenseRestoreArray(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { Mat B = NULL; Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; Mat_SeqDense *b; PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; const MatScalar *av; PetscBool isseqdense; PetscFunctionBegin; if (reuse == MAT_REUSE_MATRIX) { PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); } if (reuse != MAT_REUSE_MATRIX) { PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); PetscCall(MatSetSizes(B, m, n, m, n)); PetscCall(MatSetType(B, MATSEQDENSE)); PetscCall(MatSeqDenseSetPreallocation(B, NULL)); b = (Mat_SeqDense *)B->data; } else { b = (Mat_SeqDense *)((*newmat)->data); for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); } PetscCall(MatSeqAIJGetArrayRead(A, &av)); for (i = 0; i < m; i++) { PetscInt j; for (j = 0; j < ai[1] - ai[0]; j++) { b->v[*aj * b->lda + i] = *av; aj++; av++; } ai++; } PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); if (reuse == MAT_INPLACE_MATRIX) { PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); PetscCall(MatHeaderReplace(A, &B)); } else { if (B) *newmat = B; PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); } PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { Mat B = NULL; Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscInt i, j; PetscInt *rows, *nnz; MatScalar *aa = a->v, *vals; PetscFunctionBegin; PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); if (reuse != MAT_REUSE_MATRIX) { PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); PetscCall(MatSetType(B, MATSEQAIJ)); for (j = 0; j < A->cmap->n; j++) { for (i = 0; i < A->rmap->n; i++) if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; aa += a->lda; } PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); } else B = *newmat; aa = a->v; for (j = 0; j < A->cmap->n; j++) { PetscInt numRows = 0; for (i = 0; i < A->rmap->n; i++) if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { rows[numRows] = i; vals[numRows++] = aa[i]; } PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); aa += a->lda; } PetscCall(PetscFree3(rows, nnz, vals)); PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &B)); else if (reuse != MAT_REUSE_MATRIX) *newmat = B; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; const PetscScalar *xv; PetscScalar *yv; PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; PetscFunctionBegin; PetscCall(MatDenseGetArrayRead(X, &xv)); PetscCall(MatDenseGetArray(Y, &yv)); PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); PetscCall(PetscBLASIntCast(X->rmap->n, &m)); PetscCall(PetscBLASIntCast(x->lda, &ldax)); PetscCall(PetscBLASIntCast(y->lda, &lday)); if (ldax > m || lday > m) { for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); } else { PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); } PetscCall(MatDenseRestoreArrayRead(X, &xv)); PetscCall(MatDenseRestoreArray(Y, &yv)); PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) { PetscLogDouble N = A->rmap->n * A->cmap->n; PetscFunctionBegin; info->block_size = 1.0; info->nz_allocated = N; info->nz_used = N; info->nz_unneeded = 0; info->assemblies = A->num_ass; info->mallocs = 0; info->memory = 0; /* REVIEW ME */ info->fill_ratio_given = 0; info->fill_ratio_needed = 0; info->factor_mallocs = 0; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscScalar *v; PetscBLASInt one = 1, j, nz, lda = 0; PetscFunctionBegin; PetscCall(MatDenseGetArray(A, &v)); PetscCall(PetscBLASIntCast(a->lda, &lda)); if (lda > A->rmap->n) { PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); } else { PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); } PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); PetscCall(MatDenseRestoreArray(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscScalar *v; PetscInt j, k; PetscFunctionBegin; PetscCall(MatDenseGetArray(A, &v)); k = PetscMin(A->rmap->n, A->cmap->n); for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; PetscCall(PetscLogFlops(k)); PetscCall(MatDenseRestoreArray(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscInt i, j, m = A->rmap->n, N = a->lda; const PetscScalar *v; PetscFunctionBegin; *fl = PETSC_FALSE; if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(MatDenseGetArrayRead(A, &v)); for (i = 0; i < m; i++) { for (j = i; j < m; j++) { if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; } } *fl = PETSC_TRUE; restore: PetscCall(MatDenseRestoreArrayRead(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscInt i, j, m = A->rmap->n, N = a->lda; const PetscScalar *v; PetscFunctionBegin; *fl = PETSC_FALSE; if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(MatDenseGetArrayRead(A, &v)); for (i = 0; i < m; i++) { for (j = i; j < m; j++) { if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; } } *fl = PETSC_TRUE; restore: PetscCall(MatDenseRestoreArrayRead(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt lda = mat->lda, j, m, nlda = lda; PetscBool isdensecpu; PetscFunctionBegin; PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ PetscCall(MatDenseSetLDA(newi, lda)); } PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); if (cpvalues == MAT_COPY_VALUES) { const PetscScalar *av; PetscScalar *v; PetscCall(MatDenseGetArrayRead(A, &av)); PetscCall(MatDenseGetArrayWrite(newi, &v)); PetscCall(MatDenseGetLDA(newi, &nlda)); m = A->rmap->n; if (lda > m || nlda > m) { for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); } else { PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); } PetscCall(MatDenseRestoreArrayWrite(newi, &v)); PetscCall(MatDenseRestoreArrayRead(A, &av)); PetscCall(MatPropagateSymmetryOptions(A, newi)); } PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { PetscFunctionBegin; PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt info; PetscFunctionBegin; PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info); PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt info; PetscFunctionBegin; if (A->spd == PETSC_BOOL3_TRUE) { if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info); if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); #if defined(PETSC_USE_COMPLEX) } else if (A->hermitian == PETSC_BOOL3_TRUE) { if (T) PetscCall(MatConjugate_SeqDense(A)); PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info); if (T) PetscCall(MatConjugate_SeqDense(A)); #endif } else { /* symmetric case */ PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info); } PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt info; char trans; PetscFunctionBegin; if (PetscDefined(USE_COMPLEX)) { trans = 'C'; } else { trans = 'T'; } PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); { /* lwork depends on the number of right-hand sides */ PetscBLASInt nlfwork, lfwork = -1; PetscScalar fwork; PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); nlfwork = (PetscBLASInt)PetscRealPart(fwork); if (nlfwork > mat->lfwork) { mat->lfwork = nlfwork; PetscCall(PetscFree(mat->fwork)); PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); } } PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); for (PetscInt j = 0; j < nrhs; j++) { for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; } PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt info; PetscFunctionBegin; if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); { /* lwork depends on the number of right-hand sides */ PetscBLASInt nlfwork, lfwork = -1; PetscScalar fwork; PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); nlfwork = (PetscBLASInt)PetscRealPart(fwork); if (nlfwork > mat->lfwork) { mat->lfwork = nlfwork; PetscCall(PetscFree(mat->fwork)); PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); } } PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscScalar *y; PetscBLASInt m = 0, k = 0; PetscFunctionBegin; PetscCall(PetscBLASIntCast(A->rmap->n, &m)); PetscCall(PetscBLASIntCast(A->cmap->n, &k)); if (k < m) { PetscCall(VecCopy(xx, mat->qrrhs)); PetscCall(VecGetArray(mat->qrrhs, &y)); } else { PetscCall(VecCopy(xx, yy)); PetscCall(VecGetArray(yy, &y)); } *_y = y; *_k = k; *_m = m; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscScalar *y = NULL; PetscBLASInt m, k; PetscFunctionBegin; y = *_y; *_y = NULL; k = *_k; m = *_m; if (k < m) { PetscScalar *yv; PetscCall(VecGetArray(yy, &yv)); PetscCall(PetscArraycpy(yv, y, k)); PetscCall(VecRestoreArray(yy, &yv)); PetscCall(VecRestoreArray(mat->qrrhs, &y)); } else { PetscCall(VecRestoreArray(yy, &y)); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) { PetscScalar *y = NULL; PetscBLASInt m = 0, k = 0; PetscFunctionBegin; PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) { PetscScalar *y = NULL; PetscBLASInt m = 0, k = 0; PetscFunctionBegin; PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { PetscScalar *y = NULL; PetscBLASInt m = 0, k = 0; PetscFunctionBegin; PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { PetscScalar *y = NULL; PetscBLASInt m = 0, k = 0; PetscFunctionBegin; PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) { PetscScalar *y = NULL; PetscBLASInt m = 0, k = 0; PetscFunctionBegin; PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) { PetscScalar *y = NULL; PetscBLASInt m = 0, k = 0; PetscFunctionBegin; PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { const PetscScalar *b; PetscScalar *y; PetscInt n, _ldb, _ldx; PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; PetscFunctionBegin; *_ldy = 0; *_m = 0; *_nrhs = 0; *_k = 0; *_y = NULL; PetscCall(PetscBLASIntCast(A->rmap->n, &m)); PetscCall(PetscBLASIntCast(A->cmap->n, &k)); PetscCall(MatGetSize(B, NULL, &n)); PetscCall(PetscBLASIntCast(n, &nrhs)); PetscCall(MatDenseGetLDA(B, &_ldb)); PetscCall(PetscBLASIntCast(_ldb, &ldb)); PetscCall(MatDenseGetLDA(X, &_ldx)); PetscCall(PetscBLASIntCast(_ldx, &ldx)); if (ldx < m) { PetscCall(MatDenseGetArrayRead(B, &b)); PetscCall(PetscMalloc1(nrhs * m, &y)); if (ldb == m) { PetscCall(PetscArraycpy(y, b, ldb * nrhs)); } else { for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); } ldy = m; PetscCall(MatDenseRestoreArrayRead(B, &b)); } else { if (ldb == ldx) { PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); PetscCall(MatDenseGetArray(X, &y)); } else { PetscCall(MatDenseGetArray(X, &y)); PetscCall(MatDenseGetArrayRead(B, &b)); for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); PetscCall(MatDenseRestoreArrayRead(B, &b)); } ldy = ldx; } *_y = y; *_ldy = ldy; *_k = k; *_m = m; *_nrhs = nrhs; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { PetscScalar *y; PetscInt _ldx; PetscBLASInt k, ldy, nrhs, ldx = 0; PetscFunctionBegin; y = *_y; *_y = NULL; k = *_k; ldy = *_ldy; nrhs = *_nrhs; PetscCall(MatDenseGetLDA(X, &_ldx)); PetscCall(PetscBLASIntCast(_ldx, &ldx)); if (ldx != ldy) { PetscScalar *xv; PetscCall(MatDenseGetArray(X, &xv)); for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); PetscCall(MatDenseRestoreArray(X, &xv)); PetscCall(PetscFree(y)); } else { PetscCall(MatDenseRestoreArray(X, &y)); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) { PetscScalar *y; PetscBLASInt m, k, ldy, nrhs; PetscFunctionBegin; PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) { PetscScalar *y; PetscBLASInt m, k, ldy, nrhs; PetscFunctionBegin; PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) { PetscScalar *y; PetscBLASInt m, k, ldy, nrhs; PetscFunctionBegin; PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) { PetscScalar *y; PetscBLASInt m, k, ldy, nrhs; PetscFunctionBegin; PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) { PetscScalar *y; PetscBLASInt m, k, ldy, nrhs; PetscFunctionBegin; PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) { PetscScalar *y; PetscBLASInt m, k, ldy, nrhs; PetscFunctionBegin; PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); PetscFunctionReturn(PETSC_SUCCESS); } /* COMMENT: I have chosen to hide row permutation in the pivots, rather than put it in the Mat->row slot.*/ PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *minfo) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt n, m, info; PetscFunctionBegin; PetscCall(PetscBLASIntCast(A->cmap->n, &n)); PetscCall(PetscBLASIntCast(A->rmap->n, &m)); if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info); PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info); A->ops->solve = MatSolve_SeqDense_LU; A->ops->matsolve = MatMatSolve_SeqDense_LU; A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; A->factortype = MAT_FACTOR_LU; PetscCall(PetscFree(A->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info) { PetscFunctionBegin; PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); PetscUseTypeMethod(fact, lufactor, NULL, NULL, info); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *info) { PetscFunctionBegin; fact->preallocated = PETSC_TRUE; fact->assembled = PETSC_TRUE; fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; PetscFunctionReturn(PETSC_SUCCESS); } /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, PETSC_UNUSED const MatFactorInfo *minfo) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt info, n; PetscFunctionBegin; PetscCall(PetscBLASIntCast(A->cmap->n, &n)); if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); if (A->spd == PETSC_BOOL3_TRUE) { PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); PetscCall(PetscFPTrapPop()); #if defined(PETSC_USE_COMPLEX) } else if (A->hermitian == PETSC_BOOL3_TRUE) { if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); if (!mat->fwork) { PetscScalar dummy; mat->lfwork = -1; PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); } PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); #endif } else { /* symmetric case */ if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); if (!mat->fwork) { PetscScalar dummy; mat->lfwork = -1; PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); } PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); } PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1); A->ops->solve = MatSolve_SeqDense_Cholesky; A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; A->factortype = MAT_FACTOR_CHOLESKY; PetscCall(PetscFree(A->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info) { PetscFunctionBegin; PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); PetscUseTypeMethod(fact, choleskyfactor, NULL, info); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { PetscFunctionBegin; fact->assembled = PETSC_TRUE; fact->preallocated = PETSC_TRUE; fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, PETSC_UNUSED const MatFactorInfo *minfo) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscBLASInt n, m, info, min, max; PetscFunctionBegin; PetscCall(PetscBLASIntCast(A->cmap->n, &n)); PetscCall(PetscBLASIntCast(A->rmap->n, &m)); max = PetscMax(m, n); min = PetscMin(m, n); if (!mat->tau) PetscCall(PetscMalloc1(min, &mat->tau)); if (!mat->pivots) PetscCall(PetscMalloc1(n, &mat->pivots)); if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); if (!mat->fwork) { PetscScalar dummy; mat->lfwork = -1; PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); } PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info); // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n mat->rank = min; A->ops->solve = MatSolve_SeqDense_QR; A->ops->matsolve = MatMatSolve_SeqDense_QR; A->factortype = MAT_FACTOR_QR; if (m == n) { A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; } PetscCall(PetscFree(A->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info) { PetscFunctionBegin; PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, info)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { PetscFunctionBegin; fact->assembled = PETSC_TRUE; fact->preallocated = PETSC_TRUE; PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); PetscFunctionReturn(PETSC_SUCCESS); } /* uses LAPACK */ PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) { PetscFunctionBegin; PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); PetscCall(MatSetType(*fact, MATDENSE)); (*fact)->trivialsymbolic = PETSC_TRUE; if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; } else if (ftype == MAT_FACTOR_QR) { PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); } (*fact)->factortype = ftype; PetscCall(PetscFree((*fact)->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscScalar *x, *v = mat->v, zero = 0.0, xt; const PetscScalar *b; PetscInt m = A->rmap->n, i; PetscBLASInt o = 1, bm = 0; PetscFunctionBegin; #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); #endif if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ PetscCall(PetscBLASIntCast(m, &bm)); if (flag & SOR_ZERO_INITIAL_GUESS) { /* this is a hack fix, should have another version without the second BLASdotu */ PetscCall(VecSet(xx, zero)); } PetscCall(VecGetArray(xx, &x)); PetscCall(VecGetArrayRead(bb, &b)); its = its * lits; PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); while (its--) { if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { for (i = 0; i < m; i++) { PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); } } if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { for (i = m - 1; i >= 0; i--) { PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); } } } PetscCall(VecRestoreArrayRead(bb, &b)); PetscCall(VecRestoreArray(xx, &x)); PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscScalar *y, _DOne = 1.0, _DZero = 0.0; PetscBLASInt m, n, _One = 1; const PetscScalar *v = mat->v, *x; PetscFunctionBegin; PetscCall(PetscBLASIntCast(A->rmap->n, &m)); PetscCall(PetscBLASIntCast(c_end - c_start, &n)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArrayWrite(yy, &y)); if (!m || !n) { PetscBLASInt i; if (trans) for (i = 0; i < n; i++) y[i] = 0.0; else for (i = 0; i < m; i++) y[i] = 0.0; } else { if (trans) { if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); } else { PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); } PetscCall(PetscLogFlops(2.0 * m * n - n)); } PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArrayWrite(yy, &y)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) { PetscFunctionBegin; PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) { PetscFunctionBegin; PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) { PetscFunctionBegin; PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) { PetscFunctionBegin; PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; const PetscScalar *v = mat->v, *x; PetscScalar *y, _DOne = 1.0; PetscBLASInt m, n, _One = 1; PetscFunctionBegin; PetscCall(PetscBLASIntCast(A->rmap->n, &m)); PetscCall(PetscBLASIntCast(c_end - c_start, &n)); PetscCall(VecCopy(zz, yy)); if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(VecGetArray(yy, &y)); PetscCall(VecGetArrayRead(xx, &x)); if (trans) { if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); } else { PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); } PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscCall(PetscLogFlops(2.0 * m * n)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) { PetscFunctionBegin; PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) { PetscFunctionBegin; PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) { PetscFunctionBegin; PetscMPIInt rank; PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { PetscFunctionBegin; PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { PetscFunctionBegin; PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { PetscFunctionBegin; PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt i; PetscFunctionBegin; if (ncols) *ncols = A->cmap->n; if (cols) { PetscCall(PetscMalloc1(A->cmap->n, cols)); for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; } if (vals) { const PetscScalar *v; PetscCall(MatDenseGetArrayRead(A, &v)); PetscCall(PetscMalloc1(A->cmap->n, vals)); v += row; for (i = 0; i < A->cmap->n; i++) { (*vals)[i] = *v; v += mat->lda; } PetscCall(MatDenseRestoreArrayRead(A, &v)); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { PetscFunctionBegin; if (cols) PetscCall(PetscFree(*cols)); if (vals) PetscCall(PetscFree(*vals)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscScalar *av; PetscInt i, j, idx = 0; #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) PetscOffloadMask oldf; #endif PetscFunctionBegin; PetscCall(MatDenseGetArray(A, &av)); if (!mat->roworiented) { if (addv == INSERT_VALUES) { for (j = 0; j < n; j++) { if (indexn[j] < 0) { idx += m; continue; } PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); for (i = 0; i < m; i++) { if (indexm[i] < 0) { idx++; continue; } PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); } } } else { for (j = 0; j < n; j++) { if (indexn[j] < 0) { idx += m; continue; } PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); for (i = 0; i < m; i++) { if (indexm[i] < 0) { idx++; continue; } PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); } } } } else { if (addv == INSERT_VALUES) { for (i = 0; i < m; i++) { if (indexm[i] < 0) { idx += n; continue; } PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); for (j = 0; j < n; j++) { if (indexn[j] < 0) { idx++; continue; } PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); } } } else { for (i = 0; i < m; i++) { if (indexm[i] < 0) { idx += n; continue; } PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); for (j = 0; j < n; j++) { if (indexn[j] < 0) { idx++; continue; } PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); } } } } /* hack to prevent unneeded copy to the GPU while returning the array */ #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) oldf = A->offloadmask; A->offloadmask = PETSC_OFFLOAD_GPU; #endif PetscCall(MatDenseRestoreArray(A, &av)); #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); #endif PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; const PetscScalar *vv; PetscInt i, j; PetscFunctionBegin; PetscCall(MatDenseGetArrayRead(A, &vv)); /* row-oriented output */ for (i = 0; i < m; i++) { if (indexm[i] < 0) { v += n; continue; } PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); for (j = 0; j < n; j++) { if (indexn[j] < 0) { v++; continue; } PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); *v++ = vv[indexn[j] * mat->lda + indexm[i]]; } } PetscCall(MatDenseRestoreArrayRead(A, &vv)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) { PetscBool skipHeader; PetscViewerFormat format; PetscInt header[4], M, N, m, lda, i, j; PetscCount k; const PetscScalar *v; PetscScalar *vwork; PetscFunctionBegin; PetscCall(PetscViewerSetUp(viewer)); PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); PetscCall(PetscViewerGetFormat(viewer, &format)); if (skipHeader) format = PETSC_VIEWER_NATIVE; PetscCall(MatGetSize(mat, &M, &N)); /* write matrix header */ header[0] = MAT_FILE_CLASSID; header[1] = M; header[2] = N; header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); PetscCall(MatGetLocalSize(mat, &m, NULL)); if (format != PETSC_VIEWER_NATIVE) { PetscInt nnz = m * N, *iwork; /* store row lengths for each row */ PetscCall(PetscMalloc1(nnz, &iwork)); for (i = 0; i < m; i++) iwork[i] = N; PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); /* store column indices (zero start index) */ for (k = 0, i = 0; i < m; i++) for (j = 0; j < N; j++, k++) iwork[k] = j; PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); PetscCall(PetscFree(iwork)); } /* store matrix values as a dense matrix in row major order */ PetscCall(PetscMalloc1(m * N, &vwork)); PetscCall(MatDenseGetArrayRead(mat, &v)); PetscCall(MatDenseGetLDA(mat, &lda)); for (k = 0, i = 0; i < m; i++) for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j]; PetscCall(MatDenseRestoreArrayRead(mat, &v)); PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); PetscCall(PetscFree(vwork)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) { PetscBool skipHeader; PetscInt header[4], M, N, m, nz, lda, i, j, k; PetscInt rows, cols; PetscScalar *v, *vwork; PetscFunctionBegin; PetscCall(PetscViewerSetUp(viewer)); PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); if (!skipHeader) { PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); M = header[1]; N = header[2]; PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); nz = header[3]; PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); } else { PetscCall(MatGetSize(mat, &M, &N)); PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); nz = MATRIX_BINARY_FORMAT_DENSE; } /* setup global sizes if not set */ if (mat->rmap->N < 0) mat->rmap->N = M; if (mat->cmap->N < 0) mat->cmap->N = N; PetscCall(MatSetUp(mat)); /* check if global sizes are correct */ PetscCall(MatGetSize(mat, &rows, &cols)); PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); PetscCall(MatGetSize(mat, NULL, &N)); PetscCall(MatGetLocalSize(mat, &m, NULL)); PetscCall(MatDenseGetArray(mat, &v)); PetscCall(MatDenseGetLDA(mat, &lda)); if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ PetscCount nnz = (size_t)m * N; /* read in matrix values */ PetscCall(PetscMalloc1(nnz, &vwork)); PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); /* store values in column major order */ for (j = 0; j < N; j++) for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j]; PetscCall(PetscFree(vwork)); } else { /* matrix in file is sparse format */ PetscInt nnz = 0, *rlens, *icols; /* read in row lengths */ PetscCall(PetscMalloc1(m, &rlens)); PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); for (i = 0; i < m; i++) nnz += rlens[i]; /* read in column indices and values */ PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); /* store values in column major order */ for (k = 0, i = 0; i < m; i++) for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; PetscCall(PetscFree(rlens)); PetscCall(PetscFree2(icols, vwork)); } PetscCall(MatDenseRestoreArray(mat, &v)); PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) { PetscBool isbinary, ishdf5; PetscFunctionBegin; PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); /* force binary viewer to load .info file if it has not yet done so */ PetscCall(PetscViewerSetUp(viewer)); PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); if (isbinary) { PetscCall(MatLoad_Dense_Binary(newMat, viewer)); } else if (ishdf5) { #if defined(PETSC_HAVE_HDF5) PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); #else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); #endif } else { SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscInt i, j; const char *name; PetscScalar *v, *av; PetscViewerFormat format; #if defined(PETSC_USE_COMPLEX) PetscBool allreal = PETSC_TRUE; #endif PetscFunctionBegin; PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); PetscCall(PetscViewerGetFormat(viewer, &format)); if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ } else if (format == PETSC_VIEWER_ASCII_COMMON) { PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); for (i = 0; i < A->rmap->n; i++) { v = av + i; PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); for (j = 0; j < A->cmap->n; j++) { #if defined(PETSC_USE_COMPLEX) if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); } else if (PetscRealPart(*v)) { PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); } #else if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); #endif v += a->lda; } PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); } PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); } else { PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); #if defined(PETSC_USE_COMPLEX) /* determine if matrix has all real values */ for (j = 0; j < A->cmap->n; j++) { v = av + j * a->lda; for (i = 0; i < A->rmap->n; i++) { if (PetscImaginaryPart(v[i])) { allreal = PETSC_FALSE; break; } } } #endif if (format == PETSC_VIEWER_ASCII_MATLAB) { PetscCall(PetscObjectGetName((PetscObject)A, &name)); PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); } for (i = 0; i < A->rmap->n; i++) { v = av + i; for (j = 0; j < A->cmap->n; j++) { #if defined(PETSC_USE_COMPLEX) if (allreal) { PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); } else { PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); } #else PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); #endif v += a->lda; } PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); } if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); } PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); PetscCall(PetscViewerFlush(viewer)); PetscFunctionReturn(PETSC_SUCCESS); } #include static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) { Mat A = (Mat)Aa; PetscInt m = A->rmap->n, n = A->cmap->n, i, j; int color = PETSC_DRAW_WHITE; const PetscScalar *v; PetscViewer viewer; PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; PetscViewerFormat format; PetscFunctionBegin; PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); PetscCall(PetscViewerGetFormat(viewer, &format)); PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); /* Loop over matrix elements drawing boxes */ PetscCall(MatDenseGetArrayRead(A, &v)); if (format != PETSC_VIEWER_DRAW_CONTOUR) { PetscDrawCollectiveBegin(draw); /* Blue for negative and Red for positive */ for (j = 0; j < n; j++) { x_l = j; x_r = x_l + 1.0; for (i = 0; i < m; i++) { y_l = m - i - 1.0; y_r = y_l + 1.0; if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; else continue; PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); } } PetscDrawCollectiveEnd(draw); } else { /* use contour shading to indicate magnitude of values */ /* first determine max of all nonzero values */ PetscReal minv = 0.0, maxv = 0.0; PetscDraw popup; for (i = 0; i < m * n; i++) { if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); } if (minv >= maxv) maxv = minv + PETSC_SMALL; PetscCall(PetscDrawGetPopup(draw, &popup)); PetscCall(PetscDrawScalePopup(popup, minv, maxv)); PetscDrawCollectiveBegin(draw); for (j = 0; j < n; j++) { x_l = j; x_r = x_l + 1.0; for (i = 0; i < m; i++) { y_l = m - i - 1.0; y_r = y_l + 1.0; color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); } } PetscDrawCollectiveEnd(draw); } PetscCall(MatDenseRestoreArrayRead(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) { PetscDraw draw; PetscBool isnull; PetscReal xr, yr, xl, yl, h, w; PetscFunctionBegin; PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); PetscCall(PetscDrawIsNull(draw, &isnull)); if (isnull) PetscFunctionReturn(PETSC_SUCCESS); xr = A->cmap->n; yr = A->rmap->n; h = yr / 10.0; w = xr / 10.0; xr += w; yr += h; xl = -w; yl = -h; PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); PetscCall(PetscDrawSave(draw)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) { PetscBool isascii, isbinary, isdraw; PetscFunctionBegin; PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); if (isascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); a->unplacedarray = a->v; a->unplaced_user_alloc = a->user_alloc; a->v = (PetscScalar *)array; a->user_alloc = PETSC_TRUE; #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = PETSC_OFFLOAD_CPU; #endif PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); a->v = a->unplacedarray; a->user_alloc = a->unplaced_user_alloc; a->unplacedarray = NULL; #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = PETSC_OFFLOAD_CPU; #endif PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); if (!a->user_alloc) PetscCall(PetscFree(a->v)); a->v = (PetscScalar *)array; a->user_alloc = PETSC_FALSE; #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = PETSC_OFFLOAD_CPU; #endif PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDestroy_SeqDense(Mat mat) { Mat_SeqDense *l = (Mat_SeqDense *)mat->data; PetscFunctionBegin; PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); PetscCall(VecDestroy(&l->qrrhs)); PetscCall(PetscFree(l->tau)); PetscCall(PetscFree(l->pivots)); PetscCall(PetscFree(l->fwork)); if (!l->user_alloc) PetscCall(PetscFree(l->v)); if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); PetscCall(VecDestroy(&l->cvec)); PetscCall(MatDestroy(&l->cmat)); PetscCall(PetscFree(mat->data)); PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); #if defined(PETSC_HAVE_ELEMENTAL) PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); #endif #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); #endif #if defined(PETSC_HAVE_CUDA) PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); #endif #if defined(PETSC_HAVE_HIP) PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); #endif PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; PetscScalar *v, tmp; PetscFunctionBegin; if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); if (reuse == MAT_INPLACE_MATRIX) { if (m == n) { /* in place transpose */ PetscCall(MatDenseGetArray(A, &v)); for (j = 0; j < m; j++) { for (k = 0; k < j; k++) { tmp = v[j + k * M]; v[j + k * M] = v[k + j * M]; v[k + j * M] = tmp; } } PetscCall(MatDenseRestoreArray(A, &v)); } else { /* reuse memory, temporary allocates new memory */ PetscScalar *v2; PetscLayout tmplayout; PetscCall(PetscMalloc1((size_t)m * n, &v2)); PetscCall(MatDenseGetArray(A, &v)); for (j = 0; j < n; j++) { for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; } PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); PetscCall(PetscFree(v2)); PetscCall(MatDenseRestoreArray(A, &v)); /* cleanup size dependent quantities */ PetscCall(VecDestroy(&mat->cvec)); PetscCall(MatDestroy(&mat->cmat)); PetscCall(PetscFree(mat->pivots)); PetscCall(PetscFree(mat->fwork)); /* swap row/col layouts */ PetscCall(PetscBLASIntCast(n, &mat->lda)); tmplayout = A->rmap; A->rmap = A->cmap; A->cmap = tmplayout; } } else { /* out-of-place transpose */ Mat tmat; Mat_SeqDense *tmatd; PetscScalar *v2; PetscInt M2; if (reuse == MAT_INITIAL_MATRIX) { PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); } else tmat = *matout; PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); PetscCall(MatDenseGetArray(tmat, &v2)); tmatd = (Mat_SeqDense *)tmat->data; M2 = tmatd->lda; for (j = 0; j < n; j++) { for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; } PetscCall(MatDenseRestoreArray(tmat, &v2)); PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); *matout = tmat; } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) { Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; PetscInt i; const PetscScalar *v1, *v2; PetscFunctionBegin; if (A1->rmap->n != A2->rmap->n) { *flg = PETSC_FALSE; PetscFunctionReturn(PETSC_SUCCESS); } if (A1->cmap->n != A2->cmap->n) { *flg = PETSC_FALSE; PetscFunctionReturn(PETSC_SUCCESS); } PetscCall(MatDenseGetArrayRead(A1, &v1)); PetscCall(MatDenseGetArrayRead(A2, &v2)); for (i = 0; i < A1->cmap->n; i++) { PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); v1 += mat1->lda; v2 += mat2->lda; } PetscCall(MatDenseRestoreArrayRead(A1, &v1)); PetscCall(MatDenseRestoreArrayRead(A2, &v2)); *flg = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt i, n, len; PetscScalar *x; const PetscScalar *vv; PetscFunctionBegin; PetscCall(VecGetSize(v, &n)); PetscCall(VecGetArray(v, &x)); len = PetscMin(A->rmap->n, A->cmap->n); PetscCall(MatDenseGetArrayRead(A, &vv)); PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; PetscCall(MatDenseRestoreArrayRead(A, &vv)); PetscCall(VecRestoreArray(v, &x)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; const PetscScalar *l, *r; PetscScalar x, *v, *vv; PetscInt i, j, m = A->rmap->n, n = A->cmap->n; PetscFunctionBegin; PetscCall(MatDenseGetArray(A, &vv)); if (ll) { PetscCall(VecGetSize(ll, &m)); PetscCall(VecGetArrayRead(ll, &l)); PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); for (i = 0; i < m; i++) { x = l[i]; v = vv + i; for (j = 0; j < n; j++) { (*v) *= x; v += mat->lda; } } PetscCall(VecRestoreArrayRead(ll, &l)); PetscCall(PetscLogFlops(1.0 * n * m)); } if (rr) { PetscCall(VecGetSize(rr, &n)); PetscCall(VecGetArrayRead(rr, &r)); PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); for (i = 0; i < n; i++) { x = r[i]; v = vv + i * mat->lda; for (j = 0; j < m; j++) (*v++) *= x; } PetscCall(VecRestoreArrayRead(rr, &r)); PetscCall(PetscLogFlops(1.0 * n * m)); } PetscCall(MatDenseRestoreArray(A, &vv)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscScalar *v, *vv; PetscReal sum = 0.0; PetscInt lda, m = A->rmap->n, i, j; PetscFunctionBegin; PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); PetscCall(MatDenseGetLDA(A, &lda)); v = vv; if (type == NORM_FROBENIUS) { if (lda > m) { for (j = 0; j < A->cmap->n; j++) { v = vv + j * lda; for (i = 0; i < m; i++) { sum += PetscRealPart(PetscConj(*v) * (*v)); v++; } } } else { #if defined(PETSC_USE_REAL___FP16) PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); } #else for (i = 0; i < A->cmap->n * A->rmap->n; i++) { sum += PetscRealPart(PetscConj(*v) * (*v)); v++; } } *nrm = PetscSqrtReal(sum); #endif PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); } else if (type == NORM_1) { *nrm = 0.0; for (j = 0; j < A->cmap->n; j++) { v = vv + j * mat->lda; sum = 0.0; for (i = 0; i < A->rmap->n; i++) { sum += PetscAbsScalar(*v); v++; } if (sum > *nrm) *nrm = sum; } PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); } else if (type == NORM_INFINITY) { *nrm = 0.0; for (j = 0; j < A->rmap->n; j++) { v = vv + j; sum = 0.0; for (i = 0; i < A->cmap->n; i++) { sum += PetscAbsScalar(*v); v += mat->lda; } if (sum > *nrm) *nrm = sum; } PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) { Mat_SeqDense *aij = (Mat_SeqDense *)A->data; PetscFunctionBegin; switch (op) { case MAT_ROW_ORIENTED: aij->roworiented = flg; break; default: break; } PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatZeroEntries_SeqDense(Mat A) { Mat_SeqDense *l = (Mat_SeqDense *)A->data; PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; PetscScalar *v; PetscFunctionBegin; PetscCall(MatDenseGetArrayWrite(A, &v)); if (lda > m) { for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); } else { PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); } PetscCall(MatDenseRestoreArrayWrite(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { Mat_SeqDense *l = (Mat_SeqDense *)A->data; PetscInt m = l->lda, n = A->cmap->n, i, j; PetscScalar *slot, *bb, *v; const PetscScalar *xx; PetscFunctionBegin; if (PetscDefined(USE_DEBUG)) { for (i = 0; i < N; i++) { PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); } } if (!N) PetscFunctionReturn(PETSC_SUCCESS); /* fix right-hand side if needed */ if (x && b) { PetscCall(VecGetArrayRead(x, &xx)); PetscCall(VecGetArray(b, &bb)); for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; PetscCall(VecRestoreArrayRead(x, &xx)); PetscCall(VecRestoreArray(b, &bb)); } PetscCall(MatDenseGetArray(A, &v)); for (i = 0; i < N; i++) { slot = v + rows[i]; for (j = 0; j < n; j++) { *slot = 0.0; slot += m; } } if (diag != 0.0) { PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); for (i = 0; i < N; i++) { slot = v + (m + 1) * rows[i]; *slot = diag; } } PetscCall(MatDenseRestoreArray(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscFunctionBegin; *lda = mat->lda; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); *array = mat->v; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) { PetscFunctionBegin; if (array) *array = NULL; PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` Not Collective Input Parameter: . A - a `MATDENSE` or `MATDENSECUDA` matrix Output Parameter: . lda - the leading dimension Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` @*/ PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(lda, 2); MatCheckPreallocated(A, 1); PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix Collective if the matrix layouts have not yet been setup Input Parameters: + A - a `MATDENSE` or `MATDENSECUDA` matrix - lda - the leading dimension Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` @*/ PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored Logically Collective Input Parameter: . A - a dense matrix Output Parameter: . array - pointer to the data Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` @*/ PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) PeNS { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(array, 2); PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` Logically Collective Input Parameters: + A - a dense matrix - array - pointer to the data (may be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` @*/ PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) PeNS { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); if (array) PetscAssertPointer(array, 2); PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); PetscCall(PetscObjectStateIncrease((PetscObject)A)); #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = PETSC_OFFLOAD_CPU; #endif PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored Not Collective Input Parameter: . A - a dense matrix Output Parameter: . array - pointer to the data Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` @*/ PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) PeNS { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(array, 2); PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` Not Collective Input Parameters: + A - a dense matrix - array - pointer to the data (may be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` @*/ PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) PeNS { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); if (array) PetscAssertPointer(array, 2); PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored Not Collective Input Parameter: . A - a dense matrix Output Parameter: . array - pointer to the data Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` @*/ PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) PeNS { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(array, 2); PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` Not Collective Input Parameters: + A - a dense matrix - array - pointer to the data (may be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` @*/ PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) PeNS { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); if (array) PetscAssertPointer(array, 2); PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); PetscCall(PetscObjectStateIncrease((PetscObject)A)); #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = PETSC_OFFLOAD_CPU; #endif PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored Logically Collective Input Parameter: . A - a dense matrix Output Parameters: + array - pointer to the data - mtype - memory type of the returned pointer Level: intermediate Note: If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., an array on device is always returned and is guaranteed to contain the matrix's latest data. .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` @*/ PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) { PetscBool isMPI; PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(array, 2); PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); } else { PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); if (fptr) { PetscCall((*fptr)(A, array, mtype)); } else { PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); if (mtype) *mtype = PETSC_MEMTYPE_HOST; } } PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` Logically Collective Input Parameters: + A - a dense matrix - array - pointer to the data Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` @*/ PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[]) { PetscBool isMPI; PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); if (array) PetscAssertPointer(array, 2); PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); if (isMPI) { PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); } else { PetscErrorCode (*fptr)(Mat, PetscScalar **); PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); if (fptr) { PetscCall((*fptr)(A, array)); } else { PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); } if (array) *array = NULL; } PetscCall(PetscObjectStateIncrease((PetscObject)A)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored Logically Collective Input Parameter: . A - a dense matrix Output Parameters: + array - pointer to the data - mtype - memory type of the returned pointer Level: intermediate Note: If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., an array on device is always returned and is guaranteed to contain the matrix's latest data. .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` @*/ PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype) { PetscBool isMPI; PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(array, 2); PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); } else { PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); if (fptr) { PetscCall((*fptr)(A, array, mtype)); } else { PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); if (mtype) *mtype = PETSC_MEMTYPE_HOST; } } PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` Logically Collective Input Parameters: + A - a dense matrix - array - pointer to the data Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` @*/ PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[]) { PetscBool isMPI; PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); if (array) PetscAssertPointer(array, 2); PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); if (isMPI) { PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); } else { PetscErrorCode (*fptr)(Mat, const PetscScalar **); PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); if (fptr) { PetscCall((*fptr)(A, array)); } else { PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); } if (array) *array = NULL; } PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored Logically Collective Input Parameter: . A - a dense matrix Output Parameters: + array - pointer to the data - mtype - memory type of the returned pointer Level: intermediate Note: If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., an array on device is always returned and is guaranteed to contain the matrix's latest data. .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` @*/ PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) { PetscBool isMPI; PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(array, 2); PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); if (isMPI) { PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); } else { PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); if (fptr) { PetscCall((*fptr)(A, array, mtype)); } else { PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); if (mtype) *mtype = PETSC_MEMTYPE_HOST; } } PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` Logically Collective Input Parameters: + A - a dense matrix - array - pointer to the data Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` @*/ PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[]) { PetscBool isMPI; PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); if (array) PetscAssertPointer(array, 2); PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); if (isMPI) { PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); } else { PetscErrorCode (*fptr)(Mat, PetscScalar **); PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); if (fptr) { PetscCall((*fptr)(A, array)); } else { PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); } if (array) *array = NULL; } PetscCall(PetscObjectStateIncrease((PetscObject)A)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt i, j, nrows, ncols, ldb; const PetscInt *irow, *icol; PetscScalar *av, *bv, *v = mat->v; Mat newmat; PetscFunctionBegin; PetscCall(ISGetIndices(isrow, &irow)); PetscCall(ISGetIndices(iscol, &icol)); PetscCall(ISGetLocalSize(isrow, &nrows)); PetscCall(ISGetLocalSize(iscol, &ncols)); /* Check submatrixcall */ if (scall == MAT_REUSE_MATRIX) { PetscInt n_cols, n_rows; PetscCall(MatGetSize(*B, &n_rows, &n_cols)); if (n_rows != nrows || n_cols != ncols) { /* resize the result matrix to match number of requested rows/columns */ PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); } newmat = *B; } else { /* Create and fill new matrix */ PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); } /* Now extract the data pointers and do the copy,column at a time */ PetscCall(MatDenseGetArray(newmat, &bv)); PetscCall(MatDenseGetLDA(newmat, &ldb)); for (i = 0; i < ncols; i++) { av = v + mat->lda * icol[i]; for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; bv += ldb; } PetscCall(MatDenseRestoreArray(newmat, &bv)); /* Assemble the matrices so that the correct flags are set */ PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); /* Free work space */ PetscCall(ISRestoreIndices(isrow, &irow)); PetscCall(ISRestoreIndices(iscol, &icol)); *B = newmat; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) { PetscInt i; PetscFunctionBegin; if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) { Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; const PetscScalar *va; PetscScalar *vb; PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; PetscFunctionBegin; /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ if (A->ops->copy != B->ops->copy) { PetscCall(MatCopy_Basic(A, B, str)); PetscFunctionReturn(PETSC_SUCCESS); } PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); PetscCall(MatDenseGetArrayRead(A, &va)); PetscCall(MatDenseGetArray(B, &vb)); if (lda1 > m || lda2 > m) { for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); } else { PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); } PetscCall(MatDenseRestoreArray(B, &vb)); PetscCall(MatDenseRestoreArrayRead(A, &va)); PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatSetUp_SeqDense(Mat A) { PetscFunctionBegin; PetscCall(PetscLayoutSetUp(A->rmap)); PetscCall(PetscLayoutSetUp(A->cmap)); if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatConjugate_SeqDense(Mat A) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt i, j; PetscInt min = PetscMin(A->rmap->n, A->cmap->n); PetscScalar *aa; PetscFunctionBegin; PetscCall(MatDenseGetArray(A, &aa)); for (j = 0; j < A->cmap->n; j++) for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); PetscCall(MatDenseRestoreArray(A, &aa)); if (mat->tau) for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatRealPart_SeqDense(Mat A) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt i, j; PetscScalar *aa; PetscFunctionBegin; PetscCall(MatDenseGetArray(A, &aa)); for (j = 0; j < A->cmap->n; j++) { for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); } PetscCall(MatDenseRestoreArray(A, &aa)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) { Mat_SeqDense *mat = (Mat_SeqDense *)A->data; PetscInt i, j; PetscScalar *aa; PetscFunctionBegin; PetscCall(MatDenseGetArray(A, &aa)); for (j = 0; j < A->cmap->n; j++) { for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); } PetscCall(MatDenseRestoreArray(A, &aa)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { PetscInt m = A->rmap->n, n = B->cmap->n; PetscBool cisdense = PETSC_FALSE; PetscFunctionBegin; PetscCall(MatSetSizes(C, m, n, m, n)); #if defined(PETSC_HAVE_CUDA) PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); #endif #if defined(PETSC_HAVE_HIP) PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); #endif if (!cisdense) { PetscBool flg; PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); } PetscCall(MatSetUp(C)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; PetscBLASInt m, n, k; const PetscScalar *av, *bv; PetscScalar *cv; PetscScalar _DOne = 1.0, _DZero = 0.0; PetscFunctionBegin; PetscCall(PetscBLASIntCast(C->rmap->n, &m)); PetscCall(PetscBLASIntCast(C->cmap->n, &n)); PetscCall(PetscBLASIntCast(A->cmap->n, &k)); if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(MatDenseGetArrayRead(A, &av)); PetscCall(MatDenseGetArrayRead(B, &bv)); PetscCall(MatDenseGetArrayWrite(C, &cv)); PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); PetscCall(MatDenseRestoreArrayRead(A, &av)); PetscCall(MatDenseRestoreArrayRead(B, &bv)); PetscCall(MatDenseRestoreArrayWrite(C, &cv)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { PetscInt m = A->rmap->n, n = B->rmap->n; PetscBool cisdense = PETSC_FALSE; PetscFunctionBegin; PetscCall(MatSetSizes(C, m, n, m, n)); #if defined(PETSC_HAVE_CUDA) PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); #endif #if defined(PETSC_HAVE_HIP) PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); #endif if (!cisdense) { PetscBool flg; PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); } PetscCall(MatSetUp(C)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; Mat_SeqDense *b = (Mat_SeqDense *)B->data; Mat_SeqDense *c = (Mat_SeqDense *)C->data; const PetscScalar *av, *bv; PetscScalar *cv; PetscBLASInt m, n, k; PetscScalar _DOne = 1.0, _DZero = 0.0; PetscFunctionBegin; PetscCall(PetscBLASIntCast(C->rmap->n, &m)); PetscCall(PetscBLASIntCast(C->cmap->n, &n)); PetscCall(PetscBLASIntCast(A->cmap->n, &k)); if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(MatDenseGetArrayRead(A, &av)); PetscCall(MatDenseGetArrayRead(B, &bv)); PetscCall(MatDenseGetArrayWrite(C, &cv)); PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); PetscCall(MatDenseRestoreArrayRead(A, &av)); PetscCall(MatDenseRestoreArrayRead(B, &bv)); PetscCall(MatDenseRestoreArrayWrite(C, &cv)); PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { PetscInt m = A->cmap->n, n = B->cmap->n; PetscBool cisdense = PETSC_FALSE; PetscFunctionBegin; PetscCall(MatSetSizes(C, m, n, m, n)); #if defined(PETSC_HAVE_CUDA) PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); #endif #if defined(PETSC_HAVE_HIP) PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); #endif if (!cisdense) { PetscBool flg; PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); } PetscCall(MatSetUp(C)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; Mat_SeqDense *b = (Mat_SeqDense *)B->data; Mat_SeqDense *c = (Mat_SeqDense *)C->data; const PetscScalar *av, *bv; PetscScalar *cv; PetscBLASInt m, n, k; PetscScalar _DOne = 1.0, _DZero = 0.0; PetscFunctionBegin; PetscCall(PetscBLASIntCast(C->rmap->n, &m)); PetscCall(PetscBLASIntCast(C->cmap->n, &n)); PetscCall(PetscBLASIntCast(A->rmap->n, &k)); if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(MatDenseGetArrayRead(A, &av)); PetscCall(MatDenseGetArrayRead(B, &bv)); PetscCall(MatDenseGetArrayWrite(C, &cv)); PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); PetscCall(MatDenseRestoreArrayRead(A, &av)); PetscCall(MatDenseRestoreArrayRead(B, &bv)); PetscCall(MatDenseRestoreArrayWrite(C, &cv)); PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) { PetscFunctionBegin; C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; C->ops->productsymbolic = MatProductSymbolic_AB; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) { PetscFunctionBegin; C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; C->ops->productsymbolic = MatProductSymbolic_AtB; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) { PetscFunctionBegin; C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; C->ops->productsymbolic = MatProductSymbolic_ABt; PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) { Mat_Product *product = C->product; PetscFunctionBegin; switch (product->type) { case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); break; case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); break; case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); break; default: break; } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; PetscScalar *x; const PetscScalar *aa; PetscFunctionBegin; PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); PetscCall(VecGetArray(v, &x)); PetscCall(VecGetLocalSize(v, &p)); PetscCall(MatDenseGetArrayRead(A, &aa)); PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); for (i = 0; i < m; i++) { x[i] = aa[i]; if (idx) idx[i] = 0; for (j = 1; j < n; j++) { if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { x[i] = aa[i + a->lda * j]; if (idx) idx[i] = j; } } } PetscCall(MatDenseRestoreArrayRead(A, &aa)); PetscCall(VecRestoreArray(v, &x)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; PetscScalar *x; PetscReal atmp; const PetscScalar *aa; PetscFunctionBegin; PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); PetscCall(VecGetArray(v, &x)); PetscCall(VecGetLocalSize(v, &p)); PetscCall(MatDenseGetArrayRead(A, &aa)); PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); for (i = 0; i < m; i++) { x[i] = PetscAbsScalar(aa[i]); for (j = 1; j < n; j++) { atmp = PetscAbsScalar(aa[i + a->lda * j]); if (PetscAbsScalar(x[i]) < atmp) { x[i] = atmp; if (idx) idx[i] = j; } } } PetscCall(MatDenseRestoreArrayRead(A, &aa)); PetscCall(VecRestoreArray(v, &x)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; PetscScalar *x; const PetscScalar *aa; PetscFunctionBegin; PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); PetscCall(MatDenseGetArrayRead(A, &aa)); PetscCall(VecGetArray(v, &x)); PetscCall(VecGetLocalSize(v, &p)); PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); for (i = 0; i < m; i++) { x[i] = aa[i]; if (idx) idx[i] = 0; for (j = 1; j < n; j++) { if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { x[i] = aa[i + a->lda * j]; if (idx) idx[i] = j; } } } PetscCall(VecRestoreArray(v, &x)); PetscCall(MatDenseRestoreArrayRead(A, &aa)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscScalar *x; const PetscScalar *aa; PetscFunctionBegin; PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); PetscCall(MatDenseGetArrayRead(A, &aa)); PetscCall(VecGetArray(v, &x)); PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); PetscCall(VecRestoreArray(v, &x)); PetscCall(MatDenseRestoreArrayRead(A, &aa)); PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) { PetscInt i, j, m, n; const PetscScalar *a; PetscFunctionBegin; PetscCall(MatGetSize(A, &m, &n)); PetscCall(PetscArrayzero(reductions, n)); PetscCall(MatDenseGetArrayRead(A, &a)); if (type == NORM_2) { for (i = 0; i < n; i++) { for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); a = PetscSafePointerPlusOffset(a, m); } } else if (type == NORM_1) { for (i = 0; i < n; i++) { for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); a = PetscSafePointerPlusOffset(a, m); } } else if (type == NORM_INFINITY) { for (i = 0; i < n; i++) { for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); a = PetscSafePointerPlusOffset(a, m); } } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { for (i = 0; i < n; i++) { for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); a = PetscSafePointerPlusOffset(a, m); } } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { for (i = 0; i < n; i++) { for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); a = PetscSafePointerPlusOffset(a, m); } } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); PetscCall(MatDenseRestoreArrayRead(A, &a)); if (type == NORM_2) { for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { for (i = 0; i < n; i++) reductions[i] /= m; } PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) { PetscScalar *a; PetscInt lda, m, n, i, j; PetscFunctionBegin; PetscCall(MatGetSize(x, &m, &n)); PetscCall(MatDenseGetLDA(x, &lda)); PetscCall(MatDenseGetArrayWrite(x, &a)); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); } PetscCall(MatDenseRestoreArrayWrite(x, &a)); PetscFunctionReturn(PETSC_SUCCESS); } /* vals is not const */ static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscScalar *v; PetscFunctionBegin; PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); PetscCall(MatDenseGetArray(A, &v)); *vals = v + col * a->lda; PetscCall(MatDenseRestoreArray(A, &v)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) { PetscFunctionBegin; if (vals) *vals = NULL; /* user cannot accidentally use the array later */ PetscFunctionReturn(PETSC_SUCCESS); } static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, MatGetRow_SeqDense, MatRestoreRow_SeqDense, MatMult_SeqDense, /* 4*/ MatMultAdd_SeqDense, MatMultTranspose_SeqDense, MatMultTransposeAdd_SeqDense, NULL, NULL, NULL, /* 10*/ NULL, MatLUFactor_SeqDense, MatCholeskyFactor_SeqDense, MatSOR_SeqDense, MatTranspose_SeqDense, /* 15*/ MatGetInfo_SeqDense, MatEqual_SeqDense, MatGetDiagonal_SeqDense, MatDiagonalScale_SeqDense, MatNorm_SeqDense, /* 20*/ NULL, NULL, MatSetOption_SeqDense, MatZeroEntries_SeqDense, /* 24*/ MatZeroRows_SeqDense, NULL, NULL, NULL, NULL, /* 29*/ MatSetUp_SeqDense, NULL, NULL, NULL, NULL, /* 34*/ MatDuplicate_SeqDense, NULL, NULL, NULL, NULL, /* 39*/ MatAXPY_SeqDense, MatCreateSubMatrices_SeqDense, NULL, MatGetValues_SeqDense, MatCopy_SeqDense, /* 44*/ MatGetRowMax_SeqDense, MatScale_SeqDense, MatShift_SeqDense, NULL, MatZeroRowsColumns_SeqDense, /* 49*/ MatSetRandom_SeqDense, NULL, NULL, NULL, NULL, /* 54*/ NULL, NULL, NULL, NULL, NULL, /* 59*/ MatCreateSubMatrix_SeqDense, MatDestroy_SeqDense, MatView_SeqDense, NULL, NULL, /* 64*/ NULL, NULL, NULL, NULL, MatGetRowMaxAbs_SeqDense, /* 69*/ NULL, NULL, NULL, NULL, NULL, /* 74*/ NULL, NULL, NULL, NULL, MatLoad_SeqDense, /* 79*/ MatIsSymmetric_SeqDense, MatIsHermitian_SeqDense, NULL, NULL, NULL, /* 84*/ NULL, MatMatMultNumeric_SeqDense_SeqDense, NULL, NULL, MatMatTransposeMultNumeric_SeqDense_SeqDense, /* 89*/ NULL, MatProductSetFromOptions_SeqDense, NULL, NULL, MatConjugate_SeqDense, /* 94*/ NULL, NULL, MatRealPart_SeqDense, MatImaginaryPart_SeqDense, NULL, /* 99*/ NULL, NULL, NULL, MatGetRowMin_SeqDense, MatGetColumnVector_SeqDense, /*104*/ NULL, NULL, NULL, NULL, NULL, /*109*/ NULL, NULL, MatMultHermitianTranspose_SeqDense, MatMultHermitianTransposeAdd_SeqDense, NULL, /*114*/ NULL, MatGetColumnReductions_SeqDense, NULL, NULL, NULL, /*119*/ NULL, NULL, MatTransposeMatMultNumeric_SeqDense_SeqDense, NULL, NULL, /*124*/ NULL, NULL, NULL, NULL, NULL, /*129*/ NULL, MatCreateMPIMatConcatenateSeqMat_SeqDense, NULL, NULL, NULL, /*134*/ NULL, NULL, NULL, NULL, NULL, /*139*/ NULL, NULL, NULL, NULL, NULL}; /*@ MatCreateSeqDense - Creates a `MATSEQDENSE` that is stored in column major order (the usual Fortran format). Collective Input Parameters: + comm - MPI communicator, set to `PETSC_COMM_SELF` . m - number of rows . n - number of columns - data - optional location of matrix data in column major order. Use `NULL` for PETSc to control all matrix memory allocation. Output Parameter: . A - the matrix Level: intermediate Note: The data input variable is intended primarily for Fortran programmers who wish to allocate their own matrix memory space. Most users should set `data` = `NULL`. Developer Note: Many of the matrix operations for this variant use the BLAS and LAPACK routines. .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` @*/ PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A) { PetscFunctionBegin; PetscCall(MatCreate(comm, A)); PetscCall(MatSetSizes(*A, m, n, m, n)); PetscCall(MatSetType(*A, MATSEQDENSE)); PetscCall(MatSeqDenseSetPreallocation(*A, data)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix Collective Input Parameters: + B - the matrix - data - the array (or `NULL`) Level: intermediate Note: The data input variable is intended primarily for Fortran programmers who wish to allocate their own matrix memory space. Most users should need not call this routine. .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` @*/ PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) { PetscFunctionBegin; PetscValidHeaderSpecific(B, MAT_CLASSID, 1); PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) { Mat_SeqDense *b = (Mat_SeqDense *)B->data; PetscFunctionBegin; PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); B->preallocated = PETSC_TRUE; PetscCall(PetscLayoutSetUp(B->rmap)); PetscCall(PetscLayoutSetUp(B->cmap)); if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda)); if (!data) { /* petsc-allocated storage */ if (!b->user_alloc) PetscCall(PetscFree(b->v)); PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); b->user_alloc = PETSC_FALSE; } else { /* user-allocated storage */ if (!b->user_alloc) PetscCall(PetscFree(b->v)); b->v = data; b->user_alloc = PETSC_TRUE; } B->assembled = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } #if defined(PETSC_HAVE_ELEMENTAL) PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { Mat mat_elemental; const PetscScalar *array; PetscScalar *v_colwise; PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; PetscFunctionBegin; PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); PetscCall(MatDenseGetArrayRead(A, &array)); /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ k = 0; for (j = 0; j < N; j++) { cols[j] = j; for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; } for (i = 0; i < M; i++) rows[i] = i; PetscCall(MatDenseRestoreArrayRead(A, &array)); PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); PetscCall(MatSetUp(mat_elemental)); /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); PetscCall(PetscFree3(v_colwise, rows, cols)); if (reuse == MAT_INPLACE_MATRIX) { PetscCall(MatHeaderReplace(A, &mat_elemental)); } else { *newmat = mat_elemental; } PetscFunctionReturn(PETSC_SUCCESS); } #endif PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) { Mat_SeqDense *b = (Mat_SeqDense *)B->data; PetscBool data; PetscFunctionBegin; data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); PetscCall(PetscBLASIntCast(lda, &b->lda)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { PetscFunctionBegin; PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) { PetscBool isstd, iskok, iscuda, iship; PetscMPIInt size; #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ const PetscScalar *a; #endif PetscFunctionBegin; *v = NULL; PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); if (isstd) { if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); } else if (iskok) { PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); #if PetscDefined(HAVE_KOKKOS_KERNELS) if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); #endif } else if (iscuda) { PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); #if PetscDefined(HAVE_CUDA) PetscCall(MatDenseCUDAGetArrayRead(A, &a)); if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); #endif } else if (iship) { PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); #if PetscDefined(HAVE_HIP) PetscCall(MatDenseHIPGetArrayRead(A, &a)); if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); #endif } PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); a->vecinuse = col + 1; PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); *v = a->cvec; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); VecCheckAssembled(a->cvec); a->vecinuse = 0; PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); PetscCall(VecResetArray(a->cvec)); if (v) *v = NULL; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); a->vecinuse = col + 1; PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); PetscCall(VecLockReadPush(a->cvec)); *v = a->cvec; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); VecCheckAssembled(a->cvec); a->vecinuse = 0; PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); PetscCall(VecLockReadPop(a->cvec)); PetscCall(VecResetArray(a->cvec)); if (v) *v = NULL; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); a->vecinuse = col + 1; PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); *v = a->cvec; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); VecCheckAssembled(a->cvec); a->vecinuse = 0; PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); PetscCall(VecResetArray(a->cvec)); if (v) *v = NULL; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); if (!a->cmat) { PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); } else { PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); } PetscCall(MatDenseSetLDA(a->cmat, a->lda)); a->matinuse = cbegin + 1; *v = a->cmat; #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = PETSC_OFFLOAD_CPU; #endif PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) { Mat_SeqDense *a = (Mat_SeqDense *)A->data; PetscFunctionBegin; PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); a->matinuse = 0; PetscCall(MatDenseResetArray(a->cmat)); *v = NULL; #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) A->offloadmask = PETSC_OFFLOAD_CPU; #endif PetscFunctionReturn(PETSC_SUCCESS); } /*MC MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. Options Database Key: . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` Level: beginner .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` M*/ PetscErrorCode MatCreate_SeqDense(Mat B) { Mat_SeqDense *b; PetscMPIInt size; PetscFunctionBegin; PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); PetscCall(PetscNew(&b)); B->data = (void *)b; B->ops[0] = MatOps_Values; b->roworiented = PETSC_TRUE; PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); #if defined(PETSC_HAVE_ELEMENTAL) PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); #endif #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); #endif #if defined(PETSC_HAVE_CUDA) PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); #endif #if defined(PETSC_HAVE_HIP) PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); #endif PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultColumnRange_C", MatMultColumnRange_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. Not Collective Input Parameters: + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix - col - column index Output Parameter: . vals - pointer to the data Level: intermediate Note: Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` @*/ PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[]) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidLogicalCollectiveInt(A, col, 2); PetscAssertPointer(vals, 3); PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. Not Collective Input Parameters: + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix - vals - pointer to the data (may be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` @*/ PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[]) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscAssertPointer(vals, 2); PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. Collective Input Parameters: + A - the `Mat` object - col - the column index Output Parameter: . v - the vector Level: intermediate Notes: The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` @*/ PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscValidLogicalCollectiveInt(A, col, 2); PetscAssertPointer(v, 3); PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. Collective Input Parameters: + A - the `Mat` object . col - the column index - v - the `Vec` object (may be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` @*/ PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscValidLogicalCollectiveInt(A, col, 2); if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3); PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. Collective Input Parameters: + A - the `Mat` object - col - the column index Output Parameter: . v - the vector Level: intermediate Notes: The vector is owned by PETSc and users cannot modify it. Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` @*/ PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscValidLogicalCollectiveInt(A, col, 2); PetscAssertPointer(v, 3); PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. Collective Input Parameters: + A - the `Mat` object . col - the column index - v - the `Vec` object (may be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` @*/ PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscValidLogicalCollectiveInt(A, col, 2); if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3); PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. Collective Input Parameters: + A - the `Mat` object - col - the column index Output Parameter: . v - the vector Level: intermediate Notes: The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` @*/ PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscValidLogicalCollectiveInt(A, col, 2); PetscAssertPointer(v, 3); PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. Collective Input Parameters: + A - the `Mat` object . col - the column index - v - the `Vec` object (may be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` @*/ PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscValidLogicalCollectiveInt(A, col, 2); if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3); PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. Collective Input Parameters: + A - the `Mat` object . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) Output Parameter: . v - the matrix Level: intermediate Notes: The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` @*/ PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscValidLogicalCollectiveInt(A, rbegin, 2); PetscValidLogicalCollectiveInt(A, rend, 3); PetscValidLogicalCollectiveInt(A, cbegin, 4); PetscValidLogicalCollectiveInt(A, cend, 5); PetscAssertPointer(v, 6); if (rbegin == PETSC_DECIDE) rbegin = 0; if (rend == PETSC_DECIDE) rend = A->rmap->N; if (cbegin == PETSC_DECIDE) cbegin = 0; if (cend == PETSC_DECIDE) cend = A->cmap->N; PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. Collective Input Parameters: + A - the `Mat` object - v - the `Mat` object (cannot be `NULL`) Level: intermediate .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` @*/ PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidType(A, 1); PetscAssertPointer(v, 2); PetscValidHeaderSpecific(*v, MAT_CLASSID, 2); PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); PetscFunctionReturn(PETSC_SUCCESS); } #include #include PetscErrorCode MatSeqDenseInvert(Mat A) { PetscInt m; const PetscReal shift = 0.0; PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; PetscScalar *values; PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscCall(MatDenseGetArray(A, &values)); PetscCall(MatGetLocalSize(A, &m, NULL)); allowzeropivot = PetscNot(A->erroriffailure); /* factor and invert each block */ switch (m) { case 1: values[0] = (PetscScalar)1.0 / (values[0] + shift); break; case 2: PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; break; case 3: PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; break; case 4: PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; break; case 5: { PetscScalar work[25]; PetscInt ipvt[5]; PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; } break; case 6: PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; break; case 7: PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; break; default: { PetscInt *v_pivots, *IJ, j; PetscScalar *v_work; PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); for (j = 0; j < m; j++) IJ[j] = j; PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; PetscCall(PetscFree3(v_work, v_pivots, IJ)); } } PetscCall(MatDenseRestoreArray(A, &values)); PetscFunctionReturn(PETSC_SUCCESS); }