1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/dense/mpi/mpidense.h> 9 #include <petscblaslapack.h> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <petsc/private/vecimpl.h> 12 13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 PetscInt j, k, n = A->rmap->n; 17 PetscScalar *v; 18 19 PetscFunctionBegin; 20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 21 PetscCall(MatDenseGetArray(A, &v)); 22 if (!hermitian) { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 25 } 26 } else { 27 for (k = 0; k < n; k++) { 28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 29 } 30 } 31 PetscCall(MatDenseRestoreArray(A, &v)); 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 36 { 37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 38 PetscBLASInt info, n; 39 40 PetscFunctionBegin; 41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 42 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 43 if (A->factortype == MAT_FACTOR_LU) { 44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 45 if (!mat->fwork) { 46 mat->lfwork = n; 47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 48 } 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 54 if (A->spd == PETSC_BOOL3_TRUE) { 55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 57 PetscCall(PetscFPTrapPop()); 58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 59 #if defined(PETSC_USE_COMPLEX) 60 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 65 PetscCall(PetscFPTrapPop()); 66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 67 #endif 68 } else { /* symmetric case */ 69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 73 PetscCall(PetscFPTrapPop()); 74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 75 } 76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1); 77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 79 80 A->ops->solve = NULL; 81 A->ops->matsolve = NULL; 82 A->ops->solvetranspose = NULL; 83 A->ops->matsolvetranspose = NULL; 84 A->ops->solveadd = NULL; 85 A->ops->solvetransposeadd = NULL; 86 A->factortype = MAT_FACTOR_NONE; 87 PetscCall(PetscFree(A->solvertype)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 92 { 93 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 95 PetscScalar *slot, *bb, *v; 96 const PetscScalar *xx; 97 98 PetscFunctionBegin; 99 if (PetscDefined(USE_DEBUG)) { 100 for (i = 0; i < N; i++) { 101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 104 } 105 } 106 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 107 108 /* fix right-hand side if needed */ 109 if (x && b) { 110 Vec xt; 111 112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 113 PetscCall(VecDuplicate(x, &xt)); 114 PetscCall(VecCopy(x, xt)); 115 PetscCall(VecScale(xt, -1.0)); 116 PetscCall(MatMultAdd(A, xt, b, b)); 117 PetscCall(VecDestroy(&xt)); 118 PetscCall(VecGetArrayRead(x, &xx)); 119 PetscCall(VecGetArray(b, &bb)); 120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 121 PetscCall(VecRestoreArrayRead(x, &xx)); 122 PetscCall(VecRestoreArray(b, &bb)); 123 } 124 125 PetscCall(MatDenseGetArray(A, &v)); 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i] * m; 128 PetscCall(PetscArrayzero(slot, r)); 129 } 130 for (i = 0; i < N; i++) { 131 slot = v + rows[i]; 132 for (j = 0; j < n; j++) { 133 *slot = 0.0; 134 slot += m; 135 } 136 } 137 if (diag != 0.0) { 138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 139 for (i = 0; i < N; i++) { 140 slot = v + (m + 1) * rows[i]; 141 *slot = diag; 142 } 143 } 144 PetscCall(MatDenseRestoreArray(A, &v)); 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 149 { 150 Mat B = NULL; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 152 Mat_SeqDense *b; 153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 154 const MatScalar *av; 155 PetscBool isseqdense; 156 157 PetscFunctionBegin; 158 if (reuse == MAT_REUSE_MATRIX) { 159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); 161 } 162 if (reuse != MAT_REUSE_MATRIX) { 163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 164 PetscCall(MatSetSizes(B, m, n, m, n)); 165 PetscCall(MatSetType(B, MATSEQDENSE)); 166 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 167 b = (Mat_SeqDense *)B->data; 168 } else { 169 b = (Mat_SeqDense *)((*newmat)->data); 170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); 171 } 172 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 173 for (i = 0; i < m; i++) { 174 PetscInt j; 175 for (j = 0; j < ai[1] - ai[0]; j++) { 176 b->v[*aj * b->lda + i] = *av; 177 aj++; 178 av++; 179 } 180 ai++; 181 } 182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 183 184 if (reuse == MAT_INPLACE_MATRIX) { 185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 187 PetscCall(MatHeaderReplace(A, &B)); 188 } else { 189 if (B) *newmat = B; 190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 197 { 198 Mat B = NULL; 199 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 200 PetscInt i, j; 201 PetscInt *rows, *nnz; 202 MatScalar *aa = a->v, *vals; 203 204 PetscFunctionBegin; 205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 206 if (reuse != MAT_REUSE_MATRIX) { 207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 209 PetscCall(MatSetType(B, MATSEQAIJ)); 210 for (j = 0; j < A->cmap->n; j++) { 211 for (i = 0; i < A->rmap->n; i++) 212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 213 aa += a->lda; 214 } 215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 216 } else B = *newmat; 217 aa = a->v; 218 for (j = 0; j < A->cmap->n; j++) { 219 PetscInt numRows = 0; 220 for (i = 0; i < A->rmap->n; i++) 221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 222 rows[numRows] = i; 223 vals[numRows++] = aa[i]; 224 } 225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 226 aa += a->lda; 227 } 228 PetscCall(PetscFree3(rows, nnz, vals)); 229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 231 232 if (reuse == MAT_INPLACE_MATRIX) { 233 PetscCall(MatHeaderReplace(A, &B)); 234 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 239 { 240 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 241 const PetscScalar *xv; 242 PetscScalar *yv; 243 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 244 245 PetscFunctionBegin; 246 PetscCall(MatDenseGetArrayRead(X, &xv)); 247 PetscCall(MatDenseGetArray(Y, &yv)); 248 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 249 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 250 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 251 PetscCall(PetscBLASIntCast(y->lda, &lday)); 252 if (ldax > m || lday > m) { 253 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); 254 } else { 255 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 256 } 257 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 258 PetscCall(MatDenseRestoreArray(Y, &yv)); 259 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 264 { 265 PetscLogDouble N = A->rmap->n * A->cmap->n; 266 267 PetscFunctionBegin; 268 info->block_size = 1.0; 269 info->nz_allocated = N; 270 info->nz_used = N; 271 info->nz_unneeded = 0; 272 info->assemblies = A->num_ass; 273 info->mallocs = 0; 274 info->memory = 0; /* REVIEW ME */ 275 info->fill_ratio_given = 0; 276 info->fill_ratio_needed = 0; 277 info->factor_mallocs = 0; 278 PetscFunctionReturn(PETSC_SUCCESS); 279 } 280 281 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 282 { 283 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 284 PetscScalar *v; 285 PetscBLASInt one = 1, j, nz, lda = 0; 286 287 PetscFunctionBegin; 288 PetscCall(MatDenseGetArray(A, &v)); 289 PetscCall(PetscBLASIntCast(a->lda, &lda)); 290 if (lda > A->rmap->n) { 291 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 292 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 293 } else { 294 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 295 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 296 } 297 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 298 PetscCall(MatDenseRestoreArray(A, &v)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 303 { 304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 305 PetscScalar *v; 306 PetscInt j, k; 307 308 PetscFunctionBegin; 309 PetscCall(MatDenseGetArray(A, &v)); 310 k = PetscMin(A->rmap->n, A->cmap->n); 311 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 312 PetscCall(PetscLogFlops(k)); 313 PetscCall(MatDenseRestoreArray(A, &v)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 318 { 319 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 320 PetscInt i, j, m = A->rmap->n, N = a->lda; 321 const PetscScalar *v; 322 323 PetscFunctionBegin; 324 *fl = PETSC_FALSE; 325 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 326 PetscCall(MatDenseGetArrayRead(A, &v)); 327 for (i = 0; i < m; i++) { 328 for (j = i; j < m; j++) { 329 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 330 } 331 } 332 *fl = PETSC_TRUE; 333 restore: 334 PetscCall(MatDenseRestoreArrayRead(A, &v)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 339 { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 360 { 361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 362 PetscInt lda = mat->lda, j, m, nlda = lda; 363 PetscBool isdensecpu; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 367 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 368 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 369 PetscCall(MatDenseSetLDA(newi, lda)); 370 } 371 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 372 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 373 if (cpvalues == MAT_COPY_VALUES) { 374 const PetscScalar *av; 375 PetscScalar *v; 376 377 PetscCall(MatDenseGetArrayRead(A, &av)); 378 PetscCall(MatDenseGetArrayWrite(newi, &v)); 379 PetscCall(MatDenseGetLDA(newi, &nlda)); 380 m = A->rmap->n; 381 if (lda > m || nlda > m) { 382 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); 383 } else { 384 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 385 } 386 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 387 PetscCall(MatDenseRestoreArrayRead(A, &av)); 388 PetscCall(MatPropagateSymmetryOptions(A, newi)); 389 } 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 394 { 395 PetscFunctionBegin; 396 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 397 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 398 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 399 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 404 { 405 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 406 PetscBLASInt info; 407 408 PetscFunctionBegin; 409 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 410 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 411 PetscCall(PetscFPTrapPop()); 412 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info); 413 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 418 { 419 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 420 PetscBLASInt info; 421 422 PetscFunctionBegin; 423 if (A->spd == PETSC_BOOL3_TRUE) { 424 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 425 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 426 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 427 PetscCall(PetscFPTrapPop()); 428 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info); 429 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 430 #if defined(PETSC_USE_COMPLEX) 431 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 432 if (T) PetscCall(MatConjugate_SeqDense(A)); 433 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 434 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 435 PetscCall(PetscFPTrapPop()); 436 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info); 437 if (T) PetscCall(MatConjugate_SeqDense(A)); 438 #endif 439 } else { /* symmetric case */ 440 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 441 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 442 PetscCall(PetscFPTrapPop()); 443 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info); 444 } 445 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 446 PetscFunctionReturn(PETSC_SUCCESS); 447 } 448 449 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 450 { 451 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 452 PetscBLASInt info; 453 char trans; 454 455 PetscFunctionBegin; 456 if (PetscDefined(USE_COMPLEX)) { 457 trans = 'C'; 458 } else { 459 trans = 'T'; 460 } 461 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 462 { /* lwork depends on the number of right-hand sides */ 463 PetscBLASInt nlfwork, lfwork = -1; 464 PetscScalar fwork; 465 466 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 467 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 468 if (nlfwork > mat->lfwork) { 469 mat->lfwork = nlfwork; 470 PetscCall(PetscFree(mat->fwork)); 471 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 472 } 473 } 474 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 475 PetscCall(PetscFPTrapPop()); 476 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 477 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 478 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 479 PetscCall(PetscFPTrapPop()); 480 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 481 for (PetscInt j = 0; j < nrhs; j++) { 482 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 483 } 484 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 489 { 490 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 491 PetscBLASInt info; 492 493 PetscFunctionBegin; 494 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 495 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 496 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 497 PetscCall(PetscFPTrapPop()); 498 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 499 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 500 { /* lwork depends on the number of right-hand sides */ 501 PetscBLASInt nlfwork, lfwork = -1; 502 PetscScalar fwork; 503 504 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 505 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 506 if (nlfwork > mat->lfwork) { 507 mat->lfwork = nlfwork; 508 PetscCall(PetscFree(mat->fwork)); 509 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 510 } 511 } 512 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 513 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 514 PetscCall(PetscFPTrapPop()); 515 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 516 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 517 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 518 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 519 PetscFunctionReturn(PETSC_SUCCESS); 520 } 521 522 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 523 { 524 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 525 PetscScalar *y; 526 PetscBLASInt m = 0, k = 0; 527 528 PetscFunctionBegin; 529 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 530 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 531 if (k < m) { 532 PetscCall(VecCopy(xx, mat->qrrhs)); 533 PetscCall(VecGetArray(mat->qrrhs, &y)); 534 } else { 535 PetscCall(VecCopy(xx, yy)); 536 PetscCall(VecGetArray(yy, &y)); 537 } 538 *_y = y; 539 *_k = k; 540 *_m = m; 541 PetscFunctionReturn(PETSC_SUCCESS); 542 } 543 544 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 545 { 546 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 547 PetscScalar *y = NULL; 548 PetscBLASInt m, k; 549 550 PetscFunctionBegin; 551 y = *_y; 552 *_y = NULL; 553 k = *_k; 554 m = *_m; 555 if (k < m) { 556 PetscScalar *yv; 557 PetscCall(VecGetArray(yy, &yv)); 558 PetscCall(PetscArraycpy(yv, y, k)); 559 PetscCall(VecRestoreArray(yy, &yv)); 560 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 561 } else { 562 PetscCall(VecRestoreArray(yy, &y)); 563 } 564 PetscFunctionReturn(PETSC_SUCCESS); 565 } 566 567 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 568 { 569 PetscScalar *y = NULL; 570 PetscBLASInt m = 0, k = 0; 571 572 PetscFunctionBegin; 573 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 574 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 575 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 576 PetscFunctionReturn(PETSC_SUCCESS); 577 } 578 579 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 580 { 581 PetscScalar *y = NULL; 582 PetscBLASInt m = 0, k = 0; 583 584 PetscFunctionBegin; 585 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 586 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 587 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 592 { 593 PetscScalar *y = NULL; 594 PetscBLASInt m = 0, k = 0; 595 596 PetscFunctionBegin; 597 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 598 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 599 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 604 { 605 PetscScalar *y = NULL; 606 PetscBLASInt m = 0, k = 0; 607 608 PetscFunctionBegin; 609 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 610 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 611 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 612 PetscFunctionReturn(PETSC_SUCCESS); 613 } 614 615 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 616 { 617 PetscScalar *y = NULL; 618 PetscBLASInt m = 0, k = 0; 619 620 PetscFunctionBegin; 621 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 622 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 623 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 628 { 629 PetscScalar *y = NULL; 630 PetscBLASInt m = 0, k = 0; 631 632 PetscFunctionBegin; 633 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 634 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 635 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 636 PetscFunctionReturn(PETSC_SUCCESS); 637 } 638 639 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 640 { 641 const PetscScalar *b; 642 PetscScalar *y; 643 PetscInt n, _ldb, _ldx; 644 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 645 646 PetscFunctionBegin; 647 *_ldy = 0; 648 *_m = 0; 649 *_nrhs = 0; 650 *_k = 0; 651 *_y = NULL; 652 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 653 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 654 PetscCall(MatGetSize(B, NULL, &n)); 655 PetscCall(PetscBLASIntCast(n, &nrhs)); 656 PetscCall(MatDenseGetLDA(B, &_ldb)); 657 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 658 PetscCall(MatDenseGetLDA(X, &_ldx)); 659 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 660 if (ldx < m) { 661 PetscCall(MatDenseGetArrayRead(B, &b)); 662 PetscCall(PetscMalloc1(nrhs * m, &y)); 663 if (ldb == m) { 664 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 665 } else { 666 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 667 } 668 ldy = m; 669 PetscCall(MatDenseRestoreArrayRead(B, &b)); 670 } else { 671 if (ldb == ldx) { 672 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 673 PetscCall(MatDenseGetArray(X, &y)); 674 } else { 675 PetscCall(MatDenseGetArray(X, &y)); 676 PetscCall(MatDenseGetArrayRead(B, &b)); 677 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 678 PetscCall(MatDenseRestoreArrayRead(B, &b)); 679 } 680 ldy = ldx; 681 } 682 *_y = y; 683 *_ldy = ldy; 684 *_k = k; 685 *_m = m; 686 *_nrhs = nrhs; 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 691 { 692 PetscScalar *y; 693 PetscInt _ldx; 694 PetscBLASInt k, ldy, nrhs, ldx = 0; 695 696 PetscFunctionBegin; 697 y = *_y; 698 *_y = NULL; 699 k = *_k; 700 ldy = *_ldy; 701 nrhs = *_nrhs; 702 PetscCall(MatDenseGetLDA(X, &_ldx)); 703 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 704 if (ldx != ldy) { 705 PetscScalar *xv; 706 PetscCall(MatDenseGetArray(X, &xv)); 707 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 708 PetscCall(MatDenseRestoreArray(X, &xv)); 709 PetscCall(PetscFree(y)); 710 } else { 711 PetscCall(MatDenseRestoreArray(X, &y)); 712 } 713 PetscFunctionReturn(PETSC_SUCCESS); 714 } 715 716 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 717 { 718 PetscScalar *y; 719 PetscBLASInt m, k, ldy, nrhs; 720 721 PetscFunctionBegin; 722 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 723 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 724 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 725 PetscFunctionReturn(PETSC_SUCCESS); 726 } 727 728 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 729 { 730 PetscScalar *y; 731 PetscBLASInt m, k, ldy, nrhs; 732 733 PetscFunctionBegin; 734 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 735 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 736 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 741 { 742 PetscScalar *y; 743 PetscBLASInt m, k, ldy, nrhs; 744 745 PetscFunctionBegin; 746 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 747 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 748 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 753 { 754 PetscScalar *y; 755 PetscBLASInt m, k, ldy, nrhs; 756 757 PetscFunctionBegin; 758 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 759 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 760 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 761 PetscFunctionReturn(PETSC_SUCCESS); 762 } 763 764 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 765 { 766 PetscScalar *y; 767 PetscBLASInt m, k, ldy, nrhs; 768 769 PetscFunctionBegin; 770 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 771 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 772 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 773 PetscFunctionReturn(PETSC_SUCCESS); 774 } 775 776 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 777 { 778 PetscScalar *y; 779 PetscBLASInt m, k, ldy, nrhs; 780 781 PetscFunctionBegin; 782 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 783 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 784 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscFunctionReturn(PETSC_SUCCESS); 786 } 787 788 /* COMMENT: I have chosen to hide row permutation in the pivots, 789 rather than put it in the Mat->row slot.*/ 790 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 791 { 792 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 793 PetscBLASInt n, m, info; 794 795 PetscFunctionBegin; 796 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 797 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 798 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 799 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 800 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 801 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 802 PetscCall(PetscFPTrapPop()); 803 804 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info); 805 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info); 806 807 A->ops->solve = MatSolve_SeqDense_LU; 808 A->ops->matsolve = MatMatSolve_SeqDense_LU; 809 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 810 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 811 A->factortype = MAT_FACTOR_LU; 812 813 PetscCall(PetscFree(A->solvertype)); 814 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 815 816 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 817 PetscFunctionReturn(PETSC_SUCCESS); 818 } 819 820 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 821 { 822 MatFactorInfo info; 823 824 PetscFunctionBegin; 825 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 826 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 827 PetscFunctionReturn(PETSC_SUCCESS); 828 } 829 830 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 831 { 832 PetscFunctionBegin; 833 fact->preallocated = PETSC_TRUE; 834 fact->assembled = PETSC_TRUE; 835 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 836 PetscFunctionReturn(PETSC_SUCCESS); 837 } 838 839 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 840 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 841 { 842 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 843 PetscBLASInt info, n; 844 845 PetscFunctionBegin; 846 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 847 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 848 if (A->spd == PETSC_BOOL3_TRUE) { 849 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 850 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 851 PetscCall(PetscFPTrapPop()); 852 #if defined(PETSC_USE_COMPLEX) 853 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 854 if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 855 if (!mat->fwork) { 856 PetscScalar dummy; 857 858 mat->lfwork = -1; 859 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 860 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 861 PetscCall(PetscFPTrapPop()); 862 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 863 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 864 } 865 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 866 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 867 PetscCall(PetscFPTrapPop()); 868 #endif 869 } else { /* symmetric case */ 870 if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 871 if (!mat->fwork) { 872 PetscScalar dummy; 873 874 mat->lfwork = -1; 875 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 876 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 877 PetscCall(PetscFPTrapPop()); 878 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 879 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 880 } 881 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 882 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 883 PetscCall(PetscFPTrapPop()); 884 } 885 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1); 886 887 A->ops->solve = MatSolve_SeqDense_Cholesky; 888 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 889 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 890 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 891 A->factortype = MAT_FACTOR_CHOLESKY; 892 893 PetscCall(PetscFree(A->solvertype)); 894 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 895 896 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 901 { 902 MatFactorInfo info; 903 904 PetscFunctionBegin; 905 info.fill = 1.0; 906 907 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 908 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 913 { 914 PetscFunctionBegin; 915 fact->assembled = PETSC_TRUE; 916 fact->preallocated = PETSC_TRUE; 917 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 918 PetscFunctionReturn(PETSC_SUCCESS); 919 } 920 921 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 922 { 923 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 924 PetscBLASInt n, m, info, min, max; 925 926 PetscFunctionBegin; 927 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 928 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 929 max = PetscMax(m, n); 930 min = PetscMin(m, n); 931 if (!mat->tau) PetscCall(PetscMalloc1(min, &mat->tau)); 932 if (!mat->pivots) PetscCall(PetscMalloc1(n, &mat->pivots)); 933 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); 934 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 935 if (!mat->fwork) { 936 PetscScalar dummy; 937 938 mat->lfwork = -1; 939 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 940 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 941 PetscCall(PetscFPTrapPop()); 942 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 943 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 944 } 945 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 946 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 947 PetscCall(PetscFPTrapPop()); 948 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info); 949 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 950 mat->rank = min; 951 952 A->ops->solve = MatSolve_SeqDense_QR; 953 A->ops->matsolve = MatMatSolve_SeqDense_QR; 954 A->factortype = MAT_FACTOR_QR; 955 if (m == n) { 956 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 957 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 958 } 959 960 PetscCall(PetscFree(A->solvertype)); 961 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 962 963 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 964 PetscFunctionReturn(PETSC_SUCCESS); 965 } 966 967 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 968 { 969 MatFactorInfo info; 970 971 PetscFunctionBegin; 972 info.fill = 1.0; 973 974 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 975 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 976 PetscFunctionReturn(PETSC_SUCCESS); 977 } 978 979 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 980 { 981 PetscFunctionBegin; 982 fact->assembled = PETSC_TRUE; 983 fact->preallocated = PETSC_TRUE; 984 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 985 PetscFunctionReturn(PETSC_SUCCESS); 986 } 987 988 /* uses LAPACK */ 989 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 990 { 991 PetscFunctionBegin; 992 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 993 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 994 PetscCall(MatSetType(*fact, MATDENSE)); 995 (*fact)->trivialsymbolic = PETSC_TRUE; 996 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 997 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 998 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 999 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1000 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1001 } else if (ftype == MAT_FACTOR_QR) { 1002 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1003 } 1004 (*fact)->factortype = ftype; 1005 1006 PetscCall(PetscFree((*fact)->solvertype)); 1007 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1008 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1009 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1010 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1011 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1012 PetscFunctionReturn(PETSC_SUCCESS); 1013 } 1014 1015 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1016 { 1017 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1018 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1019 const PetscScalar *b; 1020 PetscInt m = A->rmap->n, i; 1021 PetscBLASInt o = 1, bm = 0; 1022 1023 PetscFunctionBegin; 1024 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1025 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1026 #endif 1027 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1028 PetscCall(PetscBLASIntCast(m, &bm)); 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 /* this is a hack fix, should have another version without the second BLASdotu */ 1031 PetscCall(VecSet(xx, zero)); 1032 } 1033 PetscCall(VecGetArray(xx, &x)); 1034 PetscCall(VecGetArrayRead(bb, &b)); 1035 its = its * lits; 1036 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1037 while (its--) { 1038 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1039 for (i = 0; i < m; i++) { 1040 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1041 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); 1042 } 1043 } 1044 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1045 for (i = m - 1; i >= 0; i--) { 1046 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1047 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); 1048 } 1049 } 1050 } 1051 PetscCall(VecRestoreArrayRead(bb, &b)); 1052 PetscCall(VecRestoreArray(xx, &x)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 PETSC_INTERN PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1057 { 1058 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1059 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1060 PetscBLASInt m, n, _One = 1; 1061 const PetscScalar *v = mat->v, *x; 1062 1063 PetscFunctionBegin; 1064 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1065 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1066 PetscCall(VecGetArrayRead(xx, &x)); 1067 PetscCall(VecGetArrayWrite(yy, &y)); 1068 if (!m || !n) { 1069 PetscBLASInt i; 1070 if (trans) 1071 for (i = 0; i < n; i++) y[i] = 0.0; 1072 else 1073 for (i = 0; i < m; i++) y[i] = 0.0; 1074 } else { 1075 if (trans) { 1076 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1077 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1078 } else { 1079 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); 1080 } 1081 PetscCall(PetscLogFlops(2.0 * m * n - n)); 1082 } 1083 PetscCall(VecRestoreArrayRead(xx, &x)); 1084 PetscCall(VecRestoreArrayWrite(yy, &y)); 1085 PetscFunctionReturn(PETSC_SUCCESS); 1086 } 1087 1088 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1089 { 1090 PetscFunctionBegin; 1091 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1092 PetscFunctionReturn(PETSC_SUCCESS); 1093 } 1094 1095 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1096 { 1097 PetscFunctionBegin; 1098 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1099 PetscFunctionReturn(PETSC_SUCCESS); 1100 } 1101 1102 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1103 { 1104 PetscFunctionBegin; 1105 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1106 PetscFunctionReturn(PETSC_SUCCESS); 1107 } 1108 1109 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1110 { 1111 PetscFunctionBegin; 1112 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1113 PetscFunctionReturn(PETSC_SUCCESS); 1114 } 1115 1116 PETSC_INTERN PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1117 { 1118 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1119 const PetscScalar *v = mat->v, *x; 1120 PetscScalar *y, _DOne = 1.0; 1121 PetscBLASInt m, n, _One = 1; 1122 1123 PetscFunctionBegin; 1124 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1125 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1126 PetscCall(VecCopy(zz, yy)); 1127 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); 1128 PetscCall(VecGetArray(yy, &y)); 1129 PetscCall(VecGetArrayRead(xx, &x)); 1130 if (trans) { 1131 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1132 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1133 } else { 1134 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); 1135 } 1136 PetscCall(VecRestoreArrayRead(xx, &x)); 1137 PetscCall(VecRestoreArray(yy, &y)); 1138 PetscCall(PetscLogFlops(2.0 * m * n)); 1139 PetscFunctionReturn(PETSC_SUCCESS); 1140 } 1141 1142 PetscErrorCode MatMultColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1143 { 1144 PetscFunctionBegin; 1145 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1146 PetscFunctionReturn(PETSC_SUCCESS); 1147 } 1148 1149 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1150 { 1151 PetscFunctionBegin; 1152 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1153 PetscFunctionReturn(PETSC_SUCCESS); 1154 } 1155 1156 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1157 { 1158 PetscFunctionBegin; 1159 PetscMPIInt rank; 1160 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 1161 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1166 { 1167 PetscFunctionBegin; 1168 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1173 { 1174 PetscFunctionBegin; 1175 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1176 PetscFunctionReturn(PETSC_SUCCESS); 1177 } 1178 1179 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1180 { 1181 PetscFunctionBegin; 1182 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1183 PetscFunctionReturn(PETSC_SUCCESS); 1184 } 1185 1186 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1187 { 1188 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1189 PetscInt i; 1190 1191 PetscFunctionBegin; 1192 if (ncols) *ncols = A->cmap->n; 1193 if (cols) { 1194 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1195 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1196 } 1197 if (vals) { 1198 const PetscScalar *v; 1199 1200 PetscCall(MatDenseGetArrayRead(A, &v)); 1201 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1202 v += row; 1203 for (i = 0; i < A->cmap->n; i++) { 1204 (*vals)[i] = *v; 1205 v += mat->lda; 1206 } 1207 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1208 } 1209 PetscFunctionReturn(PETSC_SUCCESS); 1210 } 1211 1212 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1213 { 1214 PetscFunctionBegin; 1215 if (cols) PetscCall(PetscFree(*cols)); 1216 if (vals) PetscCall(PetscFree(*vals)); 1217 PetscFunctionReturn(PETSC_SUCCESS); 1218 } 1219 1220 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1221 { 1222 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1223 PetscScalar *av; 1224 PetscInt i, j, idx = 0; 1225 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1226 PetscOffloadMask oldf; 1227 #endif 1228 1229 PetscFunctionBegin; 1230 PetscCall(MatDenseGetArray(A, &av)); 1231 if (!mat->roworiented) { 1232 if (addv == INSERT_VALUES) { 1233 for (j = 0; j < n; j++) { 1234 if (indexn[j] < 0) { 1235 idx += m; 1236 continue; 1237 } 1238 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1239 for (i = 0; i < m; i++) { 1240 if (indexm[i] < 0) { 1241 idx++; 1242 continue; 1243 } 1244 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1245 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1246 } 1247 } 1248 } else { 1249 for (j = 0; j < n; j++) { 1250 if (indexn[j] < 0) { 1251 idx += m; 1252 continue; 1253 } 1254 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1255 for (i = 0; i < m; i++) { 1256 if (indexm[i] < 0) { 1257 idx++; 1258 continue; 1259 } 1260 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1261 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1262 } 1263 } 1264 } 1265 } else { 1266 if (addv == INSERT_VALUES) { 1267 for (i = 0; i < m; i++) { 1268 if (indexm[i] < 0) { 1269 idx += n; 1270 continue; 1271 } 1272 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1273 for (j = 0; j < n; j++) { 1274 if (indexn[j] < 0) { 1275 idx++; 1276 continue; 1277 } 1278 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1279 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1280 } 1281 } 1282 } else { 1283 for (i = 0; i < m; i++) { 1284 if (indexm[i] < 0) { 1285 idx += n; 1286 continue; 1287 } 1288 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1289 for (j = 0; j < n; j++) { 1290 if (indexn[j] < 0) { 1291 idx++; 1292 continue; 1293 } 1294 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1295 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1296 } 1297 } 1298 } 1299 } 1300 /* hack to prevent unneeded copy to the GPU while returning the array */ 1301 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1302 oldf = A->offloadmask; 1303 A->offloadmask = PETSC_OFFLOAD_GPU; 1304 #endif 1305 PetscCall(MatDenseRestoreArray(A, &av)); 1306 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1307 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1308 #endif 1309 PetscFunctionReturn(PETSC_SUCCESS); 1310 } 1311 1312 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1313 { 1314 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1315 const PetscScalar *vv; 1316 PetscInt i, j; 1317 1318 PetscFunctionBegin; 1319 PetscCall(MatDenseGetArrayRead(A, &vv)); 1320 /* row-oriented output */ 1321 for (i = 0; i < m; i++) { 1322 if (indexm[i] < 0) { 1323 v += n; 1324 continue; 1325 } 1326 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1327 for (j = 0; j < n; j++) { 1328 if (indexn[j] < 0) { 1329 v++; 1330 continue; 1331 } 1332 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1333 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1334 } 1335 } 1336 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1337 PetscFunctionReturn(PETSC_SUCCESS); 1338 } 1339 1340 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1341 { 1342 PetscBool skipHeader; 1343 PetscViewerFormat format; 1344 PetscInt header[4], M, N, m, lda, i, j; 1345 PetscCount k; 1346 const PetscScalar *v; 1347 PetscScalar *vwork; 1348 1349 PetscFunctionBegin; 1350 PetscCall(PetscViewerSetUp(viewer)); 1351 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1352 PetscCall(PetscViewerGetFormat(viewer, &format)); 1353 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1354 1355 PetscCall(MatGetSize(mat, &M, &N)); 1356 1357 /* write matrix header */ 1358 header[0] = MAT_FILE_CLASSID; 1359 header[1] = M; 1360 header[2] = N; 1361 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1362 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1363 1364 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1365 if (format != PETSC_VIEWER_NATIVE) { 1366 PetscInt nnz = m * N, *iwork; 1367 /* store row lengths for each row */ 1368 PetscCall(PetscMalloc1(nnz, &iwork)); 1369 for (i = 0; i < m; i++) iwork[i] = N; 1370 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1371 /* store column indices (zero start index) */ 1372 for (k = 0, i = 0; i < m; i++) 1373 for (j = 0; j < N; j++, k++) iwork[k] = j; 1374 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1375 PetscCall(PetscFree(iwork)); 1376 } 1377 /* store matrix values as a dense matrix in row major order */ 1378 PetscCall(PetscMalloc1(m * N, &vwork)); 1379 PetscCall(MatDenseGetArrayRead(mat, &v)); 1380 PetscCall(MatDenseGetLDA(mat, &lda)); 1381 for (k = 0, i = 0; i < m; i++) 1382 for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j]; 1383 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1384 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1385 PetscCall(PetscFree(vwork)); 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1390 { 1391 PetscBool skipHeader; 1392 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1393 PetscInt rows, cols; 1394 PetscScalar *v, *vwork; 1395 1396 PetscFunctionBegin; 1397 PetscCall(PetscViewerSetUp(viewer)); 1398 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1399 1400 if (!skipHeader) { 1401 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1402 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1403 M = header[1]; 1404 N = header[2]; 1405 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1406 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1407 nz = header[3]; 1408 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1409 } else { 1410 PetscCall(MatGetSize(mat, &M, &N)); 1411 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1412 nz = MATRIX_BINARY_FORMAT_DENSE; 1413 } 1414 1415 /* setup global sizes if not set */ 1416 if (mat->rmap->N < 0) mat->rmap->N = M; 1417 if (mat->cmap->N < 0) mat->cmap->N = N; 1418 PetscCall(MatSetUp(mat)); 1419 /* check if global sizes are correct */ 1420 PetscCall(MatGetSize(mat, &rows, &cols)); 1421 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1422 1423 PetscCall(MatGetSize(mat, NULL, &N)); 1424 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1425 PetscCall(MatDenseGetArray(mat, &v)); 1426 PetscCall(MatDenseGetLDA(mat, &lda)); 1427 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1428 PetscCount nnz = (size_t)m * N; 1429 /* read in matrix values */ 1430 PetscCall(PetscMalloc1(nnz, &vwork)); 1431 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1432 /* store values in column major order */ 1433 for (j = 0; j < N; j++) 1434 for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j]; 1435 PetscCall(PetscFree(vwork)); 1436 } else { /* matrix in file is sparse format */ 1437 PetscInt nnz = 0, *rlens, *icols; 1438 /* read in row lengths */ 1439 PetscCall(PetscMalloc1(m, &rlens)); 1440 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1441 for (i = 0; i < m; i++) nnz += rlens[i]; 1442 /* read in column indices and values */ 1443 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1444 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1445 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1446 /* store values in column major order */ 1447 for (k = 0, i = 0; i < m; i++) 1448 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1449 PetscCall(PetscFree(rlens)); 1450 PetscCall(PetscFree2(icols, vwork)); 1451 } 1452 PetscCall(MatDenseRestoreArray(mat, &v)); 1453 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1454 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1455 PetscFunctionReturn(PETSC_SUCCESS); 1456 } 1457 1458 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1459 { 1460 PetscBool isbinary, ishdf5; 1461 1462 PetscFunctionBegin; 1463 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1464 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1465 /* force binary viewer to load .info file if it has not yet done so */ 1466 PetscCall(PetscViewerSetUp(viewer)); 1467 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1468 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1469 if (isbinary) { 1470 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1471 } else if (ishdf5) { 1472 #if defined(PETSC_HAVE_HDF5) 1473 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1474 #else 1475 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1476 #endif 1477 } else { 1478 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1479 } 1480 PetscFunctionReturn(PETSC_SUCCESS); 1481 } 1482 1483 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1484 { 1485 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1486 PetscInt i, j; 1487 const char *name; 1488 PetscScalar *v, *av; 1489 PetscViewerFormat format; 1490 #if defined(PETSC_USE_COMPLEX) 1491 PetscBool allreal = PETSC_TRUE; 1492 #endif 1493 1494 PetscFunctionBegin; 1495 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1496 PetscCall(PetscViewerGetFormat(viewer, &format)); 1497 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1498 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ 1499 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1500 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1501 for (i = 0; i < A->rmap->n; i++) { 1502 v = av + i; 1503 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1504 for (j = 0; j < A->cmap->n; j++) { 1505 #if defined(PETSC_USE_COMPLEX) 1506 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1507 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1508 } else if (PetscRealPart(*v)) { 1509 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1510 } 1511 #else 1512 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1513 #endif 1514 v += a->lda; 1515 } 1516 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1517 } 1518 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1519 } else { 1520 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1521 #if defined(PETSC_USE_COMPLEX) 1522 /* determine if matrix has all real values */ 1523 for (j = 0; j < A->cmap->n; j++) { 1524 v = av + j * a->lda; 1525 for (i = 0; i < A->rmap->n; i++) { 1526 if (PetscImaginaryPart(v[i])) { 1527 allreal = PETSC_FALSE; 1528 break; 1529 } 1530 } 1531 } 1532 #endif 1533 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1534 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1535 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1536 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1537 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1538 } 1539 1540 for (i = 0; i < A->rmap->n; i++) { 1541 v = av + i; 1542 for (j = 0; j < A->cmap->n; j++) { 1543 #if defined(PETSC_USE_COMPLEX) 1544 if (allreal) { 1545 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1546 } else { 1547 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1548 } 1549 #else 1550 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1551 #endif 1552 v += a->lda; 1553 } 1554 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1555 } 1556 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1557 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1558 } 1559 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1560 PetscCall(PetscViewerFlush(viewer)); 1561 PetscFunctionReturn(PETSC_SUCCESS); 1562 } 1563 1564 #include <petscdraw.h> 1565 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1566 { 1567 Mat A = (Mat)Aa; 1568 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1569 int color = PETSC_DRAW_WHITE; 1570 const PetscScalar *v; 1571 PetscViewer viewer; 1572 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1573 PetscViewerFormat format; 1574 1575 PetscFunctionBegin; 1576 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1577 PetscCall(PetscViewerGetFormat(viewer, &format)); 1578 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1579 1580 /* Loop over matrix elements drawing boxes */ 1581 PetscCall(MatDenseGetArrayRead(A, &v)); 1582 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1583 PetscDrawCollectiveBegin(draw); 1584 /* Blue for negative and Red for positive */ 1585 for (j = 0; j < n; j++) { 1586 x_l = j; 1587 x_r = x_l + 1.0; 1588 for (i = 0; i < m; i++) { 1589 y_l = m - i - 1.0; 1590 y_r = y_l + 1.0; 1591 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1592 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1593 else continue; 1594 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1595 } 1596 } 1597 PetscDrawCollectiveEnd(draw); 1598 } else { 1599 /* use contour shading to indicate magnitude of values */ 1600 /* first determine max of all nonzero values */ 1601 PetscReal minv = 0.0, maxv = 0.0; 1602 PetscDraw popup; 1603 1604 for (i = 0; i < m * n; i++) { 1605 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1606 } 1607 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1608 PetscCall(PetscDrawGetPopup(draw, &popup)); 1609 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1610 1611 PetscDrawCollectiveBegin(draw); 1612 for (j = 0; j < n; j++) { 1613 x_l = j; 1614 x_r = x_l + 1.0; 1615 for (i = 0; i < m; i++) { 1616 y_l = m - i - 1.0; 1617 y_r = y_l + 1.0; 1618 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1619 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1620 } 1621 } 1622 PetscDrawCollectiveEnd(draw); 1623 } 1624 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1625 PetscFunctionReturn(PETSC_SUCCESS); 1626 } 1627 1628 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1629 { 1630 PetscDraw draw; 1631 PetscBool isnull; 1632 PetscReal xr, yr, xl, yl, h, w; 1633 1634 PetscFunctionBegin; 1635 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1636 PetscCall(PetscDrawIsNull(draw, &isnull)); 1637 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1638 1639 xr = A->cmap->n; 1640 yr = A->rmap->n; 1641 h = yr / 10.0; 1642 w = xr / 10.0; 1643 xr += w; 1644 yr += h; 1645 xl = -w; 1646 yl = -h; 1647 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1648 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1649 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1650 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1651 PetscCall(PetscDrawSave(draw)); 1652 PetscFunctionReturn(PETSC_SUCCESS); 1653 } 1654 1655 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1656 { 1657 PetscBool iascii, isbinary, isdraw; 1658 1659 PetscFunctionBegin; 1660 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1661 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1662 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1663 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1664 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1665 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1666 PetscFunctionReturn(PETSC_SUCCESS); 1667 } 1668 1669 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1670 { 1671 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1672 1673 PetscFunctionBegin; 1674 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1675 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1676 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); 1677 a->unplacedarray = a->v; 1678 a->unplaced_user_alloc = a->user_alloc; 1679 a->v = (PetscScalar *)array; 1680 a->user_alloc = PETSC_TRUE; 1681 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1682 A->offloadmask = PETSC_OFFLOAD_CPU; 1683 #endif 1684 PetscFunctionReturn(PETSC_SUCCESS); 1685 } 1686 1687 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1688 { 1689 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1690 1691 PetscFunctionBegin; 1692 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1693 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1694 a->v = a->unplacedarray; 1695 a->user_alloc = a->unplaced_user_alloc; 1696 a->unplacedarray = NULL; 1697 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1698 A->offloadmask = PETSC_OFFLOAD_CPU; 1699 #endif 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1704 { 1705 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1706 1707 PetscFunctionBegin; 1708 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1709 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1710 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1711 a->v = (PetscScalar *)array; 1712 a->user_alloc = PETSC_FALSE; 1713 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1714 A->offloadmask = PETSC_OFFLOAD_CPU; 1715 #endif 1716 PetscFunctionReturn(PETSC_SUCCESS); 1717 } 1718 1719 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1720 { 1721 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1722 1723 PetscFunctionBegin; 1724 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); 1725 PetscCall(VecDestroy(&l->qrrhs)); 1726 PetscCall(PetscFree(l->tau)); 1727 PetscCall(PetscFree(l->pivots)); 1728 PetscCall(PetscFree(l->fwork)); 1729 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1730 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1731 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1732 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1733 PetscCall(VecDestroy(&l->cvec)); 1734 PetscCall(MatDestroy(&l->cmat)); 1735 PetscCall(PetscFree(mat->data)); 1736 1737 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1753 #if defined(PETSC_HAVE_ELEMENTAL) 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1755 #endif 1756 #if defined(PETSC_HAVE_SCALAPACK) 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1758 #endif 1759 #if defined(PETSC_HAVE_CUDA) 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1764 #endif 1765 #if defined(PETSC_HAVE_HIP) 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1770 #endif 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1776 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL)); 1788 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 1789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 1790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1795 { 1796 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1797 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1798 PetscScalar *v, tmp; 1799 1800 PetscFunctionBegin; 1801 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1802 if (reuse == MAT_INPLACE_MATRIX) { 1803 if (m == n) { /* in place transpose */ 1804 PetscCall(MatDenseGetArray(A, &v)); 1805 for (j = 0; j < m; j++) { 1806 for (k = 0; k < j; k++) { 1807 tmp = v[j + k * M]; 1808 v[j + k * M] = v[k + j * M]; 1809 v[k + j * M] = tmp; 1810 } 1811 } 1812 PetscCall(MatDenseRestoreArray(A, &v)); 1813 } else { /* reuse memory, temporary allocates new memory */ 1814 PetscScalar *v2; 1815 PetscLayout tmplayout; 1816 1817 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1818 PetscCall(MatDenseGetArray(A, &v)); 1819 for (j = 0; j < n; j++) { 1820 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1821 } 1822 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1823 PetscCall(PetscFree(v2)); 1824 PetscCall(MatDenseRestoreArray(A, &v)); 1825 /* cleanup size dependent quantities */ 1826 PetscCall(VecDestroy(&mat->cvec)); 1827 PetscCall(MatDestroy(&mat->cmat)); 1828 PetscCall(PetscFree(mat->pivots)); 1829 PetscCall(PetscFree(mat->fwork)); 1830 /* swap row/col layouts */ 1831 PetscCall(PetscBLASIntCast(n, &mat->lda)); 1832 tmplayout = A->rmap; 1833 A->rmap = A->cmap; 1834 A->cmap = tmplayout; 1835 } 1836 } else { /* out-of-place transpose */ 1837 Mat tmat; 1838 Mat_SeqDense *tmatd; 1839 PetscScalar *v2; 1840 PetscInt M2; 1841 1842 if (reuse == MAT_INITIAL_MATRIX) { 1843 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1844 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1845 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1846 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1847 } else tmat = *matout; 1848 1849 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1850 PetscCall(MatDenseGetArray(tmat, &v2)); 1851 tmatd = (Mat_SeqDense *)tmat->data; 1852 M2 = tmatd->lda; 1853 for (j = 0; j < n; j++) { 1854 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1855 } 1856 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1857 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1858 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1859 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1860 *matout = tmat; 1861 } 1862 PetscFunctionReturn(PETSC_SUCCESS); 1863 } 1864 1865 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1866 { 1867 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1868 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1869 PetscInt i; 1870 const PetscScalar *v1, *v2; 1871 1872 PetscFunctionBegin; 1873 if (A1->rmap->n != A2->rmap->n) { 1874 *flg = PETSC_FALSE; 1875 PetscFunctionReturn(PETSC_SUCCESS); 1876 } 1877 if (A1->cmap->n != A2->cmap->n) { 1878 *flg = PETSC_FALSE; 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1882 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1883 for (i = 0; i < A1->cmap->n; i++) { 1884 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1885 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); 1886 v1 += mat1->lda; 1887 v2 += mat2->lda; 1888 } 1889 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1890 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1891 *flg = PETSC_TRUE; 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1896 { 1897 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1898 PetscInt i, n, len; 1899 PetscScalar *x; 1900 const PetscScalar *vv; 1901 1902 PetscFunctionBegin; 1903 PetscCall(VecGetSize(v, &n)); 1904 PetscCall(VecGetArray(v, &x)); 1905 len = PetscMin(A->rmap->n, A->cmap->n); 1906 PetscCall(MatDenseGetArrayRead(A, &vv)); 1907 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1908 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1909 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1910 PetscCall(VecRestoreArray(v, &x)); 1911 PetscFunctionReturn(PETSC_SUCCESS); 1912 } 1913 1914 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1915 { 1916 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1917 const PetscScalar *l, *r; 1918 PetscScalar x, *v, *vv; 1919 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1920 1921 PetscFunctionBegin; 1922 PetscCall(MatDenseGetArray(A, &vv)); 1923 if (ll) { 1924 PetscCall(VecGetSize(ll, &m)); 1925 PetscCall(VecGetArrayRead(ll, &l)); 1926 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1927 for (i = 0; i < m; i++) { 1928 x = l[i]; 1929 v = vv + i; 1930 for (j = 0; j < n; j++) { 1931 (*v) *= x; 1932 v += mat->lda; 1933 } 1934 } 1935 PetscCall(VecRestoreArrayRead(ll, &l)); 1936 PetscCall(PetscLogFlops(1.0 * n * m)); 1937 } 1938 if (rr) { 1939 PetscCall(VecGetSize(rr, &n)); 1940 PetscCall(VecGetArrayRead(rr, &r)); 1941 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1942 for (i = 0; i < n; i++) { 1943 x = r[i]; 1944 v = vv + i * mat->lda; 1945 for (j = 0; j < m; j++) (*v++) *= x; 1946 } 1947 PetscCall(VecRestoreArrayRead(rr, &r)); 1948 PetscCall(PetscLogFlops(1.0 * n * m)); 1949 } 1950 PetscCall(MatDenseRestoreArray(A, &vv)); 1951 PetscFunctionReturn(PETSC_SUCCESS); 1952 } 1953 1954 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1955 { 1956 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1957 PetscScalar *v, *vv; 1958 PetscReal sum = 0.0; 1959 PetscInt lda, m = A->rmap->n, i, j; 1960 1961 PetscFunctionBegin; 1962 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1963 PetscCall(MatDenseGetLDA(A, &lda)); 1964 v = vv; 1965 if (type == NORM_FROBENIUS) { 1966 if (lda > m) { 1967 for (j = 0; j < A->cmap->n; j++) { 1968 v = vv + j * lda; 1969 for (i = 0; i < m; i++) { 1970 sum += PetscRealPart(PetscConj(*v) * (*v)); 1971 v++; 1972 } 1973 } 1974 } else { 1975 #if defined(PETSC_USE_REAL___FP16) 1976 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1977 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1978 } 1979 #else 1980 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1981 sum += PetscRealPart(PetscConj(*v) * (*v)); 1982 v++; 1983 } 1984 } 1985 *nrm = PetscSqrtReal(sum); 1986 #endif 1987 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1988 } else if (type == NORM_1) { 1989 *nrm = 0.0; 1990 for (j = 0; j < A->cmap->n; j++) { 1991 v = vv + j * mat->lda; 1992 sum = 0.0; 1993 for (i = 0; i < A->rmap->n; i++) { 1994 sum += PetscAbsScalar(*v); 1995 v++; 1996 } 1997 if (sum > *nrm) *nrm = sum; 1998 } 1999 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2000 } else if (type == NORM_INFINITY) { 2001 *nrm = 0.0; 2002 for (j = 0; j < A->rmap->n; j++) { 2003 v = vv + j; 2004 sum = 0.0; 2005 for (i = 0; i < A->cmap->n; i++) { 2006 sum += PetscAbsScalar(*v); 2007 v += mat->lda; 2008 } 2009 if (sum > *nrm) *nrm = sum; 2010 } 2011 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2012 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2013 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2014 PetscFunctionReturn(PETSC_SUCCESS); 2015 } 2016 2017 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2018 { 2019 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2020 2021 PetscFunctionBegin; 2022 switch (op) { 2023 case MAT_ROW_ORIENTED: 2024 aij->roworiented = flg; 2025 break; 2026 default: 2027 break; 2028 } 2029 PetscFunctionReturn(PETSC_SUCCESS); 2030 } 2031 2032 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2033 { 2034 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2035 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2036 PetscScalar *v; 2037 2038 PetscFunctionBegin; 2039 PetscCall(MatDenseGetArrayWrite(A, &v)); 2040 if (lda > m) { 2041 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2042 } else { 2043 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2044 } 2045 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2050 { 2051 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2052 PetscInt m = l->lda, n = A->cmap->n, i, j; 2053 PetscScalar *slot, *bb, *v; 2054 const PetscScalar *xx; 2055 2056 PetscFunctionBegin; 2057 if (PetscDefined(USE_DEBUG)) { 2058 for (i = 0; i < N; i++) { 2059 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2060 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2061 } 2062 } 2063 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 2064 2065 /* fix right-hand side if needed */ 2066 if (x && b) { 2067 PetscCall(VecGetArrayRead(x, &xx)); 2068 PetscCall(VecGetArray(b, &bb)); 2069 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2070 PetscCall(VecRestoreArrayRead(x, &xx)); 2071 PetscCall(VecRestoreArray(b, &bb)); 2072 } 2073 2074 PetscCall(MatDenseGetArray(A, &v)); 2075 for (i = 0; i < N; i++) { 2076 slot = v + rows[i]; 2077 for (j = 0; j < n; j++) { 2078 *slot = 0.0; 2079 slot += m; 2080 } 2081 } 2082 if (diag != 0.0) { 2083 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2084 for (i = 0; i < N; i++) { 2085 slot = v + (m + 1) * rows[i]; 2086 *slot = diag; 2087 } 2088 } 2089 PetscCall(MatDenseRestoreArray(A, &v)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2094 { 2095 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2096 2097 PetscFunctionBegin; 2098 *lda = mat->lda; 2099 PetscFunctionReturn(PETSC_SUCCESS); 2100 } 2101 2102 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2103 { 2104 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2105 2106 PetscFunctionBegin; 2107 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2108 *array = mat->v; 2109 PetscFunctionReturn(PETSC_SUCCESS); 2110 } 2111 2112 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2113 { 2114 PetscFunctionBegin; 2115 if (array) *array = NULL; 2116 PetscFunctionReturn(PETSC_SUCCESS); 2117 } 2118 2119 /*@ 2120 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2121 2122 Not Collective 2123 2124 Input Parameter: 2125 . A - a `MATDENSE` or `MATDENSECUDA` matrix 2126 2127 Output Parameter: 2128 . lda - the leading dimension 2129 2130 Level: intermediate 2131 2132 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2133 @*/ 2134 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2135 { 2136 PetscFunctionBegin; 2137 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2138 PetscAssertPointer(lda, 2); 2139 MatCheckPreallocated(A, 1); 2140 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2141 PetscFunctionReturn(PETSC_SUCCESS); 2142 } 2143 2144 /*@ 2145 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2146 2147 Collective if the matrix layouts have not yet been setup 2148 2149 Input Parameters: 2150 + A - a `MATDENSE` or `MATDENSECUDA` matrix 2151 - lda - the leading dimension 2152 2153 Level: intermediate 2154 2155 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2156 @*/ 2157 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2158 { 2159 PetscFunctionBegin; 2160 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2161 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2162 PetscFunctionReturn(PETSC_SUCCESS); 2163 } 2164 2165 /*@C 2166 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2167 2168 Logically Collective 2169 2170 Input Parameter: 2171 . A - a dense matrix 2172 2173 Output Parameter: 2174 . array - pointer to the data 2175 2176 Level: intermediate 2177 2178 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2179 @*/ 2180 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) PeNS 2181 { 2182 PetscFunctionBegin; 2183 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2184 PetscAssertPointer(array, 2); 2185 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2186 PetscFunctionReturn(PETSC_SUCCESS); 2187 } 2188 2189 /*@C 2190 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2191 2192 Logically Collective 2193 2194 Input Parameters: 2195 + A - a dense matrix 2196 - array - pointer to the data (may be `NULL`) 2197 2198 Level: intermediate 2199 2200 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2201 @*/ 2202 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) PeNS 2203 { 2204 PetscFunctionBegin; 2205 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2206 if (array) PetscAssertPointer(array, 2); 2207 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2208 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2209 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2210 A->offloadmask = PETSC_OFFLOAD_CPU; 2211 #endif 2212 PetscFunctionReturn(PETSC_SUCCESS); 2213 } 2214 2215 /*@C 2216 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2217 2218 Not Collective 2219 2220 Input Parameter: 2221 . A - a dense matrix 2222 2223 Output Parameter: 2224 . array - pointer to the data 2225 2226 Level: intermediate 2227 2228 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2229 @*/ 2230 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) PeNS 2231 { 2232 PetscFunctionBegin; 2233 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2234 PetscAssertPointer(array, 2); 2235 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 /*@C 2240 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2241 2242 Not Collective 2243 2244 Input Parameters: 2245 + A - a dense matrix 2246 - array - pointer to the data (may be `NULL`) 2247 2248 Level: intermediate 2249 2250 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2251 @*/ 2252 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) PeNS 2253 { 2254 PetscFunctionBegin; 2255 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2256 if (array) PetscAssertPointer(array, 2); 2257 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2258 PetscFunctionReturn(PETSC_SUCCESS); 2259 } 2260 2261 /*@C 2262 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2263 2264 Not Collective 2265 2266 Input Parameter: 2267 . A - a dense matrix 2268 2269 Output Parameter: 2270 . array - pointer to the data 2271 2272 Level: intermediate 2273 2274 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2275 @*/ 2276 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) PeNS 2277 { 2278 PetscFunctionBegin; 2279 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2280 PetscAssertPointer(array, 2); 2281 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2282 PetscFunctionReturn(PETSC_SUCCESS); 2283 } 2284 2285 /*@C 2286 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2287 2288 Not Collective 2289 2290 Input Parameters: 2291 + A - a dense matrix 2292 - array - pointer to the data (may be `NULL`) 2293 2294 Level: intermediate 2295 2296 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2297 @*/ 2298 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) PeNS 2299 { 2300 PetscFunctionBegin; 2301 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2302 if (array) PetscAssertPointer(array, 2); 2303 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2304 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2305 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2306 A->offloadmask = PETSC_OFFLOAD_CPU; 2307 #endif 2308 PetscFunctionReturn(PETSC_SUCCESS); 2309 } 2310 2311 /*@C 2312 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2313 2314 Logically Collective 2315 2316 Input Parameter: 2317 . A - a dense matrix 2318 2319 Output Parameters: 2320 + array - pointer to the data 2321 - mtype - memory type of the returned pointer 2322 2323 Level: intermediate 2324 2325 Note: 2326 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2327 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2328 2329 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, 2330 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2331 @*/ 2332 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2333 { 2334 PetscBool isMPI; 2335 2336 PetscFunctionBegin; 2337 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2338 PetscAssertPointer(array, 2); 2339 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2340 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2341 if (isMPI) { 2342 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2343 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2344 } else { 2345 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2346 2347 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); 2348 if (fptr) { 2349 PetscCall((*fptr)(A, array, mtype)); 2350 } else { 2351 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2352 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2353 } 2354 } 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } 2357 2358 /*@C 2359 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` 2360 2361 Logically Collective 2362 2363 Input Parameters: 2364 + A - a dense matrix 2365 - array - pointer to the data 2366 2367 Level: intermediate 2368 2369 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2370 @*/ 2371 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[]) 2372 { 2373 PetscBool isMPI; 2374 2375 PetscFunctionBegin; 2376 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2377 PetscAssertPointer(array, 2); 2378 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2379 if (isMPI) { 2380 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2381 } else { 2382 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2383 2384 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); 2385 if (fptr) { 2386 PetscCall((*fptr)(A, array)); 2387 } else { 2388 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2389 } 2390 *array = NULL; 2391 } 2392 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2393 PetscFunctionReturn(PETSC_SUCCESS); 2394 } 2395 2396 /*@C 2397 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2398 2399 Logically Collective 2400 2401 Input Parameter: 2402 . A - a dense matrix 2403 2404 Output Parameters: 2405 + array - pointer to the data 2406 - mtype - memory type of the returned pointer 2407 2408 Level: intermediate 2409 2410 Note: 2411 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2412 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2413 2414 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, 2415 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2416 @*/ 2417 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype) 2418 { 2419 PetscBool isMPI; 2420 2421 PetscFunctionBegin; 2422 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2423 PetscAssertPointer(array, 2); 2424 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2425 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2426 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2427 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2428 } else { 2429 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); 2430 2431 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); 2432 if (fptr) { 2433 PetscCall((*fptr)(A, array, mtype)); 2434 } else { 2435 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2436 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2437 } 2438 } 2439 PetscFunctionReturn(PETSC_SUCCESS); 2440 } 2441 2442 /*@C 2443 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2444 2445 Logically Collective 2446 2447 Input Parameters: 2448 + A - a dense matrix 2449 - array - pointer to the data 2450 2451 Level: intermediate 2452 2453 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2454 @*/ 2455 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[]) 2456 { 2457 PetscBool isMPI; 2458 2459 PetscFunctionBegin; 2460 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2461 PetscAssertPointer(array, 2); 2462 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2463 if (isMPI) { 2464 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2465 } else { 2466 PetscErrorCode (*fptr)(Mat, const PetscScalar **); 2467 2468 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); 2469 if (fptr) { 2470 PetscCall((*fptr)(A, array)); 2471 } else { 2472 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2473 } 2474 *array = NULL; 2475 } 2476 PetscFunctionReturn(PETSC_SUCCESS); 2477 } 2478 2479 /*@C 2480 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2481 2482 Logically Collective 2483 2484 Input Parameter: 2485 . A - a dense matrix 2486 2487 Output Parameters: 2488 + array - pointer to the data 2489 - mtype - memory type of the returned pointer 2490 2491 Level: intermediate 2492 2493 Note: 2494 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2495 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2496 2497 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, 2498 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2499 @*/ 2500 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2501 { 2502 PetscBool isMPI; 2503 2504 PetscFunctionBegin; 2505 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2506 PetscAssertPointer(array, 2); 2507 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2508 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2509 if (isMPI) { 2510 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2511 } else { 2512 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2513 2514 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); 2515 if (fptr) { 2516 PetscCall((*fptr)(A, array, mtype)); 2517 } else { 2518 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2519 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2520 } 2521 } 2522 PetscFunctionReturn(PETSC_SUCCESS); 2523 } 2524 2525 /*@C 2526 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2527 2528 Logically Collective 2529 2530 Input Parameters: 2531 + A - a dense matrix 2532 - array - pointer to the data 2533 2534 Level: intermediate 2535 2536 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2537 @*/ 2538 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[]) 2539 { 2540 PetscBool isMPI; 2541 2542 PetscFunctionBegin; 2543 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2544 PetscAssertPointer(array, 2); 2545 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2546 if (isMPI) { 2547 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2548 } else { 2549 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2550 2551 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); 2552 if (fptr) { 2553 PetscCall((*fptr)(A, array)); 2554 } else { 2555 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2556 } 2557 *array = NULL; 2558 } 2559 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2560 PetscFunctionReturn(PETSC_SUCCESS); 2561 } 2562 2563 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2564 { 2565 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2566 PetscInt i, j, nrows, ncols, ldb; 2567 const PetscInt *irow, *icol; 2568 PetscScalar *av, *bv, *v = mat->v; 2569 Mat newmat; 2570 2571 PetscFunctionBegin; 2572 PetscCall(ISGetIndices(isrow, &irow)); 2573 PetscCall(ISGetIndices(iscol, &icol)); 2574 PetscCall(ISGetLocalSize(isrow, &nrows)); 2575 PetscCall(ISGetLocalSize(iscol, &ncols)); 2576 2577 /* Check submatrixcall */ 2578 if (scall == MAT_REUSE_MATRIX) { 2579 PetscInt n_cols, n_rows; 2580 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2581 if (n_rows != nrows || n_cols != ncols) { 2582 /* resize the result matrix to match number of requested rows/columns */ 2583 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2584 } 2585 newmat = *B; 2586 } else { 2587 /* Create and fill new matrix */ 2588 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2589 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2590 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2591 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2592 } 2593 2594 /* Now extract the data pointers and do the copy,column at a time */ 2595 PetscCall(MatDenseGetArray(newmat, &bv)); 2596 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2597 for (i = 0; i < ncols; i++) { 2598 av = v + mat->lda * icol[i]; 2599 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2600 bv += ldb; 2601 } 2602 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2603 2604 /* Assemble the matrices so that the correct flags are set */ 2605 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2606 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2607 2608 /* Free work space */ 2609 PetscCall(ISRestoreIndices(isrow, &irow)); 2610 PetscCall(ISRestoreIndices(iscol, &icol)); 2611 *B = newmat; 2612 PetscFunctionReturn(PETSC_SUCCESS); 2613 } 2614 2615 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2616 { 2617 PetscInt i; 2618 2619 PetscFunctionBegin; 2620 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2621 2622 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2623 PetscFunctionReturn(PETSC_SUCCESS); 2624 } 2625 2626 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2627 { 2628 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2629 const PetscScalar *va; 2630 PetscScalar *vb; 2631 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2632 2633 PetscFunctionBegin; 2634 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2635 if (A->ops->copy != B->ops->copy) { 2636 PetscCall(MatCopy_Basic(A, B, str)); 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2640 PetscCall(MatDenseGetArrayRead(A, &va)); 2641 PetscCall(MatDenseGetArray(B, &vb)); 2642 if (lda1 > m || lda2 > m) { 2643 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2644 } else { 2645 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2646 } 2647 PetscCall(MatDenseRestoreArray(B, &vb)); 2648 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2649 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2650 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2651 PetscFunctionReturn(PETSC_SUCCESS); 2652 } 2653 2654 PetscErrorCode MatSetUp_SeqDense(Mat A) 2655 { 2656 PetscFunctionBegin; 2657 PetscCall(PetscLayoutSetUp(A->rmap)); 2658 PetscCall(PetscLayoutSetUp(A->cmap)); 2659 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2660 PetscFunctionReturn(PETSC_SUCCESS); 2661 } 2662 2663 PetscErrorCode MatConjugate_SeqDense(Mat A) 2664 { 2665 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2666 PetscInt i, j; 2667 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2668 PetscScalar *aa; 2669 2670 PetscFunctionBegin; 2671 PetscCall(MatDenseGetArray(A, &aa)); 2672 for (j = 0; j < A->cmap->n; j++) { 2673 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2674 } 2675 PetscCall(MatDenseRestoreArray(A, &aa)); 2676 if (mat->tau) 2677 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2678 PetscFunctionReturn(PETSC_SUCCESS); 2679 } 2680 2681 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2682 { 2683 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2684 PetscInt i, j; 2685 PetscScalar *aa; 2686 2687 PetscFunctionBegin; 2688 PetscCall(MatDenseGetArray(A, &aa)); 2689 for (j = 0; j < A->cmap->n; j++) { 2690 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2691 } 2692 PetscCall(MatDenseRestoreArray(A, &aa)); 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2697 { 2698 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2699 PetscInt i, j; 2700 PetscScalar *aa; 2701 2702 PetscFunctionBegin; 2703 PetscCall(MatDenseGetArray(A, &aa)); 2704 for (j = 0; j < A->cmap->n; j++) { 2705 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2706 } 2707 PetscCall(MatDenseRestoreArray(A, &aa)); 2708 PetscFunctionReturn(PETSC_SUCCESS); 2709 } 2710 2711 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2712 { 2713 PetscInt m = A->rmap->n, n = B->cmap->n; 2714 PetscBool cisdense = PETSC_FALSE; 2715 2716 PetscFunctionBegin; 2717 PetscCall(MatSetSizes(C, m, n, m, n)); 2718 #if defined(PETSC_HAVE_CUDA) 2719 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2720 #endif 2721 #if defined(PETSC_HAVE_HIP) 2722 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2723 #endif 2724 if (!cisdense) { 2725 PetscBool flg; 2726 2727 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2728 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2729 } 2730 PetscCall(MatSetUp(C)); 2731 PetscFunctionReturn(PETSC_SUCCESS); 2732 } 2733 2734 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2735 { 2736 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2737 PetscBLASInt m, n, k; 2738 const PetscScalar *av, *bv; 2739 PetscScalar *cv; 2740 PetscScalar _DOne = 1.0, _DZero = 0.0; 2741 2742 PetscFunctionBegin; 2743 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2744 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2745 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2746 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2747 PetscCall(MatDenseGetArrayRead(A, &av)); 2748 PetscCall(MatDenseGetArrayRead(B, &bv)); 2749 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2750 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2751 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2752 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2753 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2754 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2755 PetscFunctionReturn(PETSC_SUCCESS); 2756 } 2757 2758 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2759 { 2760 PetscInt m = A->rmap->n, n = B->rmap->n; 2761 PetscBool cisdense = PETSC_FALSE; 2762 2763 PetscFunctionBegin; 2764 PetscCall(MatSetSizes(C, m, n, m, n)); 2765 #if defined(PETSC_HAVE_CUDA) 2766 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2767 #endif 2768 #if defined(PETSC_HAVE_HIP) 2769 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2770 #endif 2771 if (!cisdense) { 2772 PetscBool flg; 2773 2774 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2775 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2776 } 2777 PetscCall(MatSetUp(C)); 2778 PetscFunctionReturn(PETSC_SUCCESS); 2779 } 2780 2781 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2782 { 2783 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2784 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2785 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2786 const PetscScalar *av, *bv; 2787 PetscScalar *cv; 2788 PetscBLASInt m, n, k; 2789 PetscScalar _DOne = 1.0, _DZero = 0.0; 2790 2791 PetscFunctionBegin; 2792 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2793 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2794 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2795 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2796 PetscCall(MatDenseGetArrayRead(A, &av)); 2797 PetscCall(MatDenseGetArrayRead(B, &bv)); 2798 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2799 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2800 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2801 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2802 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2803 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2804 PetscFunctionReturn(PETSC_SUCCESS); 2805 } 2806 2807 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2808 { 2809 PetscInt m = A->cmap->n, n = B->cmap->n; 2810 PetscBool cisdense = PETSC_FALSE; 2811 2812 PetscFunctionBegin; 2813 PetscCall(MatSetSizes(C, m, n, m, n)); 2814 #if defined(PETSC_HAVE_CUDA) 2815 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2816 #endif 2817 #if defined(PETSC_HAVE_HIP) 2818 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2819 #endif 2820 if (!cisdense) { 2821 PetscBool flg; 2822 2823 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2824 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2825 } 2826 PetscCall(MatSetUp(C)); 2827 PetscFunctionReturn(PETSC_SUCCESS); 2828 } 2829 2830 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2831 { 2832 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2833 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2834 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2835 const PetscScalar *av, *bv; 2836 PetscScalar *cv; 2837 PetscBLASInt m, n, k; 2838 PetscScalar _DOne = 1.0, _DZero = 0.0; 2839 2840 PetscFunctionBegin; 2841 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2842 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2843 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2844 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2845 PetscCall(MatDenseGetArrayRead(A, &av)); 2846 PetscCall(MatDenseGetArrayRead(B, &bv)); 2847 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2848 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2849 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2850 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2851 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2852 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2853 PetscFunctionReturn(PETSC_SUCCESS); 2854 } 2855 2856 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2857 { 2858 PetscFunctionBegin; 2859 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2860 C->ops->productsymbolic = MatProductSymbolic_AB; 2861 PetscFunctionReturn(PETSC_SUCCESS); 2862 } 2863 2864 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2865 { 2866 PetscFunctionBegin; 2867 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2868 C->ops->productsymbolic = MatProductSymbolic_AtB; 2869 PetscFunctionReturn(PETSC_SUCCESS); 2870 } 2871 2872 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2873 { 2874 PetscFunctionBegin; 2875 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2876 C->ops->productsymbolic = MatProductSymbolic_ABt; 2877 PetscFunctionReturn(PETSC_SUCCESS); 2878 } 2879 2880 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2881 { 2882 Mat_Product *product = C->product; 2883 2884 PetscFunctionBegin; 2885 switch (product->type) { 2886 case MATPRODUCT_AB: 2887 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2888 break; 2889 case MATPRODUCT_AtB: 2890 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2891 break; 2892 case MATPRODUCT_ABt: 2893 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2894 break; 2895 default: 2896 break; 2897 } 2898 PetscFunctionReturn(PETSC_SUCCESS); 2899 } 2900 2901 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2902 { 2903 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2904 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2905 PetscScalar *x; 2906 const PetscScalar *aa; 2907 2908 PetscFunctionBegin; 2909 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2910 PetscCall(VecGetArray(v, &x)); 2911 PetscCall(VecGetLocalSize(v, &p)); 2912 PetscCall(MatDenseGetArrayRead(A, &aa)); 2913 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2914 for (i = 0; i < m; i++) { 2915 x[i] = aa[i]; 2916 if (idx) idx[i] = 0; 2917 for (j = 1; j < n; j++) { 2918 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2919 x[i] = aa[i + a->lda * j]; 2920 if (idx) idx[i] = j; 2921 } 2922 } 2923 } 2924 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2925 PetscCall(VecRestoreArray(v, &x)); 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2930 { 2931 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2932 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2933 PetscScalar *x; 2934 PetscReal atmp; 2935 const PetscScalar *aa; 2936 2937 PetscFunctionBegin; 2938 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2939 PetscCall(VecGetArray(v, &x)); 2940 PetscCall(VecGetLocalSize(v, &p)); 2941 PetscCall(MatDenseGetArrayRead(A, &aa)); 2942 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2943 for (i = 0; i < m; i++) { 2944 x[i] = PetscAbsScalar(aa[i]); 2945 for (j = 1; j < n; j++) { 2946 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2947 if (PetscAbsScalar(x[i]) < atmp) { 2948 x[i] = atmp; 2949 if (idx) idx[i] = j; 2950 } 2951 } 2952 } 2953 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2954 PetscCall(VecRestoreArray(v, &x)); 2955 PetscFunctionReturn(PETSC_SUCCESS); 2956 } 2957 2958 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2959 { 2960 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2961 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2962 PetscScalar *x; 2963 const PetscScalar *aa; 2964 2965 PetscFunctionBegin; 2966 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2967 PetscCall(MatDenseGetArrayRead(A, &aa)); 2968 PetscCall(VecGetArray(v, &x)); 2969 PetscCall(VecGetLocalSize(v, &p)); 2970 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2971 for (i = 0; i < m; i++) { 2972 x[i] = aa[i]; 2973 if (idx) idx[i] = 0; 2974 for (j = 1; j < n; j++) { 2975 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2976 x[i] = aa[i + a->lda * j]; 2977 if (idx) idx[i] = j; 2978 } 2979 } 2980 } 2981 PetscCall(VecRestoreArray(v, &x)); 2982 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2983 PetscFunctionReturn(PETSC_SUCCESS); 2984 } 2985 2986 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 2987 { 2988 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2989 PetscScalar *x; 2990 const PetscScalar *aa; 2991 2992 PetscFunctionBegin; 2993 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2994 PetscCall(MatDenseGetArrayRead(A, &aa)); 2995 PetscCall(VecGetArray(v, &x)); 2996 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2997 PetscCall(VecRestoreArray(v, &x)); 2998 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2999 PetscFunctionReturn(PETSC_SUCCESS); 3000 } 3001 3002 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 3003 { 3004 PetscInt i, j, m, n; 3005 const PetscScalar *a; 3006 3007 PetscFunctionBegin; 3008 PetscCall(MatGetSize(A, &m, &n)); 3009 PetscCall(PetscArrayzero(reductions, n)); 3010 PetscCall(MatDenseGetArrayRead(A, &a)); 3011 if (type == NORM_2) { 3012 for (i = 0; i < n; i++) { 3013 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 3014 a = PetscSafePointerPlusOffset(a, m); 3015 } 3016 } else if (type == NORM_1) { 3017 for (i = 0; i < n; i++) { 3018 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 3019 a = PetscSafePointerPlusOffset(a, m); 3020 } 3021 } else if (type == NORM_INFINITY) { 3022 for (i = 0; i < n; i++) { 3023 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 3024 a = PetscSafePointerPlusOffset(a, m); 3025 } 3026 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 3027 for (i = 0; i < n; i++) { 3028 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 3029 a = PetscSafePointerPlusOffset(a, m); 3030 } 3031 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3032 for (i = 0; i < n; i++) { 3033 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 3034 a = PetscSafePointerPlusOffset(a, m); 3035 } 3036 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3037 PetscCall(MatDenseRestoreArrayRead(A, &a)); 3038 if (type == NORM_2) { 3039 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 3040 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3041 for (i = 0; i < n; i++) reductions[i] /= m; 3042 } 3043 PetscFunctionReturn(PETSC_SUCCESS); 3044 } 3045 3046 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 3047 { 3048 PetscScalar *a; 3049 PetscInt lda, m, n, i, j; 3050 3051 PetscFunctionBegin; 3052 PetscCall(MatGetSize(x, &m, &n)); 3053 PetscCall(MatDenseGetLDA(x, &lda)); 3054 PetscCall(MatDenseGetArrayWrite(x, &a)); 3055 for (j = 0; j < n; j++) { 3056 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 3057 } 3058 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 3059 PetscFunctionReturn(PETSC_SUCCESS); 3060 } 3061 3062 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 3063 { 3064 PetscFunctionBegin; 3065 *missing = PETSC_FALSE; 3066 PetscFunctionReturn(PETSC_SUCCESS); 3067 } 3068 3069 /* vals is not const */ 3070 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 3071 { 3072 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3073 PetscScalar *v; 3074 3075 PetscFunctionBegin; 3076 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3077 PetscCall(MatDenseGetArray(A, &v)); 3078 *vals = v + col * a->lda; 3079 PetscCall(MatDenseRestoreArray(A, &v)); 3080 PetscFunctionReturn(PETSC_SUCCESS); 3081 } 3082 3083 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 3084 { 3085 PetscFunctionBegin; 3086 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 3087 PetscFunctionReturn(PETSC_SUCCESS); 3088 } 3089 3090 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 3091 MatGetRow_SeqDense, 3092 MatRestoreRow_SeqDense, 3093 MatMult_SeqDense, 3094 /* 4*/ MatMultAdd_SeqDense, 3095 MatMultTranspose_SeqDense, 3096 MatMultTransposeAdd_SeqDense, 3097 NULL, 3098 NULL, 3099 NULL, 3100 /* 10*/ NULL, 3101 MatLUFactor_SeqDense, 3102 MatCholeskyFactor_SeqDense, 3103 MatSOR_SeqDense, 3104 MatTranspose_SeqDense, 3105 /* 15*/ MatGetInfo_SeqDense, 3106 MatEqual_SeqDense, 3107 MatGetDiagonal_SeqDense, 3108 MatDiagonalScale_SeqDense, 3109 MatNorm_SeqDense, 3110 /* 20*/ NULL, 3111 NULL, 3112 MatSetOption_SeqDense, 3113 MatZeroEntries_SeqDense, 3114 /* 24*/ MatZeroRows_SeqDense, 3115 NULL, 3116 NULL, 3117 NULL, 3118 NULL, 3119 /* 29*/ MatSetUp_SeqDense, 3120 NULL, 3121 NULL, 3122 NULL, 3123 NULL, 3124 /* 34*/ MatDuplicate_SeqDense, 3125 NULL, 3126 NULL, 3127 NULL, 3128 NULL, 3129 /* 39*/ MatAXPY_SeqDense, 3130 MatCreateSubMatrices_SeqDense, 3131 NULL, 3132 MatGetValues_SeqDense, 3133 MatCopy_SeqDense, 3134 /* 44*/ MatGetRowMax_SeqDense, 3135 MatScale_SeqDense, 3136 MatShift_SeqDense, 3137 NULL, 3138 MatZeroRowsColumns_SeqDense, 3139 /* 49*/ MatSetRandom_SeqDense, 3140 NULL, 3141 NULL, 3142 NULL, 3143 NULL, 3144 /* 54*/ NULL, 3145 NULL, 3146 NULL, 3147 NULL, 3148 NULL, 3149 /* 59*/ MatCreateSubMatrix_SeqDense, 3150 MatDestroy_SeqDense, 3151 MatView_SeqDense, 3152 NULL, 3153 NULL, 3154 /* 64*/ NULL, 3155 NULL, 3156 NULL, 3157 NULL, 3158 NULL, 3159 /* 69*/ MatGetRowMaxAbs_SeqDense, 3160 NULL, 3161 NULL, 3162 NULL, 3163 NULL, 3164 /* 74*/ NULL, 3165 NULL, 3166 NULL, 3167 NULL, 3168 NULL, 3169 /* 79*/ NULL, 3170 NULL, 3171 NULL, 3172 NULL, 3173 /* 83*/ MatLoad_SeqDense, 3174 MatIsSymmetric_SeqDense, 3175 MatIsHermitian_SeqDense, 3176 NULL, 3177 NULL, 3178 NULL, 3179 /* 89*/ NULL, 3180 NULL, 3181 MatMatMultNumeric_SeqDense_SeqDense, 3182 NULL, 3183 NULL, 3184 /* 94*/ NULL, 3185 NULL, 3186 NULL, 3187 MatMatTransposeMultNumeric_SeqDense_SeqDense, 3188 NULL, 3189 /* 99*/ MatProductSetFromOptions_SeqDense, 3190 NULL, 3191 NULL, 3192 MatConjugate_SeqDense, 3193 NULL, 3194 /*104*/ NULL, 3195 MatRealPart_SeqDense, 3196 MatImaginaryPart_SeqDense, 3197 NULL, 3198 NULL, 3199 /*109*/ NULL, 3200 NULL, 3201 MatGetRowMin_SeqDense, 3202 MatGetColumnVector_SeqDense, 3203 MatMissingDiagonal_SeqDense, 3204 /*114*/ NULL, 3205 NULL, 3206 NULL, 3207 NULL, 3208 NULL, 3209 /*119*/ NULL, 3210 NULL, 3211 MatMultHermitianTranspose_SeqDense, 3212 MatMultHermitianTransposeAdd_SeqDense, 3213 NULL, 3214 /*124*/ NULL, 3215 MatGetColumnReductions_SeqDense, 3216 NULL, 3217 NULL, 3218 NULL, 3219 /*129*/ NULL, 3220 NULL, 3221 NULL, 3222 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3223 NULL, 3224 /*134*/ NULL, 3225 NULL, 3226 NULL, 3227 NULL, 3228 NULL, 3229 /*139*/ NULL, 3230 NULL, 3231 NULL, 3232 NULL, 3233 NULL, 3234 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3235 /*145*/ NULL, 3236 NULL, 3237 NULL, 3238 NULL, 3239 NULL, 3240 /*150*/ NULL, 3241 NULL, 3242 NULL, 3243 NULL, 3244 NULL, 3245 /*155*/ NULL, 3246 NULL}; 3247 3248 /*@ 3249 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3250 is stored in column major order (the usual Fortran format). 3251 3252 Collective 3253 3254 Input Parameters: 3255 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3256 . m - number of rows 3257 . n - number of columns 3258 - data - optional location of matrix data in column major order. Use `NULL` for PETSc 3259 to control all matrix memory allocation. 3260 3261 Output Parameter: 3262 . A - the matrix 3263 3264 Level: intermediate 3265 3266 Note: 3267 The data input variable is intended primarily for Fortran programmers 3268 who wish to allocate their own matrix memory space. Most users should 3269 set `data` = `NULL`. 3270 3271 Developer Note: 3272 Many of the matrix operations for this variant use the BLAS and LAPACK routines. 3273 3274 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3275 @*/ 3276 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A) 3277 { 3278 PetscFunctionBegin; 3279 PetscCall(MatCreate(comm, A)); 3280 PetscCall(MatSetSizes(*A, m, n, m, n)); 3281 PetscCall(MatSetType(*A, MATSEQDENSE)); 3282 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3283 PetscFunctionReturn(PETSC_SUCCESS); 3284 } 3285 3286 /*@ 3287 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3288 3289 Collective 3290 3291 Input Parameters: 3292 + B - the matrix 3293 - data - the array (or `NULL`) 3294 3295 Level: intermediate 3296 3297 Note: 3298 The data input variable is intended primarily for Fortran programmers 3299 who wish to allocate their own matrix memory space. Most users should 3300 need not call this routine. 3301 3302 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3303 @*/ 3304 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3305 { 3306 PetscFunctionBegin; 3307 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3308 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3309 PetscFunctionReturn(PETSC_SUCCESS); 3310 } 3311 3312 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3313 { 3314 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3315 3316 PetscFunctionBegin; 3317 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3318 B->preallocated = PETSC_TRUE; 3319 3320 PetscCall(PetscLayoutSetUp(B->rmap)); 3321 PetscCall(PetscLayoutSetUp(B->cmap)); 3322 3323 if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda)); 3324 3325 if (!data) { /* petsc-allocated storage */ 3326 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3327 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3328 3329 b->user_alloc = PETSC_FALSE; 3330 } else { /* user-allocated storage */ 3331 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3332 b->v = data; 3333 b->user_alloc = PETSC_TRUE; 3334 } 3335 B->assembled = PETSC_TRUE; 3336 PetscFunctionReturn(PETSC_SUCCESS); 3337 } 3338 3339 #if defined(PETSC_HAVE_ELEMENTAL) 3340 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3341 { 3342 Mat mat_elemental; 3343 const PetscScalar *array; 3344 PetscScalar *v_colwise; 3345 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3346 3347 PetscFunctionBegin; 3348 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3349 PetscCall(MatDenseGetArrayRead(A, &array)); 3350 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3351 k = 0; 3352 for (j = 0; j < N; j++) { 3353 cols[j] = j; 3354 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3355 } 3356 for (i = 0; i < M; i++) rows[i] = i; 3357 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3358 3359 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3360 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3361 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3362 PetscCall(MatSetUp(mat_elemental)); 3363 3364 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3365 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3366 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3367 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3368 PetscCall(PetscFree3(v_colwise, rows, cols)); 3369 3370 if (reuse == MAT_INPLACE_MATRIX) { 3371 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3372 } else { 3373 *newmat = mat_elemental; 3374 } 3375 PetscFunctionReturn(PETSC_SUCCESS); 3376 } 3377 #endif 3378 3379 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3380 { 3381 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3382 PetscBool data; 3383 3384 PetscFunctionBegin; 3385 data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 3386 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3387 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3388 PetscCall(PetscBLASIntCast(lda, &b->lda)); 3389 PetscFunctionReturn(PETSC_SUCCESS); 3390 } 3391 3392 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3393 { 3394 PetscFunctionBegin; 3395 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } 3398 3399 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) 3400 { 3401 PetscBool isstd, iskok, iscuda, iship; 3402 PetscMPIInt size; 3403 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) 3404 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ 3405 const PetscScalar *a; 3406 #endif 3407 3408 PetscFunctionBegin; 3409 *v = NULL; 3410 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); 3411 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); 3412 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); 3413 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); 3414 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3415 if (isstd) { 3416 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3417 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3418 } else if (iskok) { 3419 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); 3420 #if PetscDefined(HAVE_KOKKOS_KERNELS) 3421 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3422 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3423 #endif 3424 } else if (iscuda) { 3425 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); 3426 #if PetscDefined(HAVE_CUDA) 3427 PetscCall(MatDenseCUDAGetArrayRead(A, &a)); 3428 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3429 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3430 #endif 3431 } else if (iship) { 3432 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); 3433 #if PetscDefined(HAVE_HIP) 3434 PetscCall(MatDenseHIPGetArrayRead(A, &a)); 3435 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3436 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3437 #endif 3438 } 3439 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); 3440 PetscFunctionReturn(PETSC_SUCCESS); 3441 } 3442 3443 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3444 { 3445 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3446 3447 PetscFunctionBegin; 3448 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3449 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3450 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3451 a->vecinuse = col + 1; 3452 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3453 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3454 *v = a->cvec; 3455 PetscFunctionReturn(PETSC_SUCCESS); 3456 } 3457 3458 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3459 { 3460 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3461 3462 PetscFunctionBegin; 3463 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3464 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3465 VecCheckAssembled(a->cvec); 3466 a->vecinuse = 0; 3467 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3468 PetscCall(VecResetArray(a->cvec)); 3469 if (v) *v = NULL; 3470 PetscFunctionReturn(PETSC_SUCCESS); 3471 } 3472 3473 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3474 { 3475 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3476 3477 PetscFunctionBegin; 3478 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3479 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3480 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3481 a->vecinuse = col + 1; 3482 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3483 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3484 PetscCall(VecLockReadPush(a->cvec)); 3485 *v = a->cvec; 3486 PetscFunctionReturn(PETSC_SUCCESS); 3487 } 3488 3489 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3490 { 3491 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3492 3493 PetscFunctionBegin; 3494 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3495 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3496 VecCheckAssembled(a->cvec); 3497 a->vecinuse = 0; 3498 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3499 PetscCall(VecLockReadPop(a->cvec)); 3500 PetscCall(VecResetArray(a->cvec)); 3501 if (v) *v = NULL; 3502 PetscFunctionReturn(PETSC_SUCCESS); 3503 } 3504 3505 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3506 { 3507 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3508 3509 PetscFunctionBegin; 3510 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3511 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3512 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3513 a->vecinuse = col + 1; 3514 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3515 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3516 *v = a->cvec; 3517 PetscFunctionReturn(PETSC_SUCCESS); 3518 } 3519 3520 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3521 { 3522 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3523 3524 PetscFunctionBegin; 3525 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3526 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3527 VecCheckAssembled(a->cvec); 3528 a->vecinuse = 0; 3529 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3530 PetscCall(VecResetArray(a->cvec)); 3531 if (v) *v = NULL; 3532 PetscFunctionReturn(PETSC_SUCCESS); 3533 } 3534 3535 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3536 { 3537 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3538 3539 PetscFunctionBegin; 3540 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3541 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3542 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3543 if (!a->cmat) { 3544 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); 3545 } else { 3546 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); 3547 } 3548 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3549 a->matinuse = cbegin + 1; 3550 *v = a->cmat; 3551 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3552 A->offloadmask = PETSC_OFFLOAD_CPU; 3553 #endif 3554 PetscFunctionReturn(PETSC_SUCCESS); 3555 } 3556 3557 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3558 { 3559 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3560 3561 PetscFunctionBegin; 3562 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3563 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3564 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3565 a->matinuse = 0; 3566 PetscCall(MatDenseResetArray(a->cmat)); 3567 if (v) *v = NULL; 3568 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3569 A->offloadmask = PETSC_OFFLOAD_CPU; 3570 #endif 3571 PetscFunctionReturn(PETSC_SUCCESS); 3572 } 3573 3574 /*MC 3575 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3576 3577 Options Database Key: 3578 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3579 3580 Level: beginner 3581 3582 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` 3583 M*/ 3584 PetscErrorCode MatCreate_SeqDense(Mat B) 3585 { 3586 Mat_SeqDense *b; 3587 PetscMPIInt size; 3588 3589 PetscFunctionBegin; 3590 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3591 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3592 3593 PetscCall(PetscNew(&b)); 3594 B->data = (void *)b; 3595 B->ops[0] = MatOps_Values; 3596 3597 b->roworiented = PETSC_TRUE; 3598 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3609 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3610 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3611 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3612 #if defined(PETSC_HAVE_ELEMENTAL) 3613 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3614 #endif 3615 #if defined(PETSC_HAVE_SCALAPACK) 3616 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3617 #endif 3618 #if defined(PETSC_HAVE_CUDA) 3619 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3620 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3621 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3622 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3623 #endif 3624 #if defined(PETSC_HAVE_HIP) 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3626 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3627 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3628 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3629 #endif 3630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3633 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3635 3636 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3638 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3639 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3641 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3642 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3643 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3644 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3645 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3646 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultColumnRange_C", MatMultColumnRange_SeqDense)); 3647 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); 3648 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); 3649 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); 3650 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3651 PetscFunctionReturn(PETSC_SUCCESS); 3652 } 3653 3654 /*@C 3655 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3656 3657 Not Collective 3658 3659 Input Parameters: 3660 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3661 - col - column index 3662 3663 Output Parameter: 3664 . vals - pointer to the data 3665 3666 Level: intermediate 3667 3668 Note: 3669 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3670 3671 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3672 @*/ 3673 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[]) 3674 { 3675 PetscFunctionBegin; 3676 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3677 PetscValidLogicalCollectiveInt(A, col, 2); 3678 PetscAssertPointer(vals, 3); 3679 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3680 PetscFunctionReturn(PETSC_SUCCESS); 3681 } 3682 3683 /*@C 3684 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3685 3686 Not Collective 3687 3688 Input Parameters: 3689 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3690 - vals - pointer to the data (may be `NULL`) 3691 3692 Level: intermediate 3693 3694 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` 3695 @*/ 3696 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[]) 3697 { 3698 PetscFunctionBegin; 3699 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3700 PetscAssertPointer(vals, 2); 3701 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3702 PetscFunctionReturn(PETSC_SUCCESS); 3703 } 3704 3705 /*@ 3706 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3707 3708 Collective 3709 3710 Input Parameters: 3711 + A - the `Mat` object 3712 - col - the column index 3713 3714 Output Parameter: 3715 . v - the vector 3716 3717 Level: intermediate 3718 3719 Notes: 3720 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3721 3722 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3723 3724 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3725 @*/ 3726 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3727 { 3728 PetscFunctionBegin; 3729 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3730 PetscValidType(A, 1); 3731 PetscValidLogicalCollectiveInt(A, col, 2); 3732 PetscAssertPointer(v, 3); 3733 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3734 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3735 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3736 PetscFunctionReturn(PETSC_SUCCESS); 3737 } 3738 3739 /*@ 3740 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. 3741 3742 Collective 3743 3744 Input Parameters: 3745 + A - the `Mat` object 3746 . col - the column index 3747 - v - the `Vec` object (may be `NULL`) 3748 3749 Level: intermediate 3750 3751 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3752 @*/ 3753 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3754 { 3755 PetscFunctionBegin; 3756 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3757 PetscValidType(A, 1); 3758 PetscValidLogicalCollectiveInt(A, col, 2); 3759 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3760 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3761 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3762 PetscFunctionReturn(PETSC_SUCCESS); 3763 } 3764 3765 /*@ 3766 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. 3767 3768 Collective 3769 3770 Input Parameters: 3771 + A - the `Mat` object 3772 - col - the column index 3773 3774 Output Parameter: 3775 . v - the vector 3776 3777 Level: intermediate 3778 3779 Notes: 3780 The vector is owned by PETSc and users cannot modify it. 3781 3782 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. 3783 3784 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. 3785 3786 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3787 @*/ 3788 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3789 { 3790 PetscFunctionBegin; 3791 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3792 PetscValidType(A, 1); 3793 PetscValidLogicalCollectiveInt(A, col, 2); 3794 PetscAssertPointer(v, 3); 3795 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3796 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3797 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3798 PetscFunctionReturn(PETSC_SUCCESS); 3799 } 3800 3801 /*@ 3802 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. 3803 3804 Collective 3805 3806 Input Parameters: 3807 + A - the `Mat` object 3808 . col - the column index 3809 - v - the `Vec` object (may be `NULL`) 3810 3811 Level: intermediate 3812 3813 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3814 @*/ 3815 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3816 { 3817 PetscFunctionBegin; 3818 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3819 PetscValidType(A, 1); 3820 PetscValidLogicalCollectiveInt(A, col, 2); 3821 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3822 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3823 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3824 PetscFunctionReturn(PETSC_SUCCESS); 3825 } 3826 3827 /*@ 3828 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. 3829 3830 Collective 3831 3832 Input Parameters: 3833 + A - the `Mat` object 3834 - col - the column index 3835 3836 Output Parameter: 3837 . v - the vector 3838 3839 Level: intermediate 3840 3841 Notes: 3842 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. 3843 3844 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. 3845 3846 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3847 @*/ 3848 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3849 { 3850 PetscFunctionBegin; 3851 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3852 PetscValidType(A, 1); 3853 PetscValidLogicalCollectiveInt(A, col, 2); 3854 PetscAssertPointer(v, 3); 3855 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3856 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3857 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3858 PetscFunctionReturn(PETSC_SUCCESS); 3859 } 3860 3861 /*@ 3862 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. 3863 3864 Collective 3865 3866 Input Parameters: 3867 + A - the `Mat` object 3868 . col - the column index 3869 - v - the `Vec` object (may be `NULL`) 3870 3871 Level: intermediate 3872 3873 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3874 @*/ 3875 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3876 { 3877 PetscFunctionBegin; 3878 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3879 PetscValidType(A, 1); 3880 PetscValidLogicalCollectiveInt(A, col, 2); 3881 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3882 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3883 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3884 PetscFunctionReturn(PETSC_SUCCESS); 3885 } 3886 3887 /*@ 3888 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. 3889 3890 Collective 3891 3892 Input Parameters: 3893 + A - the `Mat` object 3894 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) 3895 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) 3896 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) 3897 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) 3898 3899 Output Parameter: 3900 . v - the matrix 3901 3902 Level: intermediate 3903 3904 Notes: 3905 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. 3906 3907 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. 3908 3909 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3910 @*/ 3911 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3912 { 3913 PetscFunctionBegin; 3914 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3915 PetscValidType(A, 1); 3916 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3917 PetscValidLogicalCollectiveInt(A, rend, 3); 3918 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3919 PetscValidLogicalCollectiveInt(A, cend, 5); 3920 PetscAssertPointer(v, 6); 3921 if (rbegin == PETSC_DECIDE) rbegin = 0; 3922 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3923 if (cbegin == PETSC_DECIDE) cbegin = 0; 3924 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3925 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3926 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3927 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3928 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3929 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3930 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3931 PetscFunctionReturn(PETSC_SUCCESS); 3932 } 3933 3934 /*@ 3935 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. 3936 3937 Collective 3938 3939 Input Parameters: 3940 + A - the `Mat` object 3941 - v - the `Mat` object (may be `NULL`) 3942 3943 Level: intermediate 3944 3945 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3946 @*/ 3947 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3948 { 3949 PetscFunctionBegin; 3950 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3951 PetscValidType(A, 1); 3952 PetscAssertPointer(v, 2); 3953 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3954 PetscFunctionReturn(PETSC_SUCCESS); 3955 } 3956 3957 #include <petscblaslapack.h> 3958 #include <petsc/private/kernels/blockinvert.h> 3959 3960 PetscErrorCode MatSeqDenseInvert(Mat A) 3961 { 3962 PetscInt m; 3963 const PetscReal shift = 0.0; 3964 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 3965 PetscScalar *values; 3966 3967 PetscFunctionBegin; 3968 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3969 PetscCall(MatDenseGetArray(A, &values)); 3970 PetscCall(MatGetLocalSize(A, &m, NULL)); 3971 allowzeropivot = PetscNot(A->erroriffailure); 3972 /* factor and invert each block */ 3973 switch (m) { 3974 case 1: 3975 values[0] = (PetscScalar)1.0 / (values[0] + shift); 3976 break; 3977 case 2: 3978 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3979 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3980 break; 3981 case 3: 3982 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3983 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3984 break; 3985 case 4: 3986 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3987 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3988 break; 3989 case 5: { 3990 PetscScalar work[25]; 3991 PetscInt ipvt[5]; 3992 3993 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3994 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3995 } break; 3996 case 6: 3997 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3998 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3999 break; 4000 case 7: 4001 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 4002 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4003 break; 4004 default: { 4005 PetscInt *v_pivots, *IJ, j; 4006 PetscScalar *v_work; 4007 4008 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); 4009 for (j = 0; j < m; j++) IJ[j] = j; 4010 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4011 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4012 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 4013 } 4014 } 4015 PetscCall(MatDenseRestoreArray(A, &values)); 4016 PetscFunctionReturn(PETSC_SUCCESS); 4017 } 4018