1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/dense/mpi/mpidense.h> 9 #include <petscblaslapack.h> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <petsc/private/vecimpl.h> 12 13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 PetscInt j, k, n = A->rmap->n; 17 PetscScalar *v; 18 19 PetscFunctionBegin; 20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 21 PetscCall(MatDenseGetArray(A, &v)); 22 if (!hermitian) { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 25 } 26 } else { 27 for (k = 0; k < n; k++) { 28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 29 } 30 } 31 PetscCall(MatDenseRestoreArray(A, &v)); 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 36 { 37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 38 PetscBLASInt info, n; 39 40 PetscFunctionBegin; 41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 42 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 43 if (A->factortype == MAT_FACTOR_LU) { 44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 45 if (!mat->fwork) { 46 mat->lfwork = n; 47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 48 } 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 54 if (A->spd == PETSC_BOOL3_TRUE) { 55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 57 PetscCall(PetscFPTrapPop()); 58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 59 #if defined(PETSC_USE_COMPLEX) 60 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 65 PetscCall(PetscFPTrapPop()); 66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 67 #endif 68 } else { /* symmetric case */ 69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 73 PetscCall(PetscFPTrapPop()); 74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 75 } 76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1); 77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 79 80 A->ops->solve = NULL; 81 A->ops->matsolve = NULL; 82 A->ops->solvetranspose = NULL; 83 A->ops->matsolvetranspose = NULL; 84 A->ops->solveadd = NULL; 85 A->ops->solvetransposeadd = NULL; 86 A->factortype = MAT_FACTOR_NONE; 87 PetscCall(PetscFree(A->solvertype)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 92 { 93 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 95 PetscScalar *slot, *bb, *v; 96 const PetscScalar *xx; 97 98 PetscFunctionBegin; 99 if (PetscDefined(USE_DEBUG)) { 100 for (i = 0; i < N; i++) { 101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 104 } 105 } 106 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 107 108 /* fix right-hand side if needed */ 109 if (x && b) { 110 Vec xt; 111 112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 113 PetscCall(VecDuplicate(x, &xt)); 114 PetscCall(VecCopy(x, xt)); 115 PetscCall(VecScale(xt, -1.0)); 116 PetscCall(MatMultAdd(A, xt, b, b)); 117 PetscCall(VecDestroy(&xt)); 118 PetscCall(VecGetArrayRead(x, &xx)); 119 PetscCall(VecGetArray(b, &bb)); 120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 121 PetscCall(VecRestoreArrayRead(x, &xx)); 122 PetscCall(VecRestoreArray(b, &bb)); 123 } 124 125 PetscCall(MatDenseGetArray(A, &v)); 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i] * m; 128 PetscCall(PetscArrayzero(slot, r)); 129 } 130 for (i = 0; i < N; i++) { 131 slot = v + rows[i]; 132 for (j = 0; j < n; j++) { 133 *slot = 0.0; 134 slot += m; 135 } 136 } 137 if (diag != 0.0) { 138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 139 for (i = 0; i < N; i++) { 140 slot = v + (m + 1) * rows[i]; 141 *slot = diag; 142 } 143 } 144 PetscCall(MatDenseRestoreArray(A, &v)); 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 149 { 150 Mat B = NULL; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 152 Mat_SeqDense *b; 153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 154 const MatScalar *av; 155 PetscBool isseqdense; 156 157 PetscFunctionBegin; 158 if (reuse == MAT_REUSE_MATRIX) { 159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); 161 } 162 if (reuse != MAT_REUSE_MATRIX) { 163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 164 PetscCall(MatSetSizes(B, m, n, m, n)); 165 PetscCall(MatSetType(B, MATSEQDENSE)); 166 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 167 b = (Mat_SeqDense *)B->data; 168 } else { 169 b = (Mat_SeqDense *)((*newmat)->data); 170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); 171 } 172 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 173 for (i = 0; i < m; i++) { 174 PetscInt j; 175 for (j = 0; j < ai[1] - ai[0]; j++) { 176 b->v[*aj * b->lda + i] = *av; 177 aj++; 178 av++; 179 } 180 ai++; 181 } 182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 183 184 if (reuse == MAT_INPLACE_MATRIX) { 185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 187 PetscCall(MatHeaderReplace(A, &B)); 188 } else { 189 if (B) *newmat = B; 190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 197 { 198 Mat B = NULL; 199 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 200 PetscInt i, j; 201 PetscInt *rows, *nnz; 202 MatScalar *aa = a->v, *vals; 203 204 PetscFunctionBegin; 205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 206 if (reuse != MAT_REUSE_MATRIX) { 207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 209 PetscCall(MatSetType(B, MATSEQAIJ)); 210 for (j = 0; j < A->cmap->n; j++) { 211 for (i = 0; i < A->rmap->n; i++) 212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 213 aa += a->lda; 214 } 215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 216 } else B = *newmat; 217 aa = a->v; 218 for (j = 0; j < A->cmap->n; j++) { 219 PetscInt numRows = 0; 220 for (i = 0; i < A->rmap->n; i++) 221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 222 rows[numRows] = i; 223 vals[numRows++] = aa[i]; 224 } 225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 226 aa += a->lda; 227 } 228 PetscCall(PetscFree3(rows, nnz, vals)); 229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 231 232 if (reuse == MAT_INPLACE_MATRIX) { 233 PetscCall(MatHeaderReplace(A, &B)); 234 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 239 { 240 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 241 const PetscScalar *xv; 242 PetscScalar *yv; 243 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 244 245 PetscFunctionBegin; 246 PetscCall(MatDenseGetArrayRead(X, &xv)); 247 PetscCall(MatDenseGetArray(Y, &yv)); 248 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 249 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 250 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 251 PetscCall(PetscBLASIntCast(y->lda, &lday)); 252 if (ldax > m || lday > m) { 253 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); 254 } else { 255 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 256 } 257 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 258 PetscCall(MatDenseRestoreArray(Y, &yv)); 259 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 264 { 265 PetscLogDouble N = A->rmap->n * A->cmap->n; 266 267 PetscFunctionBegin; 268 info->block_size = 1.0; 269 info->nz_allocated = N; 270 info->nz_used = N; 271 info->nz_unneeded = 0; 272 info->assemblies = A->num_ass; 273 info->mallocs = 0; 274 info->memory = 0; /* REVIEW ME */ 275 info->fill_ratio_given = 0; 276 info->fill_ratio_needed = 0; 277 info->factor_mallocs = 0; 278 PetscFunctionReturn(PETSC_SUCCESS); 279 } 280 281 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 282 { 283 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 284 PetscScalar *v; 285 PetscBLASInt one = 1, j, nz, lda = 0; 286 287 PetscFunctionBegin; 288 PetscCall(MatDenseGetArray(A, &v)); 289 PetscCall(PetscBLASIntCast(a->lda, &lda)); 290 if (lda > A->rmap->n) { 291 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 292 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 293 } else { 294 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 295 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 296 } 297 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 298 PetscCall(MatDenseRestoreArray(A, &v)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 303 { 304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 305 PetscScalar *v; 306 PetscInt j, k; 307 308 PetscFunctionBegin; 309 PetscCall(MatDenseGetArray(A, &v)); 310 k = PetscMin(A->rmap->n, A->cmap->n); 311 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 312 PetscCall(PetscLogFlops(k)); 313 PetscCall(MatDenseRestoreArray(A, &v)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 318 { 319 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 320 PetscInt i, j, m = A->rmap->n, N = a->lda; 321 const PetscScalar *v; 322 323 PetscFunctionBegin; 324 *fl = PETSC_FALSE; 325 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 326 PetscCall(MatDenseGetArrayRead(A, &v)); 327 for (i = 0; i < m; i++) { 328 for (j = i; j < m; j++) { 329 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 330 } 331 } 332 *fl = PETSC_TRUE; 333 restore: 334 PetscCall(MatDenseRestoreArrayRead(A, &v)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 339 { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 360 { 361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 362 PetscInt lda = mat->lda, j, m, nlda = lda; 363 PetscBool isdensecpu; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 367 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 368 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 369 PetscCall(MatDenseSetLDA(newi, lda)); 370 } 371 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 372 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 373 if (cpvalues == MAT_COPY_VALUES) { 374 const PetscScalar *av; 375 PetscScalar *v; 376 377 PetscCall(MatDenseGetArrayRead(A, &av)); 378 PetscCall(MatDenseGetArrayWrite(newi, &v)); 379 PetscCall(MatDenseGetLDA(newi, &nlda)); 380 m = A->rmap->n; 381 if (lda > m || nlda > m) { 382 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); 383 } else { 384 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 385 } 386 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 387 PetscCall(MatDenseRestoreArrayRead(A, &av)); 388 PetscCall(MatPropagateSymmetryOptions(A, newi)); 389 } 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 394 { 395 PetscFunctionBegin; 396 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 397 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 398 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 399 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 404 { 405 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 406 PetscBLASInt info; 407 408 PetscFunctionBegin; 409 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 410 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 411 PetscCall(PetscFPTrapPop()); 412 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info); 413 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatConjugate_SeqDense(Mat); 418 419 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 420 { 421 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 422 PetscBLASInt info; 423 424 PetscFunctionBegin; 425 if (A->spd == PETSC_BOOL3_TRUE) { 426 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 427 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 428 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 429 PetscCall(PetscFPTrapPop()); 430 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info); 431 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 432 #if defined(PETSC_USE_COMPLEX) 433 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 434 if (T) PetscCall(MatConjugate_SeqDense(A)); 435 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 436 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 437 PetscCall(PetscFPTrapPop()); 438 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info); 439 if (T) PetscCall(MatConjugate_SeqDense(A)); 440 #endif 441 } else { /* symmetric case */ 442 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 443 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 444 PetscCall(PetscFPTrapPop()); 445 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info); 446 } 447 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 448 PetscFunctionReturn(PETSC_SUCCESS); 449 } 450 451 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 452 { 453 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 454 PetscBLASInt info; 455 char trans; 456 457 PetscFunctionBegin; 458 if (PetscDefined(USE_COMPLEX)) { 459 trans = 'C'; 460 } else { 461 trans = 'T'; 462 } 463 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 464 { /* lwork depends on the number of right-hand sides */ 465 PetscBLASInt nlfwork, lfwork = -1; 466 PetscScalar fwork; 467 468 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 469 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 470 if (nlfwork > mat->lfwork) { 471 mat->lfwork = nlfwork; 472 PetscCall(PetscFree(mat->fwork)); 473 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 474 } 475 } 476 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 477 PetscCall(PetscFPTrapPop()); 478 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 479 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 480 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 481 PetscCall(PetscFPTrapPop()); 482 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 483 for (PetscInt j = 0; j < nrhs; j++) { 484 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 485 } 486 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 487 PetscFunctionReturn(PETSC_SUCCESS); 488 } 489 490 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 491 { 492 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 493 PetscBLASInt info; 494 495 PetscFunctionBegin; 496 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 497 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 498 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 499 PetscCall(PetscFPTrapPop()); 500 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 501 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 502 { /* lwork depends on the number of right-hand sides */ 503 PetscBLASInt nlfwork, lfwork = -1; 504 PetscScalar fwork; 505 506 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 507 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 508 if (nlfwork > mat->lfwork) { 509 mat->lfwork = nlfwork; 510 PetscCall(PetscFree(mat->fwork)); 511 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 512 } 513 } 514 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 515 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 516 PetscCall(PetscFPTrapPop()); 517 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 518 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 519 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 520 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 521 PetscFunctionReturn(PETSC_SUCCESS); 522 } 523 524 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 525 { 526 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 527 PetscScalar *y; 528 PetscBLASInt m = 0, k = 0; 529 530 PetscFunctionBegin; 531 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 532 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 533 if (k < m) { 534 PetscCall(VecCopy(xx, mat->qrrhs)); 535 PetscCall(VecGetArray(mat->qrrhs, &y)); 536 } else { 537 PetscCall(VecCopy(xx, yy)); 538 PetscCall(VecGetArray(yy, &y)); 539 } 540 *_y = y; 541 *_k = k; 542 *_m = m; 543 PetscFunctionReturn(PETSC_SUCCESS); 544 } 545 546 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 547 { 548 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 549 PetscScalar *y = NULL; 550 PetscBLASInt m, k; 551 552 PetscFunctionBegin; 553 y = *_y; 554 *_y = NULL; 555 k = *_k; 556 m = *_m; 557 if (k < m) { 558 PetscScalar *yv; 559 PetscCall(VecGetArray(yy, &yv)); 560 PetscCall(PetscArraycpy(yv, y, k)); 561 PetscCall(VecRestoreArray(yy, &yv)); 562 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 563 } else { 564 PetscCall(VecRestoreArray(yy, &y)); 565 } 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 570 { 571 PetscScalar *y = NULL; 572 PetscBLASInt m = 0, k = 0; 573 574 PetscFunctionBegin; 575 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 576 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 577 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 578 PetscFunctionReturn(PETSC_SUCCESS); 579 } 580 581 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 582 { 583 PetscScalar *y = NULL; 584 PetscBLASInt m = 0, k = 0; 585 586 PetscFunctionBegin; 587 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 588 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 589 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 590 PetscFunctionReturn(PETSC_SUCCESS); 591 } 592 593 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 594 { 595 PetscScalar *y = NULL; 596 PetscBLASInt m = 0, k = 0; 597 598 PetscFunctionBegin; 599 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 600 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 601 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 602 PetscFunctionReturn(PETSC_SUCCESS); 603 } 604 605 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 606 { 607 PetscScalar *y = NULL; 608 PetscBLASInt m = 0, k = 0; 609 610 PetscFunctionBegin; 611 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 612 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 613 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 614 PetscFunctionReturn(PETSC_SUCCESS); 615 } 616 617 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 618 { 619 PetscScalar *y = NULL; 620 PetscBLASInt m = 0, k = 0; 621 622 PetscFunctionBegin; 623 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 624 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 625 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 629 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 630 { 631 PetscScalar *y = NULL; 632 PetscBLASInt m = 0, k = 0; 633 634 PetscFunctionBegin; 635 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 636 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 637 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 642 { 643 const PetscScalar *b; 644 PetscScalar *y; 645 PetscInt n, _ldb, _ldx; 646 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 647 648 PetscFunctionBegin; 649 *_ldy = 0; 650 *_m = 0; 651 *_nrhs = 0; 652 *_k = 0; 653 *_y = NULL; 654 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 655 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 656 PetscCall(MatGetSize(B, NULL, &n)); 657 PetscCall(PetscBLASIntCast(n, &nrhs)); 658 PetscCall(MatDenseGetLDA(B, &_ldb)); 659 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 660 PetscCall(MatDenseGetLDA(X, &_ldx)); 661 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 662 if (ldx < m) { 663 PetscCall(MatDenseGetArrayRead(B, &b)); 664 PetscCall(PetscMalloc1(nrhs * m, &y)); 665 if (ldb == m) { 666 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 667 } else { 668 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 669 } 670 ldy = m; 671 PetscCall(MatDenseRestoreArrayRead(B, &b)); 672 } else { 673 if (ldb == ldx) { 674 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 675 PetscCall(MatDenseGetArray(X, &y)); 676 } else { 677 PetscCall(MatDenseGetArray(X, &y)); 678 PetscCall(MatDenseGetArrayRead(B, &b)); 679 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 680 PetscCall(MatDenseRestoreArrayRead(B, &b)); 681 } 682 ldy = ldx; 683 } 684 *_y = y; 685 *_ldy = ldy; 686 *_k = k; 687 *_m = m; 688 *_nrhs = nrhs; 689 PetscFunctionReturn(PETSC_SUCCESS); 690 } 691 692 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 693 { 694 PetscScalar *y; 695 PetscInt _ldx; 696 PetscBLASInt k, ldy, nrhs, ldx = 0; 697 698 PetscFunctionBegin; 699 y = *_y; 700 *_y = NULL; 701 k = *_k; 702 ldy = *_ldy; 703 nrhs = *_nrhs; 704 PetscCall(MatDenseGetLDA(X, &_ldx)); 705 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 706 if (ldx != ldy) { 707 PetscScalar *xv; 708 PetscCall(MatDenseGetArray(X, &xv)); 709 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 710 PetscCall(MatDenseRestoreArray(X, &xv)); 711 PetscCall(PetscFree(y)); 712 } else { 713 PetscCall(MatDenseRestoreArray(X, &y)); 714 } 715 PetscFunctionReturn(PETSC_SUCCESS); 716 } 717 718 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 719 { 720 PetscScalar *y; 721 PetscBLASInt m, k, ldy, nrhs; 722 723 PetscFunctionBegin; 724 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 725 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 726 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 727 PetscFunctionReturn(PETSC_SUCCESS); 728 } 729 730 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 731 { 732 PetscScalar *y; 733 PetscBLASInt m, k, ldy, nrhs; 734 735 PetscFunctionBegin; 736 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 737 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 738 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 739 PetscFunctionReturn(PETSC_SUCCESS); 740 } 741 742 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 743 { 744 PetscScalar *y; 745 PetscBLASInt m, k, ldy, nrhs; 746 747 PetscFunctionBegin; 748 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 749 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 750 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 755 { 756 PetscScalar *y; 757 PetscBLASInt m, k, ldy, nrhs; 758 759 PetscFunctionBegin; 760 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 761 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 762 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 767 { 768 PetscScalar *y; 769 PetscBLASInt m, k, ldy, nrhs; 770 771 PetscFunctionBegin; 772 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 773 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 774 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 775 PetscFunctionReturn(PETSC_SUCCESS); 776 } 777 778 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 779 { 780 PetscScalar *y; 781 PetscBLASInt m, k, ldy, nrhs; 782 783 PetscFunctionBegin; 784 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 786 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 /* COMMENT: I have chosen to hide row permutation in the pivots, 791 rather than put it in the Mat->row slot.*/ 792 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 793 { 794 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 795 PetscBLASInt n, m, info; 796 797 PetscFunctionBegin; 798 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 799 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 800 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 801 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 802 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 803 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 804 PetscCall(PetscFPTrapPop()); 805 806 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info); 807 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info); 808 809 A->ops->solve = MatSolve_SeqDense_LU; 810 A->ops->matsolve = MatMatSolve_SeqDense_LU; 811 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 812 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 813 A->factortype = MAT_FACTOR_LU; 814 815 PetscCall(PetscFree(A->solvertype)); 816 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 817 818 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 823 { 824 MatFactorInfo info; 825 826 PetscFunctionBegin; 827 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 828 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 829 PetscFunctionReturn(PETSC_SUCCESS); 830 } 831 832 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 833 { 834 PetscFunctionBegin; 835 fact->preallocated = PETSC_TRUE; 836 fact->assembled = PETSC_TRUE; 837 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 841 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 842 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 843 { 844 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 845 PetscBLASInt info, n; 846 847 PetscFunctionBegin; 848 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 849 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 850 if (A->spd == PETSC_BOOL3_TRUE) { 851 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 852 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 853 PetscCall(PetscFPTrapPop()); 854 #if defined(PETSC_USE_COMPLEX) 855 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 856 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 857 if (!mat->fwork) { 858 PetscScalar dummy; 859 860 mat->lfwork = -1; 861 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 862 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 863 PetscCall(PetscFPTrapPop()); 864 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 865 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 866 } 867 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 868 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 869 PetscCall(PetscFPTrapPop()); 870 #endif 871 } else { /* symmetric case */ 872 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 873 if (!mat->fwork) { 874 PetscScalar dummy; 875 876 mat->lfwork = -1; 877 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 878 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 879 PetscCall(PetscFPTrapPop()); 880 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 881 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 882 } 883 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 884 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 885 PetscCall(PetscFPTrapPop()); 886 } 887 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1); 888 889 A->ops->solve = MatSolve_SeqDense_Cholesky; 890 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 891 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 892 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 893 A->factortype = MAT_FACTOR_CHOLESKY; 894 895 PetscCall(PetscFree(A->solvertype)); 896 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 897 898 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 899 PetscFunctionReturn(PETSC_SUCCESS); 900 } 901 902 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 903 { 904 MatFactorInfo info; 905 906 PetscFunctionBegin; 907 info.fill = 1.0; 908 909 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 910 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 915 { 916 PetscFunctionBegin; 917 fact->assembled = PETSC_TRUE; 918 fact->preallocated = PETSC_TRUE; 919 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 920 PetscFunctionReturn(PETSC_SUCCESS); 921 } 922 923 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 924 { 925 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 926 PetscBLASInt n, m, info, min, max; 927 928 PetscFunctionBegin; 929 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 930 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 931 max = PetscMax(m, n); 932 min = PetscMin(m, n); 933 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 934 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 935 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); 936 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 937 if (!mat->fwork) { 938 PetscScalar dummy; 939 940 mat->lfwork = -1; 941 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 942 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 943 PetscCall(PetscFPTrapPop()); 944 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 945 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 946 } 947 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 948 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 949 PetscCall(PetscFPTrapPop()); 950 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info); 951 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 952 mat->rank = min; 953 954 A->ops->solve = MatSolve_SeqDense_QR; 955 A->ops->matsolve = MatMatSolve_SeqDense_QR; 956 A->factortype = MAT_FACTOR_QR; 957 if (m == n) { 958 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 959 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 960 } 961 962 PetscCall(PetscFree(A->solvertype)); 963 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 964 965 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 966 PetscFunctionReturn(PETSC_SUCCESS); 967 } 968 969 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 970 { 971 MatFactorInfo info; 972 973 PetscFunctionBegin; 974 info.fill = 1.0; 975 976 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 977 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 978 PetscFunctionReturn(PETSC_SUCCESS); 979 } 980 981 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 982 { 983 PetscFunctionBegin; 984 fact->assembled = PETSC_TRUE; 985 fact->preallocated = PETSC_TRUE; 986 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 987 PetscFunctionReturn(PETSC_SUCCESS); 988 } 989 990 /* uses LAPACK */ 991 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 992 { 993 PetscFunctionBegin; 994 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 995 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 996 PetscCall(MatSetType(*fact, MATDENSE)); 997 (*fact)->trivialsymbolic = PETSC_TRUE; 998 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 999 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1000 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1001 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1002 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1003 } else if (ftype == MAT_FACTOR_QR) { 1004 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1005 } 1006 (*fact)->factortype = ftype; 1007 1008 PetscCall(PetscFree((*fact)->solvertype)); 1009 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1010 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1011 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1012 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1013 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1014 PetscFunctionReturn(PETSC_SUCCESS); 1015 } 1016 1017 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1018 { 1019 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1020 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1021 const PetscScalar *b; 1022 PetscInt m = A->rmap->n, i; 1023 PetscBLASInt o = 1, bm = 0; 1024 1025 PetscFunctionBegin; 1026 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1027 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1028 #endif 1029 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1030 PetscCall(PetscBLASIntCast(m, &bm)); 1031 if (flag & SOR_ZERO_INITIAL_GUESS) { 1032 /* this is a hack fix, should have another version without the second BLASdotu */ 1033 PetscCall(VecSet(xx, zero)); 1034 } 1035 PetscCall(VecGetArray(xx, &x)); 1036 PetscCall(VecGetArrayRead(bb, &b)); 1037 its = its * lits; 1038 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1039 while (its--) { 1040 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1041 for (i = 0; i < m; i++) { 1042 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1043 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1044 } 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1047 for (i = m - 1; i >= 0; i--) { 1048 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1049 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1050 } 1051 } 1052 } 1053 PetscCall(VecRestoreArrayRead(bb, &b)); 1054 PetscCall(VecRestoreArray(xx, &x)); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 static PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1059 { 1060 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1061 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1062 PetscBLASInt m, n, _One = 1; 1063 const PetscScalar *v = mat->v, *x; 1064 1065 PetscFunctionBegin; 1066 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1067 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1068 PetscCall(VecGetArrayRead(xx, &x)); 1069 PetscCall(VecGetArrayWrite(yy, &y)); 1070 if (!m || !n) { 1071 PetscBLASInt i; 1072 if (trans) 1073 for (i = 0; i < n; i++) y[i] = 0.0; 1074 else 1075 for (i = 0; i < m; i++) y[i] = 0.0; 1076 } else { 1077 if (trans) { 1078 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1079 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1080 } else { 1081 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); 1082 } 1083 PetscCall(PetscLogFlops(2.0 * m * n - n)); 1084 } 1085 PetscCall(VecRestoreArrayRead(xx, &x)); 1086 PetscCall(VecRestoreArrayWrite(yy, &y)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1091 { 1092 PetscFunctionBegin; 1093 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1094 PetscFunctionReturn(PETSC_SUCCESS); 1095 } 1096 1097 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1098 { 1099 PetscFunctionBegin; 1100 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1101 PetscFunctionReturn(PETSC_SUCCESS); 1102 } 1103 1104 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1105 { 1106 PetscFunctionBegin; 1107 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1108 PetscFunctionReturn(PETSC_SUCCESS); 1109 } 1110 1111 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1112 { 1113 PetscFunctionBegin; 1114 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1115 PetscFunctionReturn(PETSC_SUCCESS); 1116 } 1117 1118 static PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1119 { 1120 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1121 const PetscScalar *v = mat->v, *x; 1122 PetscScalar *y, _DOne = 1.0; 1123 PetscBLASInt m, n, _One = 1; 1124 1125 PetscFunctionBegin; 1126 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1127 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1128 PetscCall(VecCopy(zz, yy)); 1129 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); 1130 PetscCall(VecGetArray(yy, &y)); 1131 PetscCall(VecGetArrayRead(xx, &x)); 1132 if (trans) { 1133 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1134 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1135 } else { 1136 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); 1137 } 1138 PetscCall(VecRestoreArrayRead(xx, &x)); 1139 PetscCall(VecRestoreArray(yy, &y)); 1140 PetscCall(PetscLogFlops(2.0 * m * n)); 1141 PetscFunctionReturn(PETSC_SUCCESS); 1142 } 1143 1144 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1145 { 1146 PetscFunctionBegin; 1147 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1152 { 1153 PetscFunctionBegin; 1154 PetscMPIInt rank; 1155 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 1156 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1161 { 1162 PetscFunctionBegin; 1163 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1164 PetscFunctionReturn(PETSC_SUCCESS); 1165 } 1166 1167 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1168 { 1169 PetscFunctionBegin; 1170 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1171 PetscFunctionReturn(PETSC_SUCCESS); 1172 } 1173 1174 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1175 { 1176 PetscFunctionBegin; 1177 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1182 { 1183 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1184 PetscInt i; 1185 1186 PetscFunctionBegin; 1187 if (ncols) *ncols = A->cmap->n; 1188 if (cols) { 1189 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1190 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1191 } 1192 if (vals) { 1193 const PetscScalar *v; 1194 1195 PetscCall(MatDenseGetArrayRead(A, &v)); 1196 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1197 v += row; 1198 for (i = 0; i < A->cmap->n; i++) { 1199 (*vals)[i] = *v; 1200 v += mat->lda; 1201 } 1202 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1203 } 1204 PetscFunctionReturn(PETSC_SUCCESS); 1205 } 1206 1207 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1208 { 1209 PetscFunctionBegin; 1210 if (cols) PetscCall(PetscFree(*cols)); 1211 if (vals) PetscCall(PetscFree(*vals)); 1212 PetscFunctionReturn(PETSC_SUCCESS); 1213 } 1214 1215 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1216 { 1217 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1218 PetscScalar *av; 1219 PetscInt i, j, idx = 0; 1220 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1221 PetscOffloadMask oldf; 1222 #endif 1223 1224 PetscFunctionBegin; 1225 PetscCall(MatDenseGetArray(A, &av)); 1226 if (!mat->roworiented) { 1227 if (addv == INSERT_VALUES) { 1228 for (j = 0; j < n; j++) { 1229 if (indexn[j] < 0) { 1230 idx += m; 1231 continue; 1232 } 1233 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1234 for (i = 0; i < m; i++) { 1235 if (indexm[i] < 0) { 1236 idx++; 1237 continue; 1238 } 1239 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1240 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1241 } 1242 } 1243 } else { 1244 for (j = 0; j < n; j++) { 1245 if (indexn[j] < 0) { 1246 idx += m; 1247 continue; 1248 } 1249 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1250 for (i = 0; i < m; i++) { 1251 if (indexm[i] < 0) { 1252 idx++; 1253 continue; 1254 } 1255 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1256 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1257 } 1258 } 1259 } 1260 } else { 1261 if (addv == INSERT_VALUES) { 1262 for (i = 0; i < m; i++) { 1263 if (indexm[i] < 0) { 1264 idx += n; 1265 continue; 1266 } 1267 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1268 for (j = 0; j < n; j++) { 1269 if (indexn[j] < 0) { 1270 idx++; 1271 continue; 1272 } 1273 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1274 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1275 } 1276 } 1277 } else { 1278 for (i = 0; i < m; i++) { 1279 if (indexm[i] < 0) { 1280 idx += n; 1281 continue; 1282 } 1283 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1284 for (j = 0; j < n; j++) { 1285 if (indexn[j] < 0) { 1286 idx++; 1287 continue; 1288 } 1289 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1290 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1291 } 1292 } 1293 } 1294 } 1295 /* hack to prevent unneeded copy to the GPU while returning the array */ 1296 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1297 oldf = A->offloadmask; 1298 A->offloadmask = PETSC_OFFLOAD_GPU; 1299 #endif 1300 PetscCall(MatDenseRestoreArray(A, &av)); 1301 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1302 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1303 #endif 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } 1306 1307 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1308 { 1309 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1310 const PetscScalar *vv; 1311 PetscInt i, j; 1312 1313 PetscFunctionBegin; 1314 PetscCall(MatDenseGetArrayRead(A, &vv)); 1315 /* row-oriented output */ 1316 for (i = 0; i < m; i++) { 1317 if (indexm[i] < 0) { 1318 v += n; 1319 continue; 1320 } 1321 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1322 for (j = 0; j < n; j++) { 1323 if (indexn[j] < 0) { 1324 v++; 1325 continue; 1326 } 1327 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1328 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1329 } 1330 } 1331 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } 1334 1335 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1336 { 1337 PetscBool skipHeader; 1338 PetscViewerFormat format; 1339 PetscInt header[4], M, N, m, lda, i, j; 1340 PetscCount k; 1341 const PetscScalar *v; 1342 PetscScalar *vwork; 1343 1344 PetscFunctionBegin; 1345 PetscCall(PetscViewerSetUp(viewer)); 1346 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1347 PetscCall(PetscViewerGetFormat(viewer, &format)); 1348 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1349 1350 PetscCall(MatGetSize(mat, &M, &N)); 1351 1352 /* write matrix header */ 1353 header[0] = MAT_FILE_CLASSID; 1354 header[1] = M; 1355 header[2] = N; 1356 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1357 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1358 1359 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1360 if (format != PETSC_VIEWER_NATIVE) { 1361 PetscInt nnz = m * N, *iwork; 1362 /* store row lengths for each row */ 1363 PetscCall(PetscMalloc1(nnz, &iwork)); 1364 for (i = 0; i < m; i++) iwork[i] = N; 1365 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1366 /* store column indices (zero start index) */ 1367 for (k = 0, i = 0; i < m; i++) 1368 for (j = 0; j < N; j++, k++) iwork[k] = j; 1369 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1370 PetscCall(PetscFree(iwork)); 1371 } 1372 /* store matrix values as a dense matrix in row major order */ 1373 PetscCall(PetscMalloc1(m * N, &vwork)); 1374 PetscCall(MatDenseGetArrayRead(mat, &v)); 1375 PetscCall(MatDenseGetLDA(mat, &lda)); 1376 for (k = 0, i = 0; i < m; i++) 1377 for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j]; 1378 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1379 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1380 PetscCall(PetscFree(vwork)); 1381 PetscFunctionReturn(PETSC_SUCCESS); 1382 } 1383 1384 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1385 { 1386 PetscBool skipHeader; 1387 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1388 PetscInt rows, cols; 1389 PetscScalar *v, *vwork; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscViewerSetUp(viewer)); 1393 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1394 1395 if (!skipHeader) { 1396 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1397 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1398 M = header[1]; 1399 N = header[2]; 1400 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1401 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1402 nz = header[3]; 1403 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1404 } else { 1405 PetscCall(MatGetSize(mat, &M, &N)); 1406 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1407 nz = MATRIX_BINARY_FORMAT_DENSE; 1408 } 1409 1410 /* setup global sizes if not set */ 1411 if (mat->rmap->N < 0) mat->rmap->N = M; 1412 if (mat->cmap->N < 0) mat->cmap->N = N; 1413 PetscCall(MatSetUp(mat)); 1414 /* check if global sizes are correct */ 1415 PetscCall(MatGetSize(mat, &rows, &cols)); 1416 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1417 1418 PetscCall(MatGetSize(mat, NULL, &N)); 1419 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1420 PetscCall(MatDenseGetArray(mat, &v)); 1421 PetscCall(MatDenseGetLDA(mat, &lda)); 1422 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1423 PetscCount nnz = (size_t)m * N; 1424 /* read in matrix values */ 1425 PetscCall(PetscMalloc1(nnz, &vwork)); 1426 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1427 /* store values in column major order */ 1428 for (j = 0; j < N; j++) 1429 for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j]; 1430 PetscCall(PetscFree(vwork)); 1431 } else { /* matrix in file is sparse format */ 1432 PetscInt nnz = 0, *rlens, *icols; 1433 /* read in row lengths */ 1434 PetscCall(PetscMalloc1(m, &rlens)); 1435 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1436 for (i = 0; i < m; i++) nnz += rlens[i]; 1437 /* read in column indices and values */ 1438 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1439 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1440 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1441 /* store values in column major order */ 1442 for (k = 0, i = 0; i < m; i++) 1443 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1444 PetscCall(PetscFree(rlens)); 1445 PetscCall(PetscFree2(icols, vwork)); 1446 } 1447 PetscCall(MatDenseRestoreArray(mat, &v)); 1448 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1449 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1450 PetscFunctionReturn(PETSC_SUCCESS); 1451 } 1452 1453 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1454 { 1455 PetscBool isbinary, ishdf5; 1456 1457 PetscFunctionBegin; 1458 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1459 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1460 /* force binary viewer to load .info file if it has not yet done so */ 1461 PetscCall(PetscViewerSetUp(viewer)); 1462 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1463 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1464 if (isbinary) { 1465 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1466 } else if (ishdf5) { 1467 #if defined(PETSC_HAVE_HDF5) 1468 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1469 #else 1470 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1471 #endif 1472 } else { 1473 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1474 } 1475 PetscFunctionReturn(PETSC_SUCCESS); 1476 } 1477 1478 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1479 { 1480 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1481 PetscInt i, j; 1482 const char *name; 1483 PetscScalar *v, *av; 1484 PetscViewerFormat format; 1485 #if defined(PETSC_USE_COMPLEX) 1486 PetscBool allreal = PETSC_TRUE; 1487 #endif 1488 1489 PetscFunctionBegin; 1490 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1491 PetscCall(PetscViewerGetFormat(viewer, &format)); 1492 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1493 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ 1494 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1495 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1496 for (i = 0; i < A->rmap->n; i++) { 1497 v = av + i; 1498 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1499 for (j = 0; j < A->cmap->n; j++) { 1500 #if defined(PETSC_USE_COMPLEX) 1501 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1502 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1503 } else if (PetscRealPart(*v)) { 1504 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1505 } 1506 #else 1507 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1508 #endif 1509 v += a->lda; 1510 } 1511 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1512 } 1513 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1514 } else { 1515 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1516 #if defined(PETSC_USE_COMPLEX) 1517 /* determine if matrix has all real values */ 1518 for (j = 0; j < A->cmap->n; j++) { 1519 v = av + j * a->lda; 1520 for (i = 0; i < A->rmap->n; i++) { 1521 if (PetscImaginaryPart(v[i])) { 1522 allreal = PETSC_FALSE; 1523 break; 1524 } 1525 } 1526 } 1527 #endif 1528 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1529 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1530 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1531 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1532 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1533 } 1534 1535 for (i = 0; i < A->rmap->n; i++) { 1536 v = av + i; 1537 for (j = 0; j < A->cmap->n; j++) { 1538 #if defined(PETSC_USE_COMPLEX) 1539 if (allreal) { 1540 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1541 } else { 1542 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1543 } 1544 #else 1545 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1546 #endif 1547 v += a->lda; 1548 } 1549 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1550 } 1551 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1552 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1553 } 1554 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1555 PetscCall(PetscViewerFlush(viewer)); 1556 PetscFunctionReturn(PETSC_SUCCESS); 1557 } 1558 1559 #include <petscdraw.h> 1560 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1561 { 1562 Mat A = (Mat)Aa; 1563 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1564 int color = PETSC_DRAW_WHITE; 1565 const PetscScalar *v; 1566 PetscViewer viewer; 1567 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1568 PetscViewerFormat format; 1569 1570 PetscFunctionBegin; 1571 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1572 PetscCall(PetscViewerGetFormat(viewer, &format)); 1573 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1574 1575 /* Loop over matrix elements drawing boxes */ 1576 PetscCall(MatDenseGetArrayRead(A, &v)); 1577 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1578 PetscDrawCollectiveBegin(draw); 1579 /* Blue for negative and Red for positive */ 1580 for (j = 0; j < n; j++) { 1581 x_l = j; 1582 x_r = x_l + 1.0; 1583 for (i = 0; i < m; i++) { 1584 y_l = m - i - 1.0; 1585 y_r = y_l + 1.0; 1586 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1587 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1588 else continue; 1589 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1590 } 1591 } 1592 PetscDrawCollectiveEnd(draw); 1593 } else { 1594 /* use contour shading to indicate magnitude of values */ 1595 /* first determine max of all nonzero values */ 1596 PetscReal minv = 0.0, maxv = 0.0; 1597 PetscDraw popup; 1598 1599 for (i = 0; i < m * n; i++) { 1600 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1601 } 1602 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1603 PetscCall(PetscDrawGetPopup(draw, &popup)); 1604 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1605 1606 PetscDrawCollectiveBegin(draw); 1607 for (j = 0; j < n; j++) { 1608 x_l = j; 1609 x_r = x_l + 1.0; 1610 for (i = 0; i < m; i++) { 1611 y_l = m - i - 1.0; 1612 y_r = y_l + 1.0; 1613 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1614 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1615 } 1616 } 1617 PetscDrawCollectiveEnd(draw); 1618 } 1619 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1620 PetscFunctionReturn(PETSC_SUCCESS); 1621 } 1622 1623 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1624 { 1625 PetscDraw draw; 1626 PetscBool isnull; 1627 PetscReal xr, yr, xl, yl, h, w; 1628 1629 PetscFunctionBegin; 1630 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1631 PetscCall(PetscDrawIsNull(draw, &isnull)); 1632 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1633 1634 xr = A->cmap->n; 1635 yr = A->rmap->n; 1636 h = yr / 10.0; 1637 w = xr / 10.0; 1638 xr += w; 1639 yr += h; 1640 xl = -w; 1641 yl = -h; 1642 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1643 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1644 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1645 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1646 PetscCall(PetscDrawSave(draw)); 1647 PetscFunctionReturn(PETSC_SUCCESS); 1648 } 1649 1650 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1651 { 1652 PetscBool iascii, isbinary, isdraw; 1653 1654 PetscFunctionBegin; 1655 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1656 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1657 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1658 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1659 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1660 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1661 PetscFunctionReturn(PETSC_SUCCESS); 1662 } 1663 1664 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1665 { 1666 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1667 1668 PetscFunctionBegin; 1669 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1670 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1671 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); 1672 a->unplacedarray = a->v; 1673 a->unplaced_user_alloc = a->user_alloc; 1674 a->v = (PetscScalar *)array; 1675 a->user_alloc = PETSC_TRUE; 1676 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1677 A->offloadmask = PETSC_OFFLOAD_CPU; 1678 #endif 1679 PetscFunctionReturn(PETSC_SUCCESS); 1680 } 1681 1682 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1683 { 1684 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1685 1686 PetscFunctionBegin; 1687 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1688 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1689 a->v = a->unplacedarray; 1690 a->user_alloc = a->unplaced_user_alloc; 1691 a->unplacedarray = NULL; 1692 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1693 A->offloadmask = PETSC_OFFLOAD_CPU; 1694 #endif 1695 PetscFunctionReturn(PETSC_SUCCESS); 1696 } 1697 1698 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1699 { 1700 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1701 1702 PetscFunctionBegin; 1703 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1704 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1705 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1706 a->v = (PetscScalar *)array; 1707 a->user_alloc = PETSC_FALSE; 1708 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1709 A->offloadmask = PETSC_OFFLOAD_CPU; 1710 #endif 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1715 { 1716 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1717 1718 PetscFunctionBegin; 1719 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); 1720 PetscCall(VecDestroy(&l->qrrhs)); 1721 PetscCall(PetscFree(l->tau)); 1722 PetscCall(PetscFree(l->pivots)); 1723 PetscCall(PetscFree(l->fwork)); 1724 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1725 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1726 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1727 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1728 PetscCall(VecDestroy(&l->cvec)); 1729 PetscCall(MatDestroy(&l->cmat)); 1730 PetscCall(PetscFree(mat->data)); 1731 1732 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1748 #if defined(PETSC_HAVE_ELEMENTAL) 1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1750 #endif 1751 #if defined(PETSC_HAVE_SCALAPACK) 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1753 #endif 1754 #if defined(PETSC_HAVE_CUDA) 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1759 #endif 1760 #if defined(PETSC_HAVE_HIP) 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1765 #endif 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1771 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1789 { 1790 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1791 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1792 PetscScalar *v, tmp; 1793 1794 PetscFunctionBegin; 1795 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1796 if (reuse == MAT_INPLACE_MATRIX) { 1797 if (m == n) { /* in place transpose */ 1798 PetscCall(MatDenseGetArray(A, &v)); 1799 for (j = 0; j < m; j++) { 1800 for (k = 0; k < j; k++) { 1801 tmp = v[j + k * M]; 1802 v[j + k * M] = v[k + j * M]; 1803 v[k + j * M] = tmp; 1804 } 1805 } 1806 PetscCall(MatDenseRestoreArray(A, &v)); 1807 } else { /* reuse memory, temporary allocates new memory */ 1808 PetscScalar *v2; 1809 PetscLayout tmplayout; 1810 1811 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1812 PetscCall(MatDenseGetArray(A, &v)); 1813 for (j = 0; j < n; j++) { 1814 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1815 } 1816 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1817 PetscCall(PetscFree(v2)); 1818 PetscCall(MatDenseRestoreArray(A, &v)); 1819 /* cleanup size dependent quantities */ 1820 PetscCall(VecDestroy(&mat->cvec)); 1821 PetscCall(MatDestroy(&mat->cmat)); 1822 PetscCall(PetscFree(mat->pivots)); 1823 PetscCall(PetscFree(mat->fwork)); 1824 /* swap row/col layouts */ 1825 PetscCall(PetscBLASIntCast(n, &mat->lda)); 1826 tmplayout = A->rmap; 1827 A->rmap = A->cmap; 1828 A->cmap = tmplayout; 1829 } 1830 } else { /* out-of-place transpose */ 1831 Mat tmat; 1832 Mat_SeqDense *tmatd; 1833 PetscScalar *v2; 1834 PetscInt M2; 1835 1836 if (reuse == MAT_INITIAL_MATRIX) { 1837 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1838 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1839 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1840 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1841 } else tmat = *matout; 1842 1843 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1844 PetscCall(MatDenseGetArray(tmat, &v2)); 1845 tmatd = (Mat_SeqDense *)tmat->data; 1846 M2 = tmatd->lda; 1847 for (j = 0; j < n; j++) { 1848 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1849 } 1850 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1851 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1852 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1853 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1854 *matout = tmat; 1855 } 1856 PetscFunctionReturn(PETSC_SUCCESS); 1857 } 1858 1859 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1860 { 1861 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1862 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1863 PetscInt i; 1864 const PetscScalar *v1, *v2; 1865 1866 PetscFunctionBegin; 1867 if (A1->rmap->n != A2->rmap->n) { 1868 *flg = PETSC_FALSE; 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 if (A1->cmap->n != A2->cmap->n) { 1872 *flg = PETSC_FALSE; 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1876 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1877 for (i = 0; i < A1->cmap->n; i++) { 1878 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1879 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); 1880 v1 += mat1->lda; 1881 v2 += mat2->lda; 1882 } 1883 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1884 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1885 *flg = PETSC_TRUE; 1886 PetscFunctionReturn(PETSC_SUCCESS); 1887 } 1888 1889 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1890 { 1891 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1892 PetscInt i, n, len; 1893 PetscScalar *x; 1894 const PetscScalar *vv; 1895 1896 PetscFunctionBegin; 1897 PetscCall(VecGetSize(v, &n)); 1898 PetscCall(VecGetArray(v, &x)); 1899 len = PetscMin(A->rmap->n, A->cmap->n); 1900 PetscCall(MatDenseGetArrayRead(A, &vv)); 1901 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1902 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1903 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1904 PetscCall(VecRestoreArray(v, &x)); 1905 PetscFunctionReturn(PETSC_SUCCESS); 1906 } 1907 1908 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1909 { 1910 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1911 const PetscScalar *l, *r; 1912 PetscScalar x, *v, *vv; 1913 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1914 1915 PetscFunctionBegin; 1916 PetscCall(MatDenseGetArray(A, &vv)); 1917 if (ll) { 1918 PetscCall(VecGetSize(ll, &m)); 1919 PetscCall(VecGetArrayRead(ll, &l)); 1920 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1921 for (i = 0; i < m; i++) { 1922 x = l[i]; 1923 v = vv + i; 1924 for (j = 0; j < n; j++) { 1925 (*v) *= x; 1926 v += mat->lda; 1927 } 1928 } 1929 PetscCall(VecRestoreArrayRead(ll, &l)); 1930 PetscCall(PetscLogFlops(1.0 * n * m)); 1931 } 1932 if (rr) { 1933 PetscCall(VecGetSize(rr, &n)); 1934 PetscCall(VecGetArrayRead(rr, &r)); 1935 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1936 for (i = 0; i < n; i++) { 1937 x = r[i]; 1938 v = vv + i * mat->lda; 1939 for (j = 0; j < m; j++) (*v++) *= x; 1940 } 1941 PetscCall(VecRestoreArrayRead(rr, &r)); 1942 PetscCall(PetscLogFlops(1.0 * n * m)); 1943 } 1944 PetscCall(MatDenseRestoreArray(A, &vv)); 1945 PetscFunctionReturn(PETSC_SUCCESS); 1946 } 1947 1948 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1949 { 1950 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1951 PetscScalar *v, *vv; 1952 PetscReal sum = 0.0; 1953 PetscInt lda, m = A->rmap->n, i, j; 1954 1955 PetscFunctionBegin; 1956 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1957 PetscCall(MatDenseGetLDA(A, &lda)); 1958 v = vv; 1959 if (type == NORM_FROBENIUS) { 1960 if (lda > m) { 1961 for (j = 0; j < A->cmap->n; j++) { 1962 v = vv + j * lda; 1963 for (i = 0; i < m; i++) { 1964 sum += PetscRealPart(PetscConj(*v) * (*v)); 1965 v++; 1966 } 1967 } 1968 } else { 1969 #if defined(PETSC_USE_REAL___FP16) 1970 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1971 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1972 } 1973 #else 1974 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1975 sum += PetscRealPart(PetscConj(*v) * (*v)); 1976 v++; 1977 } 1978 } 1979 *nrm = PetscSqrtReal(sum); 1980 #endif 1981 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1982 } else if (type == NORM_1) { 1983 *nrm = 0.0; 1984 for (j = 0; j < A->cmap->n; j++) { 1985 v = vv + j * mat->lda; 1986 sum = 0.0; 1987 for (i = 0; i < A->rmap->n; i++) { 1988 sum += PetscAbsScalar(*v); 1989 v++; 1990 } 1991 if (sum > *nrm) *nrm = sum; 1992 } 1993 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1994 } else if (type == NORM_INFINITY) { 1995 *nrm = 0.0; 1996 for (j = 0; j < A->rmap->n; j++) { 1997 v = vv + j; 1998 sum = 0.0; 1999 for (i = 0; i < A->cmap->n; i++) { 2000 sum += PetscAbsScalar(*v); 2001 v += mat->lda; 2002 } 2003 if (sum > *nrm) *nrm = sum; 2004 } 2005 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2006 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2007 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2008 PetscFunctionReturn(PETSC_SUCCESS); 2009 } 2010 2011 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2012 { 2013 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2014 2015 PetscFunctionBegin; 2016 switch (op) { 2017 case MAT_ROW_ORIENTED: 2018 aij->roworiented = flg; 2019 break; 2020 case MAT_NEW_NONZERO_LOCATIONS: 2021 case MAT_NEW_NONZERO_LOCATION_ERR: 2022 case MAT_NEW_NONZERO_ALLOCATION_ERR: 2023 case MAT_FORCE_DIAGONAL_ENTRIES: 2024 case MAT_KEEP_NONZERO_PATTERN: 2025 case MAT_IGNORE_OFF_PROC_ENTRIES: 2026 case MAT_USE_HASH_TABLE: 2027 case MAT_IGNORE_ZERO_ENTRIES: 2028 case MAT_IGNORE_LOWER_TRIANGULAR: 2029 case MAT_SORTED_FULL: 2030 case MAT_SUBMAT_SINGLEIS: 2031 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 2032 break; 2033 case MAT_SPD: 2034 case MAT_SYMMETRIC: 2035 case MAT_STRUCTURALLY_SYMMETRIC: 2036 case MAT_HERMITIAN: 2037 case MAT_SYMMETRY_ETERNAL: 2038 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 2039 case MAT_SPD_ETERNAL: 2040 break; 2041 default: 2042 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 2043 } 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2048 { 2049 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2050 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2051 PetscScalar *v; 2052 2053 PetscFunctionBegin; 2054 PetscCall(MatDenseGetArrayWrite(A, &v)); 2055 if (lda > m) { 2056 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2057 } else { 2058 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2059 } 2060 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2065 { 2066 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2067 PetscInt m = l->lda, n = A->cmap->n, i, j; 2068 PetscScalar *slot, *bb, *v; 2069 const PetscScalar *xx; 2070 2071 PetscFunctionBegin; 2072 if (PetscDefined(USE_DEBUG)) { 2073 for (i = 0; i < N; i++) { 2074 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2075 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2076 } 2077 } 2078 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 2079 2080 /* fix right-hand side if needed */ 2081 if (x && b) { 2082 PetscCall(VecGetArrayRead(x, &xx)); 2083 PetscCall(VecGetArray(b, &bb)); 2084 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2085 PetscCall(VecRestoreArrayRead(x, &xx)); 2086 PetscCall(VecRestoreArray(b, &bb)); 2087 } 2088 2089 PetscCall(MatDenseGetArray(A, &v)); 2090 for (i = 0; i < N; i++) { 2091 slot = v + rows[i]; 2092 for (j = 0; j < n; j++) { 2093 *slot = 0.0; 2094 slot += m; 2095 } 2096 } 2097 if (diag != 0.0) { 2098 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2099 for (i = 0; i < N; i++) { 2100 slot = v + (m + 1) * rows[i]; 2101 *slot = diag; 2102 } 2103 } 2104 PetscCall(MatDenseRestoreArray(A, &v)); 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2109 { 2110 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2111 2112 PetscFunctionBegin; 2113 *lda = mat->lda; 2114 PetscFunctionReturn(PETSC_SUCCESS); 2115 } 2116 2117 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2118 { 2119 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2120 2121 PetscFunctionBegin; 2122 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2123 *array = mat->v; 2124 PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2128 { 2129 PetscFunctionBegin; 2130 if (array) *array = NULL; 2131 PetscFunctionReturn(PETSC_SUCCESS); 2132 } 2133 2134 /*@ 2135 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2136 2137 Not Collective 2138 2139 Input Parameter: 2140 . A - a `MATDENSE` or `MATDENSECUDA` matrix 2141 2142 Output Parameter: 2143 . lda - the leading dimension 2144 2145 Level: intermediate 2146 2147 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2148 @*/ 2149 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2150 { 2151 PetscFunctionBegin; 2152 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2153 PetscAssertPointer(lda, 2); 2154 MatCheckPreallocated(A, 1); 2155 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2156 PetscFunctionReturn(PETSC_SUCCESS); 2157 } 2158 2159 /*@ 2160 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2161 2162 Collective if the matrix layouts have not yet been setup 2163 2164 Input Parameters: 2165 + A - a `MATDENSE` or `MATDENSECUDA` matrix 2166 - lda - the leading dimension 2167 2168 Level: intermediate 2169 2170 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2171 @*/ 2172 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2173 { 2174 PetscFunctionBegin; 2175 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2176 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2177 PetscFunctionReturn(PETSC_SUCCESS); 2178 } 2179 2180 /*@C 2181 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2182 2183 Logically Collective 2184 2185 Input Parameter: 2186 . A - a dense matrix 2187 2188 Output Parameter: 2189 . array - pointer to the data 2190 2191 Level: intermediate 2192 2193 Fortran Notes: 2194 `MatDenseGetArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseGetArrayF90()` 2195 2196 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2197 @*/ 2198 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) 2199 { 2200 PetscFunctionBegin; 2201 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2202 PetscAssertPointer(array, 2); 2203 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2204 PetscFunctionReturn(PETSC_SUCCESS); 2205 } 2206 2207 /*@C 2208 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2209 2210 Logically Collective 2211 2212 Input Parameters: 2213 + A - a dense matrix 2214 - array - pointer to the data (may be `NULL`) 2215 2216 Level: intermediate 2217 2218 Fortran Notes: 2219 `MatDenseRestoreArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseRestoreArrayF90()` 2220 2221 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2222 @*/ 2223 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) 2224 { 2225 PetscFunctionBegin; 2226 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2227 if (array) PetscAssertPointer(array, 2); 2228 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2229 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2230 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2231 A->offloadmask = PETSC_OFFLOAD_CPU; 2232 #endif 2233 PetscFunctionReturn(PETSC_SUCCESS); 2234 } 2235 2236 /*@C 2237 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2238 2239 Not Collective 2240 2241 Input Parameter: 2242 . A - a dense matrix 2243 2244 Output Parameter: 2245 . array - pointer to the data 2246 2247 Level: intermediate 2248 2249 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2250 @*/ 2251 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) 2252 { 2253 PetscFunctionBegin; 2254 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2255 PetscAssertPointer(array, 2); 2256 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2257 PetscFunctionReturn(PETSC_SUCCESS); 2258 } 2259 2260 /*@C 2261 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2262 2263 Not Collective 2264 2265 Input Parameters: 2266 + A - a dense matrix 2267 - array - pointer to the data (may be `NULL`) 2268 2269 Level: intermediate 2270 2271 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2272 @*/ 2273 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) 2274 { 2275 PetscFunctionBegin; 2276 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2277 if (array) PetscAssertPointer(array, 2); 2278 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2279 PetscFunctionReturn(PETSC_SUCCESS); 2280 } 2281 2282 /*@C 2283 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2284 2285 Not Collective 2286 2287 Input Parameter: 2288 . A - a dense matrix 2289 2290 Output Parameter: 2291 . array - pointer to the data 2292 2293 Level: intermediate 2294 2295 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2296 @*/ 2297 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) 2298 { 2299 PetscFunctionBegin; 2300 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2301 PetscAssertPointer(array, 2); 2302 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2303 PetscFunctionReturn(PETSC_SUCCESS); 2304 } 2305 2306 /*@C 2307 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2308 2309 Not Collective 2310 2311 Input Parameters: 2312 + A - a dense matrix 2313 - array - pointer to the data (may be `NULL`) 2314 2315 Level: intermediate 2316 2317 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2318 @*/ 2319 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) 2320 { 2321 PetscFunctionBegin; 2322 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2323 if (array) PetscAssertPointer(array, 2); 2324 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2325 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2326 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2327 A->offloadmask = PETSC_OFFLOAD_CPU; 2328 #endif 2329 PetscFunctionReturn(PETSC_SUCCESS); 2330 } 2331 2332 /*@C 2333 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2334 2335 Logically Collective 2336 2337 Input Parameter: 2338 . A - a dense matrix 2339 2340 Output Parameters: 2341 + array - pointer to the data 2342 - mtype - memory type of the returned pointer 2343 2344 Level: intermediate 2345 2346 Note: 2347 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2348 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2349 2350 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, 2351 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2352 @*/ 2353 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2354 { 2355 PetscBool isMPI; 2356 2357 PetscFunctionBegin; 2358 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2359 PetscAssertPointer(array, 2); 2360 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2361 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2362 if (isMPI) { 2363 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2364 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2365 } else { 2366 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2367 2368 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); 2369 if (fptr) { 2370 PetscCall((*fptr)(A, array, mtype)); 2371 } else { 2372 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2373 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2374 } 2375 } 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378 2379 /*@C 2380 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` 2381 2382 Logically Collective 2383 2384 Input Parameters: 2385 + A - a dense matrix 2386 - array - pointer to the data 2387 2388 Level: intermediate 2389 2390 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2391 @*/ 2392 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[]) 2393 { 2394 PetscBool isMPI; 2395 2396 PetscFunctionBegin; 2397 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2398 PetscAssertPointer(array, 2); 2399 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2400 if (isMPI) { 2401 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2402 } else { 2403 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2404 2405 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); 2406 if (fptr) { 2407 PetscCall((*fptr)(A, array)); 2408 } else { 2409 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2410 } 2411 *array = NULL; 2412 } 2413 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2414 PetscFunctionReturn(PETSC_SUCCESS); 2415 } 2416 2417 /*@C 2418 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2419 2420 Logically Collective 2421 2422 Input Parameter: 2423 . A - a dense matrix 2424 2425 Output Parameters: 2426 + array - pointer to the data 2427 - mtype - memory type of the returned pointer 2428 2429 Level: intermediate 2430 2431 Note: 2432 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2433 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2434 2435 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, 2436 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2437 @*/ 2438 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype) 2439 { 2440 PetscBool isMPI; 2441 2442 PetscFunctionBegin; 2443 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2444 PetscAssertPointer(array, 2); 2445 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2446 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2447 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2448 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2449 } else { 2450 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); 2451 2452 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); 2453 if (fptr) { 2454 PetscCall((*fptr)(A, array, mtype)); 2455 } else { 2456 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2457 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2458 } 2459 } 2460 PetscFunctionReturn(PETSC_SUCCESS); 2461 } 2462 2463 /*@C 2464 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2465 2466 Logically Collective 2467 2468 Input Parameters: 2469 + A - a dense matrix 2470 - array - pointer to the data 2471 2472 Level: intermediate 2473 2474 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2475 @*/ 2476 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[]) 2477 { 2478 PetscBool isMPI; 2479 2480 PetscFunctionBegin; 2481 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2482 PetscAssertPointer(array, 2); 2483 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2484 if (isMPI) { 2485 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2486 } else { 2487 PetscErrorCode (*fptr)(Mat, const PetscScalar **); 2488 2489 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); 2490 if (fptr) { 2491 PetscCall((*fptr)(A, array)); 2492 } else { 2493 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2494 } 2495 *array = NULL; 2496 } 2497 PetscFunctionReturn(PETSC_SUCCESS); 2498 } 2499 2500 /*@C 2501 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2502 2503 Logically Collective 2504 2505 Input Parameter: 2506 . A - a dense matrix 2507 2508 Output Parameters: 2509 + array - pointer to the data 2510 - mtype - memory type of the returned pointer 2511 2512 Level: intermediate 2513 2514 Note: 2515 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2516 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2517 2518 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, 2519 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2520 @*/ 2521 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2522 { 2523 PetscBool isMPI; 2524 2525 PetscFunctionBegin; 2526 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2527 PetscAssertPointer(array, 2); 2528 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2529 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2530 if (isMPI) { 2531 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2532 } else { 2533 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2534 2535 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); 2536 if (fptr) { 2537 PetscCall((*fptr)(A, array, mtype)); 2538 } else { 2539 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2540 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2541 } 2542 } 2543 PetscFunctionReturn(PETSC_SUCCESS); 2544 } 2545 2546 /*@C 2547 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2548 2549 Logically Collective 2550 2551 Input Parameters: 2552 + A - a dense matrix 2553 - array - pointer to the data 2554 2555 Level: intermediate 2556 2557 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2558 @*/ 2559 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[]) 2560 { 2561 PetscBool isMPI; 2562 2563 PetscFunctionBegin; 2564 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2565 PetscAssertPointer(array, 2); 2566 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2567 if (isMPI) { 2568 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2569 } else { 2570 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2571 2572 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); 2573 if (fptr) { 2574 PetscCall((*fptr)(A, array)); 2575 } else { 2576 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2577 } 2578 *array = NULL; 2579 } 2580 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2581 PetscFunctionReturn(PETSC_SUCCESS); 2582 } 2583 2584 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2585 { 2586 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2587 PetscInt i, j, nrows, ncols, ldb; 2588 const PetscInt *irow, *icol; 2589 PetscScalar *av, *bv, *v = mat->v; 2590 Mat newmat; 2591 2592 PetscFunctionBegin; 2593 PetscCall(ISGetIndices(isrow, &irow)); 2594 PetscCall(ISGetIndices(iscol, &icol)); 2595 PetscCall(ISGetLocalSize(isrow, &nrows)); 2596 PetscCall(ISGetLocalSize(iscol, &ncols)); 2597 2598 /* Check submatrixcall */ 2599 if (scall == MAT_REUSE_MATRIX) { 2600 PetscInt n_cols, n_rows; 2601 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2602 if (n_rows != nrows || n_cols != ncols) { 2603 /* resize the result matrix to match number of requested rows/columns */ 2604 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2605 } 2606 newmat = *B; 2607 } else { 2608 /* Create and fill new matrix */ 2609 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2610 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2611 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2612 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2613 } 2614 2615 /* Now extract the data pointers and do the copy,column at a time */ 2616 PetscCall(MatDenseGetArray(newmat, &bv)); 2617 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2618 for (i = 0; i < ncols; i++) { 2619 av = v + mat->lda * icol[i]; 2620 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2621 bv += ldb; 2622 } 2623 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2624 2625 /* Assemble the matrices so that the correct flags are set */ 2626 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2627 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2628 2629 /* Free work space */ 2630 PetscCall(ISRestoreIndices(isrow, &irow)); 2631 PetscCall(ISRestoreIndices(iscol, &icol)); 2632 *B = newmat; 2633 PetscFunctionReturn(PETSC_SUCCESS); 2634 } 2635 2636 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2637 { 2638 PetscInt i; 2639 2640 PetscFunctionBegin; 2641 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2642 2643 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2644 PetscFunctionReturn(PETSC_SUCCESS); 2645 } 2646 2647 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2648 { 2649 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2650 const PetscScalar *va; 2651 PetscScalar *vb; 2652 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2653 2654 PetscFunctionBegin; 2655 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2656 if (A->ops->copy != B->ops->copy) { 2657 PetscCall(MatCopy_Basic(A, B, str)); 2658 PetscFunctionReturn(PETSC_SUCCESS); 2659 } 2660 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2661 PetscCall(MatDenseGetArrayRead(A, &va)); 2662 PetscCall(MatDenseGetArray(B, &vb)); 2663 if (lda1 > m || lda2 > m) { 2664 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2665 } else { 2666 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2667 } 2668 PetscCall(MatDenseRestoreArray(B, &vb)); 2669 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2670 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2671 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2672 PetscFunctionReturn(PETSC_SUCCESS); 2673 } 2674 2675 PetscErrorCode MatSetUp_SeqDense(Mat A) 2676 { 2677 PetscFunctionBegin; 2678 PetscCall(PetscLayoutSetUp(A->rmap)); 2679 PetscCall(PetscLayoutSetUp(A->cmap)); 2680 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2681 PetscFunctionReturn(PETSC_SUCCESS); 2682 } 2683 2684 static PetscErrorCode MatConjugate_SeqDense(Mat A) 2685 { 2686 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2687 PetscInt i, j; 2688 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2689 PetscScalar *aa; 2690 2691 PetscFunctionBegin; 2692 PetscCall(MatDenseGetArray(A, &aa)); 2693 for (j = 0; j < A->cmap->n; j++) { 2694 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2695 } 2696 PetscCall(MatDenseRestoreArray(A, &aa)); 2697 if (mat->tau) 2698 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2699 PetscFunctionReturn(PETSC_SUCCESS); 2700 } 2701 2702 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2703 { 2704 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2705 PetscInt i, j; 2706 PetscScalar *aa; 2707 2708 PetscFunctionBegin; 2709 PetscCall(MatDenseGetArray(A, &aa)); 2710 for (j = 0; j < A->cmap->n; j++) { 2711 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2712 } 2713 PetscCall(MatDenseRestoreArray(A, &aa)); 2714 PetscFunctionReturn(PETSC_SUCCESS); 2715 } 2716 2717 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2718 { 2719 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2720 PetscInt i, j; 2721 PetscScalar *aa; 2722 2723 PetscFunctionBegin; 2724 PetscCall(MatDenseGetArray(A, &aa)); 2725 for (j = 0; j < A->cmap->n; j++) { 2726 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2727 } 2728 PetscCall(MatDenseRestoreArray(A, &aa)); 2729 PetscFunctionReturn(PETSC_SUCCESS); 2730 } 2731 2732 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2733 { 2734 PetscInt m = A->rmap->n, n = B->cmap->n; 2735 PetscBool cisdense = PETSC_FALSE; 2736 2737 PetscFunctionBegin; 2738 PetscCall(MatSetSizes(C, m, n, m, n)); 2739 #if defined(PETSC_HAVE_CUDA) 2740 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2741 #endif 2742 #if defined(PETSC_HAVE_HIP) 2743 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2744 #endif 2745 if (!cisdense) { 2746 PetscBool flg; 2747 2748 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2749 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2750 } 2751 PetscCall(MatSetUp(C)); 2752 PetscFunctionReturn(PETSC_SUCCESS); 2753 } 2754 2755 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2756 { 2757 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2758 PetscBLASInt m, n, k; 2759 const PetscScalar *av, *bv; 2760 PetscScalar *cv; 2761 PetscScalar _DOne = 1.0, _DZero = 0.0; 2762 2763 PetscFunctionBegin; 2764 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2765 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2766 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2767 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2768 PetscCall(MatDenseGetArrayRead(A, &av)); 2769 PetscCall(MatDenseGetArrayRead(B, &bv)); 2770 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2771 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2772 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2773 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2774 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2775 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2776 PetscFunctionReturn(PETSC_SUCCESS); 2777 } 2778 2779 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2780 { 2781 PetscInt m = A->rmap->n, n = B->rmap->n; 2782 PetscBool cisdense = PETSC_FALSE; 2783 2784 PetscFunctionBegin; 2785 PetscCall(MatSetSizes(C, m, n, m, n)); 2786 #if defined(PETSC_HAVE_CUDA) 2787 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2788 #endif 2789 #if defined(PETSC_HAVE_HIP) 2790 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2791 #endif 2792 if (!cisdense) { 2793 PetscBool flg; 2794 2795 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2796 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2797 } 2798 PetscCall(MatSetUp(C)); 2799 PetscFunctionReturn(PETSC_SUCCESS); 2800 } 2801 2802 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2803 { 2804 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2805 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2806 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2807 const PetscScalar *av, *bv; 2808 PetscScalar *cv; 2809 PetscBLASInt m, n, k; 2810 PetscScalar _DOne = 1.0, _DZero = 0.0; 2811 2812 PetscFunctionBegin; 2813 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2814 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2815 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2816 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2817 PetscCall(MatDenseGetArrayRead(A, &av)); 2818 PetscCall(MatDenseGetArrayRead(B, &bv)); 2819 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2820 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2821 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2822 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2823 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2824 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2825 PetscFunctionReturn(PETSC_SUCCESS); 2826 } 2827 2828 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2829 { 2830 PetscInt m = A->cmap->n, n = B->cmap->n; 2831 PetscBool cisdense = PETSC_FALSE; 2832 2833 PetscFunctionBegin; 2834 PetscCall(MatSetSizes(C, m, n, m, n)); 2835 #if defined(PETSC_HAVE_CUDA) 2836 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2837 #endif 2838 #if defined(PETSC_HAVE_HIP) 2839 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2840 #endif 2841 if (!cisdense) { 2842 PetscBool flg; 2843 2844 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2845 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2846 } 2847 PetscCall(MatSetUp(C)); 2848 PetscFunctionReturn(PETSC_SUCCESS); 2849 } 2850 2851 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2852 { 2853 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2854 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2855 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2856 const PetscScalar *av, *bv; 2857 PetscScalar *cv; 2858 PetscBLASInt m, n, k; 2859 PetscScalar _DOne = 1.0, _DZero = 0.0; 2860 2861 PetscFunctionBegin; 2862 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2863 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2864 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2865 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2866 PetscCall(MatDenseGetArrayRead(A, &av)); 2867 PetscCall(MatDenseGetArrayRead(B, &bv)); 2868 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2869 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2870 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2871 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2872 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2873 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2874 PetscFunctionReturn(PETSC_SUCCESS); 2875 } 2876 2877 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2878 { 2879 PetscFunctionBegin; 2880 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2881 C->ops->productsymbolic = MatProductSymbolic_AB; 2882 PetscFunctionReturn(PETSC_SUCCESS); 2883 } 2884 2885 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2886 { 2887 PetscFunctionBegin; 2888 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2889 C->ops->productsymbolic = MatProductSymbolic_AtB; 2890 PetscFunctionReturn(PETSC_SUCCESS); 2891 } 2892 2893 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2894 { 2895 PetscFunctionBegin; 2896 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2897 C->ops->productsymbolic = MatProductSymbolic_ABt; 2898 PetscFunctionReturn(PETSC_SUCCESS); 2899 } 2900 2901 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2902 { 2903 Mat_Product *product = C->product; 2904 2905 PetscFunctionBegin; 2906 switch (product->type) { 2907 case MATPRODUCT_AB: 2908 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2909 break; 2910 case MATPRODUCT_AtB: 2911 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2912 break; 2913 case MATPRODUCT_ABt: 2914 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2915 break; 2916 default: 2917 break; 2918 } 2919 PetscFunctionReturn(PETSC_SUCCESS); 2920 } 2921 2922 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2923 { 2924 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2925 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2926 PetscScalar *x; 2927 const PetscScalar *aa; 2928 2929 PetscFunctionBegin; 2930 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2931 PetscCall(VecGetArray(v, &x)); 2932 PetscCall(VecGetLocalSize(v, &p)); 2933 PetscCall(MatDenseGetArrayRead(A, &aa)); 2934 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2935 for (i = 0; i < m; i++) { 2936 x[i] = aa[i]; 2937 if (idx) idx[i] = 0; 2938 for (j = 1; j < n; j++) { 2939 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2940 x[i] = aa[i + a->lda * j]; 2941 if (idx) idx[i] = j; 2942 } 2943 } 2944 } 2945 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2946 PetscCall(VecRestoreArray(v, &x)); 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2951 { 2952 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2953 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2954 PetscScalar *x; 2955 PetscReal atmp; 2956 const PetscScalar *aa; 2957 2958 PetscFunctionBegin; 2959 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2960 PetscCall(VecGetArray(v, &x)); 2961 PetscCall(VecGetLocalSize(v, &p)); 2962 PetscCall(MatDenseGetArrayRead(A, &aa)); 2963 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2964 for (i = 0; i < m; i++) { 2965 x[i] = PetscAbsScalar(aa[i]); 2966 for (j = 1; j < n; j++) { 2967 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2968 if (PetscAbsScalar(x[i]) < atmp) { 2969 x[i] = atmp; 2970 if (idx) idx[i] = j; 2971 } 2972 } 2973 } 2974 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2975 PetscCall(VecRestoreArray(v, &x)); 2976 PetscFunctionReturn(PETSC_SUCCESS); 2977 } 2978 2979 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2980 { 2981 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2982 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2983 PetscScalar *x; 2984 const PetscScalar *aa; 2985 2986 PetscFunctionBegin; 2987 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2988 PetscCall(MatDenseGetArrayRead(A, &aa)); 2989 PetscCall(VecGetArray(v, &x)); 2990 PetscCall(VecGetLocalSize(v, &p)); 2991 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2992 for (i = 0; i < m; i++) { 2993 x[i] = aa[i]; 2994 if (idx) idx[i] = 0; 2995 for (j = 1; j < n; j++) { 2996 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2997 x[i] = aa[i + a->lda * j]; 2998 if (idx) idx[i] = j; 2999 } 3000 } 3001 } 3002 PetscCall(VecRestoreArray(v, &x)); 3003 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3004 PetscFunctionReturn(PETSC_SUCCESS); 3005 } 3006 3007 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 3008 { 3009 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3010 PetscScalar *x; 3011 const PetscScalar *aa; 3012 3013 PetscFunctionBegin; 3014 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3015 PetscCall(MatDenseGetArrayRead(A, &aa)); 3016 PetscCall(VecGetArray(v, &x)); 3017 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 3018 PetscCall(VecRestoreArray(v, &x)); 3019 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3020 PetscFunctionReturn(PETSC_SUCCESS); 3021 } 3022 3023 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 3024 { 3025 PetscInt i, j, m, n; 3026 const PetscScalar *a; 3027 3028 PetscFunctionBegin; 3029 PetscCall(MatGetSize(A, &m, &n)); 3030 PetscCall(PetscArrayzero(reductions, n)); 3031 PetscCall(MatDenseGetArrayRead(A, &a)); 3032 if (type == NORM_2) { 3033 for (i = 0; i < n; i++) { 3034 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 3035 a = PetscSafePointerPlusOffset(a, m); 3036 } 3037 } else if (type == NORM_1) { 3038 for (i = 0; i < n; i++) { 3039 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 3040 a = PetscSafePointerPlusOffset(a, m); 3041 } 3042 } else if (type == NORM_INFINITY) { 3043 for (i = 0; i < n; i++) { 3044 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 3045 a = PetscSafePointerPlusOffset(a, m); 3046 } 3047 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 3048 for (i = 0; i < n; i++) { 3049 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 3050 a = PetscSafePointerPlusOffset(a, m); 3051 } 3052 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3053 for (i = 0; i < n; i++) { 3054 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 3055 a = PetscSafePointerPlusOffset(a, m); 3056 } 3057 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3058 PetscCall(MatDenseRestoreArrayRead(A, &a)); 3059 if (type == NORM_2) { 3060 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 3061 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3062 for (i = 0; i < n; i++) reductions[i] /= m; 3063 } 3064 PetscFunctionReturn(PETSC_SUCCESS); 3065 } 3066 3067 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 3068 { 3069 PetscScalar *a; 3070 PetscInt lda, m, n, i, j; 3071 3072 PetscFunctionBegin; 3073 PetscCall(MatGetSize(x, &m, &n)); 3074 PetscCall(MatDenseGetLDA(x, &lda)); 3075 PetscCall(MatDenseGetArrayWrite(x, &a)); 3076 for (j = 0; j < n; j++) { 3077 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 3078 } 3079 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 3080 PetscFunctionReturn(PETSC_SUCCESS); 3081 } 3082 3083 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 3084 { 3085 PetscFunctionBegin; 3086 *missing = PETSC_FALSE; 3087 PetscFunctionReturn(PETSC_SUCCESS); 3088 } 3089 3090 /* vals is not const */ 3091 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 3092 { 3093 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3094 PetscScalar *v; 3095 3096 PetscFunctionBegin; 3097 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3098 PetscCall(MatDenseGetArray(A, &v)); 3099 *vals = v + col * a->lda; 3100 PetscCall(MatDenseRestoreArray(A, &v)); 3101 PetscFunctionReturn(PETSC_SUCCESS); 3102 } 3103 3104 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 3105 { 3106 PetscFunctionBegin; 3107 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 3108 PetscFunctionReturn(PETSC_SUCCESS); 3109 } 3110 3111 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 3112 MatGetRow_SeqDense, 3113 MatRestoreRow_SeqDense, 3114 MatMult_SeqDense, 3115 /* 4*/ MatMultAdd_SeqDense, 3116 MatMultTranspose_SeqDense, 3117 MatMultTransposeAdd_SeqDense, 3118 NULL, 3119 NULL, 3120 NULL, 3121 /* 10*/ NULL, 3122 MatLUFactor_SeqDense, 3123 MatCholeskyFactor_SeqDense, 3124 MatSOR_SeqDense, 3125 MatTranspose_SeqDense, 3126 /* 15*/ MatGetInfo_SeqDense, 3127 MatEqual_SeqDense, 3128 MatGetDiagonal_SeqDense, 3129 MatDiagonalScale_SeqDense, 3130 MatNorm_SeqDense, 3131 /* 20*/ NULL, 3132 NULL, 3133 MatSetOption_SeqDense, 3134 MatZeroEntries_SeqDense, 3135 /* 24*/ MatZeroRows_SeqDense, 3136 NULL, 3137 NULL, 3138 NULL, 3139 NULL, 3140 /* 29*/ MatSetUp_SeqDense, 3141 NULL, 3142 NULL, 3143 NULL, 3144 NULL, 3145 /* 34*/ MatDuplicate_SeqDense, 3146 NULL, 3147 NULL, 3148 NULL, 3149 NULL, 3150 /* 39*/ MatAXPY_SeqDense, 3151 MatCreateSubMatrices_SeqDense, 3152 NULL, 3153 MatGetValues_SeqDense, 3154 MatCopy_SeqDense, 3155 /* 44*/ MatGetRowMax_SeqDense, 3156 MatScale_SeqDense, 3157 MatShift_SeqDense, 3158 NULL, 3159 MatZeroRowsColumns_SeqDense, 3160 /* 49*/ MatSetRandom_SeqDense, 3161 NULL, 3162 NULL, 3163 NULL, 3164 NULL, 3165 /* 54*/ NULL, 3166 NULL, 3167 NULL, 3168 NULL, 3169 NULL, 3170 /* 59*/ MatCreateSubMatrix_SeqDense, 3171 MatDestroy_SeqDense, 3172 MatView_SeqDense, 3173 NULL, 3174 NULL, 3175 /* 64*/ NULL, 3176 NULL, 3177 NULL, 3178 NULL, 3179 NULL, 3180 /* 69*/ MatGetRowMaxAbs_SeqDense, 3181 NULL, 3182 NULL, 3183 NULL, 3184 NULL, 3185 /* 74*/ NULL, 3186 NULL, 3187 NULL, 3188 NULL, 3189 NULL, 3190 /* 79*/ NULL, 3191 NULL, 3192 NULL, 3193 NULL, 3194 /* 83*/ MatLoad_SeqDense, 3195 MatIsSymmetric_SeqDense, 3196 MatIsHermitian_SeqDense, 3197 NULL, 3198 NULL, 3199 NULL, 3200 /* 89*/ NULL, 3201 NULL, 3202 MatMatMultNumeric_SeqDense_SeqDense, 3203 NULL, 3204 NULL, 3205 /* 94*/ NULL, 3206 NULL, 3207 NULL, 3208 MatMatTransposeMultNumeric_SeqDense_SeqDense, 3209 NULL, 3210 /* 99*/ MatProductSetFromOptions_SeqDense, 3211 NULL, 3212 NULL, 3213 MatConjugate_SeqDense, 3214 NULL, 3215 /*104*/ NULL, 3216 MatRealPart_SeqDense, 3217 MatImaginaryPart_SeqDense, 3218 NULL, 3219 NULL, 3220 /*109*/ NULL, 3221 NULL, 3222 MatGetRowMin_SeqDense, 3223 MatGetColumnVector_SeqDense, 3224 MatMissingDiagonal_SeqDense, 3225 /*114*/ NULL, 3226 NULL, 3227 NULL, 3228 NULL, 3229 NULL, 3230 /*119*/ NULL, 3231 NULL, 3232 MatMultHermitianTranspose_SeqDense, 3233 MatMultHermitianTransposeAdd_SeqDense, 3234 NULL, 3235 /*124*/ NULL, 3236 MatGetColumnReductions_SeqDense, 3237 NULL, 3238 NULL, 3239 NULL, 3240 /*129*/ NULL, 3241 NULL, 3242 NULL, 3243 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3244 NULL, 3245 /*134*/ NULL, 3246 NULL, 3247 NULL, 3248 NULL, 3249 NULL, 3250 /*139*/ NULL, 3251 NULL, 3252 NULL, 3253 NULL, 3254 NULL, 3255 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3256 /*145*/ NULL, 3257 NULL, 3258 NULL, 3259 NULL, 3260 NULL, 3261 /*150*/ NULL, 3262 NULL, 3263 NULL, 3264 NULL, 3265 NULL, 3266 /*155*/ NULL, 3267 NULL}; 3268 3269 /*@ 3270 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3271 is stored in column major order (the usual Fortran format). 3272 3273 Collective 3274 3275 Input Parameters: 3276 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3277 . m - number of rows 3278 . n - number of columns 3279 - data - optional location of matrix data in column major order. Use `NULL` for PETSc 3280 to control all matrix memory allocation. 3281 3282 Output Parameter: 3283 . A - the matrix 3284 3285 Level: intermediate 3286 3287 Note: 3288 The data input variable is intended primarily for Fortran programmers 3289 who wish to allocate their own matrix memory space. Most users should 3290 set `data` = `NULL`. 3291 3292 Developer Note: 3293 Many of the matrix operations for this variant use the BLAS and LAPACK routines. 3294 3295 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3296 @*/ 3297 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A) 3298 { 3299 PetscFunctionBegin; 3300 PetscCall(MatCreate(comm, A)); 3301 PetscCall(MatSetSizes(*A, m, n, m, n)); 3302 PetscCall(MatSetType(*A, MATSEQDENSE)); 3303 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3304 PetscFunctionReturn(PETSC_SUCCESS); 3305 } 3306 3307 /*@ 3308 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3309 3310 Collective 3311 3312 Input Parameters: 3313 + B - the matrix 3314 - data - the array (or `NULL`) 3315 3316 Level: intermediate 3317 3318 Note: 3319 The data input variable is intended primarily for Fortran programmers 3320 who wish to allocate their own matrix memory space. Most users should 3321 need not call this routine. 3322 3323 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3324 @*/ 3325 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3326 { 3327 PetscFunctionBegin; 3328 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3329 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3330 PetscFunctionReturn(PETSC_SUCCESS); 3331 } 3332 3333 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3334 { 3335 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3336 3337 PetscFunctionBegin; 3338 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3339 B->preallocated = PETSC_TRUE; 3340 3341 PetscCall(PetscLayoutSetUp(B->rmap)); 3342 PetscCall(PetscLayoutSetUp(B->cmap)); 3343 3344 if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda)); 3345 3346 if (!data) { /* petsc-allocated storage */ 3347 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3348 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3349 3350 b->user_alloc = PETSC_FALSE; 3351 } else { /* user-allocated storage */ 3352 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3353 b->v = data; 3354 b->user_alloc = PETSC_TRUE; 3355 } 3356 B->assembled = PETSC_TRUE; 3357 PetscFunctionReturn(PETSC_SUCCESS); 3358 } 3359 3360 #if defined(PETSC_HAVE_ELEMENTAL) 3361 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3362 { 3363 Mat mat_elemental; 3364 const PetscScalar *array; 3365 PetscScalar *v_colwise; 3366 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3367 3368 PetscFunctionBegin; 3369 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3370 PetscCall(MatDenseGetArrayRead(A, &array)); 3371 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3372 k = 0; 3373 for (j = 0; j < N; j++) { 3374 cols[j] = j; 3375 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3376 } 3377 for (i = 0; i < M; i++) rows[i] = i; 3378 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3379 3380 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3381 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3382 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3383 PetscCall(MatSetUp(mat_elemental)); 3384 3385 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3386 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3387 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3388 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3389 PetscCall(PetscFree3(v_colwise, rows, cols)); 3390 3391 if (reuse == MAT_INPLACE_MATRIX) { 3392 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3393 } else { 3394 *newmat = mat_elemental; 3395 } 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } 3398 #endif 3399 3400 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3401 { 3402 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3403 PetscBool data; 3404 3405 PetscFunctionBegin; 3406 data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 3407 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3408 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3409 PetscCall(PetscBLASIntCast(lda, &b->lda)); 3410 PetscFunctionReturn(PETSC_SUCCESS); 3411 } 3412 3413 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3414 { 3415 PetscFunctionBegin; 3416 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3417 PetscFunctionReturn(PETSC_SUCCESS); 3418 } 3419 3420 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) 3421 { 3422 PetscBool isstd, iskok, iscuda, iship; 3423 PetscMPIInt size; 3424 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) 3425 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ 3426 const PetscScalar *a; 3427 #endif 3428 3429 PetscFunctionBegin; 3430 *v = NULL; 3431 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); 3432 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); 3433 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); 3434 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); 3435 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3436 if (isstd) { 3437 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3438 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3439 } else if (iskok) { 3440 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); 3441 #if PetscDefined(HAVE_KOKKOS_KERNELS) 3442 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3443 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3444 #endif 3445 } else if (iscuda) { 3446 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); 3447 #if PetscDefined(HAVE_CUDA) 3448 PetscCall(MatDenseCUDAGetArrayRead(A, &a)); 3449 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3450 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3451 #endif 3452 } else if (iship) { 3453 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); 3454 #if PetscDefined(HAVE_HIP) 3455 PetscCall(MatDenseHIPGetArrayRead(A, &a)); 3456 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3457 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3458 #endif 3459 } 3460 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); 3461 PetscFunctionReturn(PETSC_SUCCESS); 3462 } 3463 3464 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3465 { 3466 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3467 3468 PetscFunctionBegin; 3469 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3470 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3471 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3472 a->vecinuse = col + 1; 3473 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3474 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3475 *v = a->cvec; 3476 PetscFunctionReturn(PETSC_SUCCESS); 3477 } 3478 3479 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3480 { 3481 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3482 3483 PetscFunctionBegin; 3484 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3485 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3486 VecCheckAssembled(a->cvec); 3487 a->vecinuse = 0; 3488 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3489 PetscCall(VecResetArray(a->cvec)); 3490 if (v) *v = NULL; 3491 PetscFunctionReturn(PETSC_SUCCESS); 3492 } 3493 3494 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3495 { 3496 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3497 3498 PetscFunctionBegin; 3499 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3500 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3501 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3502 a->vecinuse = col + 1; 3503 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3504 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3505 PetscCall(VecLockReadPush(a->cvec)); 3506 *v = a->cvec; 3507 PetscFunctionReturn(PETSC_SUCCESS); 3508 } 3509 3510 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3511 { 3512 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3513 3514 PetscFunctionBegin; 3515 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3516 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3517 VecCheckAssembled(a->cvec); 3518 a->vecinuse = 0; 3519 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3520 PetscCall(VecLockReadPop(a->cvec)); 3521 PetscCall(VecResetArray(a->cvec)); 3522 if (v) *v = NULL; 3523 PetscFunctionReturn(PETSC_SUCCESS); 3524 } 3525 3526 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3527 { 3528 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3529 3530 PetscFunctionBegin; 3531 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3532 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3533 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3534 a->vecinuse = col + 1; 3535 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3536 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3537 *v = a->cvec; 3538 PetscFunctionReturn(PETSC_SUCCESS); 3539 } 3540 3541 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3542 { 3543 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3544 3545 PetscFunctionBegin; 3546 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3547 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3548 VecCheckAssembled(a->cvec); 3549 a->vecinuse = 0; 3550 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3551 PetscCall(VecResetArray(a->cvec)); 3552 if (v) *v = NULL; 3553 PetscFunctionReturn(PETSC_SUCCESS); 3554 } 3555 3556 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3557 { 3558 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3559 3560 PetscFunctionBegin; 3561 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3562 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3563 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3564 if (!a->cmat) { 3565 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); 3566 } else { 3567 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); 3568 } 3569 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3570 a->matinuse = cbegin + 1; 3571 *v = a->cmat; 3572 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3573 A->offloadmask = PETSC_OFFLOAD_CPU; 3574 #endif 3575 PetscFunctionReturn(PETSC_SUCCESS); 3576 } 3577 3578 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3579 { 3580 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3581 3582 PetscFunctionBegin; 3583 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3584 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3585 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3586 a->matinuse = 0; 3587 PetscCall(MatDenseResetArray(a->cmat)); 3588 if (v) *v = NULL; 3589 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3590 A->offloadmask = PETSC_OFFLOAD_CPU; 3591 #endif 3592 PetscFunctionReturn(PETSC_SUCCESS); 3593 } 3594 3595 /*MC 3596 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3597 3598 Options Database Key: 3599 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3600 3601 Level: beginner 3602 3603 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` 3604 M*/ 3605 PetscErrorCode MatCreate_SeqDense(Mat B) 3606 { 3607 Mat_SeqDense *b; 3608 PetscMPIInt size; 3609 3610 PetscFunctionBegin; 3611 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3612 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3613 3614 PetscCall(PetscNew(&b)); 3615 B->data = (void *)b; 3616 B->ops[0] = MatOps_Values; 3617 3618 b->roworiented = PETSC_TRUE; 3619 3620 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3621 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3622 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3623 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3624 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3626 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3627 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3628 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3629 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3633 #if defined(PETSC_HAVE_ELEMENTAL) 3634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3635 #endif 3636 #if defined(PETSC_HAVE_SCALAPACK) 3637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3638 #endif 3639 #if defined(PETSC_HAVE_CUDA) 3640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3641 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3642 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3643 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3644 #endif 3645 #if defined(PETSC_HAVE_HIP) 3646 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3647 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3648 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3649 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3650 #endif 3651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3652 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3653 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3654 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3655 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3656 3657 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3658 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3659 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3660 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3661 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3662 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3663 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3664 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3665 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3666 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3667 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); 3668 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); 3669 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); 3670 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3671 PetscFunctionReturn(PETSC_SUCCESS); 3672 } 3673 3674 /*@C 3675 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3676 3677 Not Collective 3678 3679 Input Parameters: 3680 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3681 - col - column index 3682 3683 Output Parameter: 3684 . vals - pointer to the data 3685 3686 Level: intermediate 3687 3688 Note: 3689 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3690 3691 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3692 @*/ 3693 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[]) 3694 { 3695 PetscFunctionBegin; 3696 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3697 PetscValidLogicalCollectiveInt(A, col, 2); 3698 PetscAssertPointer(vals, 3); 3699 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3700 PetscFunctionReturn(PETSC_SUCCESS); 3701 } 3702 3703 /*@C 3704 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3705 3706 Not Collective 3707 3708 Input Parameters: 3709 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3710 - vals - pointer to the data (may be `NULL`) 3711 3712 Level: intermediate 3713 3714 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` 3715 @*/ 3716 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[]) 3717 { 3718 PetscFunctionBegin; 3719 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3720 PetscAssertPointer(vals, 2); 3721 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3722 PetscFunctionReturn(PETSC_SUCCESS); 3723 } 3724 3725 /*@ 3726 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3727 3728 Collective 3729 3730 Input Parameters: 3731 + A - the `Mat` object 3732 - col - the column index 3733 3734 Output Parameter: 3735 . v - the vector 3736 3737 Level: intermediate 3738 3739 Notes: 3740 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3741 3742 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3743 3744 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3745 @*/ 3746 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3747 { 3748 PetscFunctionBegin; 3749 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3750 PetscValidType(A, 1); 3751 PetscValidLogicalCollectiveInt(A, col, 2); 3752 PetscAssertPointer(v, 3); 3753 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3754 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3755 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3756 PetscFunctionReturn(PETSC_SUCCESS); 3757 } 3758 3759 /*@ 3760 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. 3761 3762 Collective 3763 3764 Input Parameters: 3765 + A - the `Mat` object 3766 . col - the column index 3767 - v - the `Vec` object (may be `NULL`) 3768 3769 Level: intermediate 3770 3771 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3772 @*/ 3773 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3774 { 3775 PetscFunctionBegin; 3776 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3777 PetscValidType(A, 1); 3778 PetscValidLogicalCollectiveInt(A, col, 2); 3779 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3780 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3781 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3782 PetscFunctionReturn(PETSC_SUCCESS); 3783 } 3784 3785 /*@ 3786 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. 3787 3788 Collective 3789 3790 Input Parameters: 3791 + A - the `Mat` object 3792 - col - the column index 3793 3794 Output Parameter: 3795 . v - the vector 3796 3797 Level: intermediate 3798 3799 Notes: 3800 The vector is owned by PETSc and users cannot modify it. 3801 3802 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. 3803 3804 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. 3805 3806 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3807 @*/ 3808 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3809 { 3810 PetscFunctionBegin; 3811 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3812 PetscValidType(A, 1); 3813 PetscValidLogicalCollectiveInt(A, col, 2); 3814 PetscAssertPointer(v, 3); 3815 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3816 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3817 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3818 PetscFunctionReturn(PETSC_SUCCESS); 3819 } 3820 3821 /*@ 3822 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. 3823 3824 Collective 3825 3826 Input Parameters: 3827 + A - the `Mat` object 3828 . col - the column index 3829 - v - the `Vec` object (may be `NULL`) 3830 3831 Level: intermediate 3832 3833 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3834 @*/ 3835 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3836 { 3837 PetscFunctionBegin; 3838 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3839 PetscValidType(A, 1); 3840 PetscValidLogicalCollectiveInt(A, col, 2); 3841 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3842 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3843 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3844 PetscFunctionReturn(PETSC_SUCCESS); 3845 } 3846 3847 /*@ 3848 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. 3849 3850 Collective 3851 3852 Input Parameters: 3853 + A - the `Mat` object 3854 - col - the column index 3855 3856 Output Parameter: 3857 . v - the vector 3858 3859 Level: intermediate 3860 3861 Notes: 3862 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. 3863 3864 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. 3865 3866 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3867 @*/ 3868 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3869 { 3870 PetscFunctionBegin; 3871 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3872 PetscValidType(A, 1); 3873 PetscValidLogicalCollectiveInt(A, col, 2); 3874 PetscAssertPointer(v, 3); 3875 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3876 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3877 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3878 PetscFunctionReturn(PETSC_SUCCESS); 3879 } 3880 3881 /*@ 3882 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. 3883 3884 Collective 3885 3886 Input Parameters: 3887 + A - the `Mat` object 3888 . col - the column index 3889 - v - the `Vec` object (may be `NULL`) 3890 3891 Level: intermediate 3892 3893 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3894 @*/ 3895 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3896 { 3897 PetscFunctionBegin; 3898 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3899 PetscValidType(A, 1); 3900 PetscValidLogicalCollectiveInt(A, col, 2); 3901 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3902 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3903 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3904 PetscFunctionReturn(PETSC_SUCCESS); 3905 } 3906 3907 /*@ 3908 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. 3909 3910 Collective 3911 3912 Input Parameters: 3913 + A - the `Mat` object 3914 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) 3915 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) 3916 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) 3917 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) 3918 3919 Output Parameter: 3920 . v - the matrix 3921 3922 Level: intermediate 3923 3924 Notes: 3925 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. 3926 3927 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. 3928 3929 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3930 @*/ 3931 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3932 { 3933 PetscFunctionBegin; 3934 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3935 PetscValidType(A, 1); 3936 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3937 PetscValidLogicalCollectiveInt(A, rend, 3); 3938 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3939 PetscValidLogicalCollectiveInt(A, cend, 5); 3940 PetscAssertPointer(v, 6); 3941 if (rbegin == PETSC_DECIDE) rbegin = 0; 3942 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3943 if (cbegin == PETSC_DECIDE) cbegin = 0; 3944 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3945 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3946 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3947 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3948 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3949 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3950 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3951 PetscFunctionReturn(PETSC_SUCCESS); 3952 } 3953 3954 /*@ 3955 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. 3956 3957 Collective 3958 3959 Input Parameters: 3960 + A - the `Mat` object 3961 - v - the `Mat` object (may be `NULL`) 3962 3963 Level: intermediate 3964 3965 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3966 @*/ 3967 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3968 { 3969 PetscFunctionBegin; 3970 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3971 PetscValidType(A, 1); 3972 PetscAssertPointer(v, 2); 3973 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3974 PetscFunctionReturn(PETSC_SUCCESS); 3975 } 3976 3977 #include <petscblaslapack.h> 3978 #include <petsc/private/kernels/blockinvert.h> 3979 3980 PetscErrorCode MatSeqDenseInvert(Mat A) 3981 { 3982 PetscInt m; 3983 const PetscReal shift = 0.0; 3984 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 3985 PetscScalar *values; 3986 3987 PetscFunctionBegin; 3988 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3989 PetscCall(MatDenseGetArray(A, &values)); 3990 PetscCall(MatGetLocalSize(A, &m, NULL)); 3991 allowzeropivot = PetscNot(A->erroriffailure); 3992 /* factor and invert each block */ 3993 switch (m) { 3994 case 1: 3995 values[0] = (PetscScalar)1.0 / (values[0] + shift); 3996 break; 3997 case 2: 3998 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3999 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4000 break; 4001 case 3: 4002 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 4003 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4004 break; 4005 case 4: 4006 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 4007 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4008 break; 4009 case 5: { 4010 PetscScalar work[25]; 4011 PetscInt ipvt[5]; 4012 4013 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4014 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4015 } break; 4016 case 6: 4017 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 4018 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4019 break; 4020 case 7: 4021 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 4022 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4023 break; 4024 default: { 4025 PetscInt *v_pivots, *IJ, j; 4026 PetscScalar *v_work; 4027 4028 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); 4029 for (j = 0; j < m; j++) IJ[j] = j; 4030 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4031 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4032 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 4033 } 4034 } 4035 PetscCall(MatDenseRestoreArray(A, &values)); 4036 PetscFunctionReturn(PETSC_SUCCESS); 4037 } 4038