1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/dense/mpi/mpidense.h> 9 #include <petscblaslapack.h> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <petsc/private/vecimpl.h> 12 13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 PetscInt j, k, n = A->rmap->n; 17 PetscScalar *v; 18 19 PetscFunctionBegin; 20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 21 PetscCall(MatDenseGetArray(A, &v)); 22 if (!hermitian) { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 25 } 26 } else { 27 for (k = 0; k < n; k++) { 28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 29 } 30 } 31 PetscCall(MatDenseRestoreArray(A, &v)); 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 36 { 37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 38 PetscBLASInt info, n; 39 40 PetscFunctionBegin; 41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 42 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 43 if (A->factortype == MAT_FACTOR_LU) { 44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 45 if (!mat->fwork) { 46 mat->lfwork = n; 47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 48 } 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 54 if (A->spd == PETSC_BOOL3_TRUE) { 55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 57 PetscCall(PetscFPTrapPop()); 58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 59 #if defined(PETSC_USE_COMPLEX) 60 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 65 PetscCall(PetscFPTrapPop()); 66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 67 #endif 68 } else { /* symmetric case */ 69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 73 PetscCall(PetscFPTrapPop()); 74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 75 } 76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 79 80 A->ops->solve = NULL; 81 A->ops->matsolve = NULL; 82 A->ops->solvetranspose = NULL; 83 A->ops->matsolvetranspose = NULL; 84 A->ops->solveadd = NULL; 85 A->ops->solvetransposeadd = NULL; 86 A->factortype = MAT_FACTOR_NONE; 87 PetscCall(PetscFree(A->solvertype)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 92 { 93 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 95 PetscScalar *slot, *bb, *v; 96 const PetscScalar *xx; 97 98 PetscFunctionBegin; 99 if (PetscDefined(USE_DEBUG)) { 100 for (i = 0; i < N; i++) { 101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 104 } 105 } 106 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 107 108 /* fix right-hand side if needed */ 109 if (x && b) { 110 Vec xt; 111 112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 113 PetscCall(VecDuplicate(x, &xt)); 114 PetscCall(VecCopy(x, xt)); 115 PetscCall(VecScale(xt, -1.0)); 116 PetscCall(MatMultAdd(A, xt, b, b)); 117 PetscCall(VecDestroy(&xt)); 118 PetscCall(VecGetArrayRead(x, &xx)); 119 PetscCall(VecGetArray(b, &bb)); 120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 121 PetscCall(VecRestoreArrayRead(x, &xx)); 122 PetscCall(VecRestoreArray(b, &bb)); 123 } 124 125 PetscCall(MatDenseGetArray(A, &v)); 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i] * m; 128 PetscCall(PetscArrayzero(slot, r)); 129 } 130 for (i = 0; i < N; i++) { 131 slot = v + rows[i]; 132 for (j = 0; j < n; j++) { 133 *slot = 0.0; 134 slot += m; 135 } 136 } 137 if (diag != 0.0) { 138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 139 for (i = 0; i < N; i++) { 140 slot = v + (m + 1) * rows[i]; 141 *slot = diag; 142 } 143 } 144 PetscCall(MatDenseRestoreArray(A, &v)); 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 149 { 150 Mat B = NULL; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 152 Mat_SeqDense *b; 153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 154 const MatScalar *av; 155 PetscBool isseqdense; 156 157 PetscFunctionBegin; 158 if (reuse == MAT_REUSE_MATRIX) { 159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); 161 } 162 if (reuse != MAT_REUSE_MATRIX) { 163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 164 PetscCall(MatSetSizes(B, m, n, m, n)); 165 PetscCall(MatSetType(B, MATSEQDENSE)); 166 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 167 b = (Mat_SeqDense *)B->data; 168 } else { 169 b = (Mat_SeqDense *)((*newmat)->data); 170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); 171 } 172 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 173 for (i = 0; i < m; i++) { 174 PetscInt j; 175 for (j = 0; j < ai[1] - ai[0]; j++) { 176 b->v[*aj * b->lda + i] = *av; 177 aj++; 178 av++; 179 } 180 ai++; 181 } 182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 183 184 if (reuse == MAT_INPLACE_MATRIX) { 185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 187 PetscCall(MatHeaderReplace(A, &B)); 188 } else { 189 if (B) *newmat = B; 190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 197 { 198 Mat B = NULL; 199 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 200 PetscInt i, j; 201 PetscInt *rows, *nnz; 202 MatScalar *aa = a->v, *vals; 203 204 PetscFunctionBegin; 205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 206 if (reuse != MAT_REUSE_MATRIX) { 207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 209 PetscCall(MatSetType(B, MATSEQAIJ)); 210 for (j = 0; j < A->cmap->n; j++) { 211 for (i = 0; i < A->rmap->n; i++) 212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 213 aa += a->lda; 214 } 215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 216 } else B = *newmat; 217 aa = a->v; 218 for (j = 0; j < A->cmap->n; j++) { 219 PetscInt numRows = 0; 220 for (i = 0; i < A->rmap->n; i++) 221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 222 rows[numRows] = i; 223 vals[numRows++] = aa[i]; 224 } 225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 226 aa += a->lda; 227 } 228 PetscCall(PetscFree3(rows, nnz, vals)); 229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 231 232 if (reuse == MAT_INPLACE_MATRIX) { 233 PetscCall(MatHeaderReplace(A, &B)); 234 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 239 { 240 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 241 const PetscScalar *xv; 242 PetscScalar *yv; 243 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 244 245 PetscFunctionBegin; 246 PetscCall(MatDenseGetArrayRead(X, &xv)); 247 PetscCall(MatDenseGetArray(Y, &yv)); 248 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 249 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 250 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 251 PetscCall(PetscBLASIntCast(y->lda, &lday)); 252 if (ldax > m || lday > m) { 253 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); 254 } else { 255 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 256 } 257 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 258 PetscCall(MatDenseRestoreArray(Y, &yv)); 259 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 264 { 265 PetscLogDouble N = A->rmap->n * A->cmap->n; 266 267 PetscFunctionBegin; 268 info->block_size = 1.0; 269 info->nz_allocated = N; 270 info->nz_used = N; 271 info->nz_unneeded = 0; 272 info->assemblies = A->num_ass; 273 info->mallocs = 0; 274 info->memory = 0; /* REVIEW ME */ 275 info->fill_ratio_given = 0; 276 info->fill_ratio_needed = 0; 277 info->factor_mallocs = 0; 278 PetscFunctionReturn(PETSC_SUCCESS); 279 } 280 281 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 282 { 283 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 284 PetscScalar *v; 285 PetscBLASInt one = 1, j, nz, lda = 0; 286 287 PetscFunctionBegin; 288 PetscCall(MatDenseGetArray(A, &v)); 289 PetscCall(PetscBLASIntCast(a->lda, &lda)); 290 if (lda > A->rmap->n) { 291 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 292 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 293 } else { 294 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 295 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 296 } 297 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 298 PetscCall(MatDenseRestoreArray(A, &v)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 303 { 304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 305 PetscScalar *v; 306 PetscInt j, k; 307 308 PetscFunctionBegin; 309 PetscCall(MatDenseGetArray(A, &v)); 310 k = PetscMin(A->rmap->n, A->cmap->n); 311 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 312 PetscCall(PetscLogFlops(k)); 313 PetscCall(MatDenseRestoreArray(A, &v)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 318 { 319 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 320 PetscInt i, j, m = A->rmap->n, N = a->lda; 321 const PetscScalar *v; 322 323 PetscFunctionBegin; 324 *fl = PETSC_FALSE; 325 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 326 PetscCall(MatDenseGetArrayRead(A, &v)); 327 for (i = 0; i < m; i++) { 328 for (j = i; j < m; j++) { 329 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 330 } 331 } 332 *fl = PETSC_TRUE; 333 restore: 334 PetscCall(MatDenseRestoreArrayRead(A, &v)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 339 { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 360 { 361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 362 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 363 PetscBool isdensecpu; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 367 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 368 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 369 PetscCall(MatDenseSetLDA(newi, lda)); 370 } 371 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 372 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 373 if (cpvalues == MAT_COPY_VALUES) { 374 const PetscScalar *av; 375 PetscScalar *v; 376 377 PetscCall(MatDenseGetArrayRead(A, &av)); 378 PetscCall(MatDenseGetArrayWrite(newi, &v)); 379 PetscCall(MatDenseGetLDA(newi, &nlda)); 380 m = A->rmap->n; 381 if (lda > m || nlda > m) { 382 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); 383 } else { 384 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 385 } 386 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 387 PetscCall(MatDenseRestoreArrayRead(A, &av)); 388 PetscCall(MatPropagateSymmetryOptions(A, newi)); 389 } 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 394 { 395 PetscFunctionBegin; 396 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 397 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 398 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 399 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 404 { 405 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 406 PetscBLASInt info; 407 408 PetscFunctionBegin; 409 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 410 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 411 PetscCall(PetscFPTrapPop()); 412 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 413 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatConjugate_SeqDense(Mat); 418 419 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 420 { 421 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 422 PetscBLASInt info; 423 424 PetscFunctionBegin; 425 if (A->spd == PETSC_BOOL3_TRUE) { 426 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 427 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 428 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 429 PetscCall(PetscFPTrapPop()); 430 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 431 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 432 #if defined(PETSC_USE_COMPLEX) 433 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 434 if (T) PetscCall(MatConjugate_SeqDense(A)); 435 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 436 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 437 PetscCall(PetscFPTrapPop()); 438 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 439 if (T) PetscCall(MatConjugate_SeqDense(A)); 440 #endif 441 } else { /* symmetric case */ 442 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 443 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 444 PetscCall(PetscFPTrapPop()); 445 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 446 } 447 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 448 PetscFunctionReturn(PETSC_SUCCESS); 449 } 450 451 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 452 { 453 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 454 PetscBLASInt info; 455 char trans; 456 457 PetscFunctionBegin; 458 if (PetscDefined(USE_COMPLEX)) { 459 trans = 'C'; 460 } else { 461 trans = 'T'; 462 } 463 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 464 { /* lwork depends on the number of right-hand sides */ 465 PetscBLASInt nlfwork, lfwork = -1; 466 PetscScalar fwork; 467 468 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 469 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 470 if (nlfwork > mat->lfwork) { 471 mat->lfwork = nlfwork; 472 PetscCall(PetscFree(mat->fwork)); 473 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 474 } 475 } 476 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 477 PetscCall(PetscFPTrapPop()); 478 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 479 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 480 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 481 PetscCall(PetscFPTrapPop()); 482 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 483 for (PetscInt j = 0; j < nrhs; j++) { 484 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 485 } 486 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 487 PetscFunctionReturn(PETSC_SUCCESS); 488 } 489 490 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 491 { 492 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 493 PetscBLASInt info; 494 495 PetscFunctionBegin; 496 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 497 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 498 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 499 PetscCall(PetscFPTrapPop()); 500 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 501 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 502 { /* lwork depends on the number of right-hand sides */ 503 PetscBLASInt nlfwork, lfwork = -1; 504 PetscScalar fwork; 505 506 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 507 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 508 if (nlfwork > mat->lfwork) { 509 mat->lfwork = nlfwork; 510 PetscCall(PetscFree(mat->fwork)); 511 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 512 } 513 } 514 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 515 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 516 PetscCall(PetscFPTrapPop()); 517 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 518 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 519 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 520 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 521 PetscFunctionReturn(PETSC_SUCCESS); 522 } 523 524 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 525 { 526 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 527 PetscScalar *y; 528 PetscBLASInt m = 0, k = 0; 529 530 PetscFunctionBegin; 531 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 532 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 533 if (k < m) { 534 PetscCall(VecCopy(xx, mat->qrrhs)); 535 PetscCall(VecGetArray(mat->qrrhs, &y)); 536 } else { 537 PetscCall(VecCopy(xx, yy)); 538 PetscCall(VecGetArray(yy, &y)); 539 } 540 *_y = y; 541 *_k = k; 542 *_m = m; 543 PetscFunctionReturn(PETSC_SUCCESS); 544 } 545 546 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 547 { 548 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 549 PetscScalar *y = NULL; 550 PetscBLASInt m, k; 551 552 PetscFunctionBegin; 553 y = *_y; 554 *_y = NULL; 555 k = *_k; 556 m = *_m; 557 if (k < m) { 558 PetscScalar *yv; 559 PetscCall(VecGetArray(yy, &yv)); 560 PetscCall(PetscArraycpy(yv, y, k)); 561 PetscCall(VecRestoreArray(yy, &yv)); 562 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 563 } else { 564 PetscCall(VecRestoreArray(yy, &y)); 565 } 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 570 { 571 PetscScalar *y = NULL; 572 PetscBLASInt m = 0, k = 0; 573 574 PetscFunctionBegin; 575 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 576 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 577 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 578 PetscFunctionReturn(PETSC_SUCCESS); 579 } 580 581 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 582 { 583 PetscScalar *y = NULL; 584 PetscBLASInt m = 0, k = 0; 585 586 PetscFunctionBegin; 587 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 588 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 589 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 590 PetscFunctionReturn(PETSC_SUCCESS); 591 } 592 593 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 594 { 595 PetscScalar *y = NULL; 596 PetscBLASInt m = 0, k = 0; 597 598 PetscFunctionBegin; 599 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 600 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 601 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 602 PetscFunctionReturn(PETSC_SUCCESS); 603 } 604 605 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 606 { 607 PetscScalar *y = NULL; 608 PetscBLASInt m = 0, k = 0; 609 610 PetscFunctionBegin; 611 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 612 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 613 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 614 PetscFunctionReturn(PETSC_SUCCESS); 615 } 616 617 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 618 { 619 PetscScalar *y = NULL; 620 PetscBLASInt m = 0, k = 0; 621 622 PetscFunctionBegin; 623 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 624 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 625 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 629 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 630 { 631 PetscScalar *y = NULL; 632 PetscBLASInt m = 0, k = 0; 633 634 PetscFunctionBegin; 635 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 636 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 637 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 642 { 643 const PetscScalar *b; 644 PetscScalar *y; 645 PetscInt n, _ldb, _ldx; 646 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 647 648 PetscFunctionBegin; 649 *_ldy = 0; 650 *_m = 0; 651 *_nrhs = 0; 652 *_k = 0; 653 *_y = NULL; 654 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 655 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 656 PetscCall(MatGetSize(B, NULL, &n)); 657 PetscCall(PetscBLASIntCast(n, &nrhs)); 658 PetscCall(MatDenseGetLDA(B, &_ldb)); 659 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 660 PetscCall(MatDenseGetLDA(X, &_ldx)); 661 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 662 if (ldx < m) { 663 PetscCall(MatDenseGetArrayRead(B, &b)); 664 PetscCall(PetscMalloc1(nrhs * m, &y)); 665 if (ldb == m) { 666 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 667 } else { 668 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 669 } 670 ldy = m; 671 PetscCall(MatDenseRestoreArrayRead(B, &b)); 672 } else { 673 if (ldb == ldx) { 674 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 675 PetscCall(MatDenseGetArray(X, &y)); 676 } else { 677 PetscCall(MatDenseGetArray(X, &y)); 678 PetscCall(MatDenseGetArrayRead(B, &b)); 679 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 680 PetscCall(MatDenseRestoreArrayRead(B, &b)); 681 } 682 ldy = ldx; 683 } 684 *_y = y; 685 *_ldy = ldy; 686 *_k = k; 687 *_m = m; 688 *_nrhs = nrhs; 689 PetscFunctionReturn(PETSC_SUCCESS); 690 } 691 692 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 693 { 694 PetscScalar *y; 695 PetscInt _ldx; 696 PetscBLASInt k, ldy, nrhs, ldx = 0; 697 698 PetscFunctionBegin; 699 y = *_y; 700 *_y = NULL; 701 k = *_k; 702 ldy = *_ldy; 703 nrhs = *_nrhs; 704 PetscCall(MatDenseGetLDA(X, &_ldx)); 705 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 706 if (ldx != ldy) { 707 PetscScalar *xv; 708 PetscCall(MatDenseGetArray(X, &xv)); 709 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 710 PetscCall(MatDenseRestoreArray(X, &xv)); 711 PetscCall(PetscFree(y)); 712 } else { 713 PetscCall(MatDenseRestoreArray(X, &y)); 714 } 715 PetscFunctionReturn(PETSC_SUCCESS); 716 } 717 718 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 719 { 720 PetscScalar *y; 721 PetscBLASInt m, k, ldy, nrhs; 722 723 PetscFunctionBegin; 724 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 725 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 726 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 727 PetscFunctionReturn(PETSC_SUCCESS); 728 } 729 730 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 731 { 732 PetscScalar *y; 733 PetscBLASInt m, k, ldy, nrhs; 734 735 PetscFunctionBegin; 736 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 737 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 738 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 739 PetscFunctionReturn(PETSC_SUCCESS); 740 } 741 742 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 743 { 744 PetscScalar *y; 745 PetscBLASInt m, k, ldy, nrhs; 746 747 PetscFunctionBegin; 748 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 749 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 750 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 755 { 756 PetscScalar *y; 757 PetscBLASInt m, k, ldy, nrhs; 758 759 PetscFunctionBegin; 760 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 761 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 762 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 767 { 768 PetscScalar *y; 769 PetscBLASInt m, k, ldy, nrhs; 770 771 PetscFunctionBegin; 772 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 773 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 774 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 775 PetscFunctionReturn(PETSC_SUCCESS); 776 } 777 778 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 779 { 780 PetscScalar *y; 781 PetscBLASInt m, k, ldy, nrhs; 782 783 PetscFunctionBegin; 784 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 786 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 /* COMMENT: I have chosen to hide row permutation in the pivots, 791 rather than put it in the Mat->row slot.*/ 792 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 793 { 794 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 795 PetscBLASInt n, m, info; 796 797 PetscFunctionBegin; 798 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 799 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 800 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 801 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 802 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 803 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 804 PetscCall(PetscFPTrapPop()); 805 806 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 807 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 808 809 A->ops->solve = MatSolve_SeqDense_LU; 810 A->ops->matsolve = MatMatSolve_SeqDense_LU; 811 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 812 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 813 A->factortype = MAT_FACTOR_LU; 814 815 PetscCall(PetscFree(A->solvertype)); 816 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 817 818 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 823 { 824 MatFactorInfo info; 825 826 PetscFunctionBegin; 827 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 828 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 829 PetscFunctionReturn(PETSC_SUCCESS); 830 } 831 832 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 833 { 834 PetscFunctionBegin; 835 fact->preallocated = PETSC_TRUE; 836 fact->assembled = PETSC_TRUE; 837 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 841 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 842 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 843 { 844 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 845 PetscBLASInt info, n; 846 847 PetscFunctionBegin; 848 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 849 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 850 if (A->spd == PETSC_BOOL3_TRUE) { 851 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 852 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 853 PetscCall(PetscFPTrapPop()); 854 #if defined(PETSC_USE_COMPLEX) 855 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 856 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 857 if (!mat->fwork) { 858 PetscScalar dummy; 859 860 mat->lfwork = -1; 861 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 862 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 863 PetscCall(PetscFPTrapPop()); 864 mat->lfwork = (PetscInt)PetscRealPart(dummy); 865 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 866 } 867 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 868 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 869 PetscCall(PetscFPTrapPop()); 870 #endif 871 } else { /* symmetric case */ 872 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 873 if (!mat->fwork) { 874 PetscScalar dummy; 875 876 mat->lfwork = -1; 877 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 878 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 879 PetscCall(PetscFPTrapPop()); 880 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 881 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 882 } 883 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 884 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 885 PetscCall(PetscFPTrapPop()); 886 } 887 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 888 889 A->ops->solve = MatSolve_SeqDense_Cholesky; 890 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 891 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 892 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 893 A->factortype = MAT_FACTOR_CHOLESKY; 894 895 PetscCall(PetscFree(A->solvertype)); 896 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 897 898 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 899 PetscFunctionReturn(PETSC_SUCCESS); 900 } 901 902 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 903 { 904 MatFactorInfo info; 905 906 PetscFunctionBegin; 907 info.fill = 1.0; 908 909 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 910 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 915 { 916 PetscFunctionBegin; 917 fact->assembled = PETSC_TRUE; 918 fact->preallocated = PETSC_TRUE; 919 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 920 PetscFunctionReturn(PETSC_SUCCESS); 921 } 922 923 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 924 { 925 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 926 PetscBLASInt n, m, info, min, max; 927 928 PetscFunctionBegin; 929 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 930 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 931 max = PetscMax(m, n); 932 min = PetscMin(m, n); 933 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 934 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 935 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); 936 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 937 if (!mat->fwork) { 938 PetscScalar dummy; 939 940 mat->lfwork = -1; 941 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 942 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 943 PetscCall(PetscFPTrapPop()); 944 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 945 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 946 } 947 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 948 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 949 PetscCall(PetscFPTrapPop()); 950 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 951 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 952 mat->rank = min; 953 954 A->ops->solve = MatSolve_SeqDense_QR; 955 A->ops->matsolve = MatMatSolve_SeqDense_QR; 956 A->factortype = MAT_FACTOR_QR; 957 if (m == n) { 958 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 959 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 960 } 961 962 PetscCall(PetscFree(A->solvertype)); 963 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 964 965 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 966 PetscFunctionReturn(PETSC_SUCCESS); 967 } 968 969 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 970 { 971 MatFactorInfo info; 972 973 PetscFunctionBegin; 974 info.fill = 1.0; 975 976 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 977 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 978 PetscFunctionReturn(PETSC_SUCCESS); 979 } 980 981 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 982 { 983 PetscFunctionBegin; 984 fact->assembled = PETSC_TRUE; 985 fact->preallocated = PETSC_TRUE; 986 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 987 PetscFunctionReturn(PETSC_SUCCESS); 988 } 989 990 /* uses LAPACK */ 991 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 992 { 993 PetscFunctionBegin; 994 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 995 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 996 PetscCall(MatSetType(*fact, MATDENSE)); 997 (*fact)->trivialsymbolic = PETSC_TRUE; 998 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 999 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1000 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1001 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1002 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1003 } else if (ftype == MAT_FACTOR_QR) { 1004 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1005 } 1006 (*fact)->factortype = ftype; 1007 1008 PetscCall(PetscFree((*fact)->solvertype)); 1009 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1010 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1011 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1012 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1013 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1014 PetscFunctionReturn(PETSC_SUCCESS); 1015 } 1016 1017 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1018 { 1019 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1020 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1021 const PetscScalar *b; 1022 PetscInt m = A->rmap->n, i; 1023 PetscBLASInt o = 1, bm = 0; 1024 1025 PetscFunctionBegin; 1026 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1027 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1028 #endif 1029 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1030 PetscCall(PetscBLASIntCast(m, &bm)); 1031 if (flag & SOR_ZERO_INITIAL_GUESS) { 1032 /* this is a hack fix, should have another version without the second BLASdotu */ 1033 PetscCall(VecSet(xx, zero)); 1034 } 1035 PetscCall(VecGetArray(xx, &x)); 1036 PetscCall(VecGetArrayRead(bb, &b)); 1037 its = its * lits; 1038 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1039 while (its--) { 1040 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1041 for (i = 0; i < m; i++) { 1042 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1043 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1044 } 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1047 for (i = m - 1; i >= 0; i--) { 1048 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1049 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1050 } 1051 } 1052 } 1053 PetscCall(VecRestoreArrayRead(bb, &b)); 1054 PetscCall(VecRestoreArray(xx, &x)); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 static PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1059 { 1060 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1061 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1062 PetscBLASInt m, n, _One = 1; 1063 const PetscScalar *v = mat->v, *x; 1064 1065 PetscFunctionBegin; 1066 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1067 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1068 PetscCall(VecGetArrayRead(xx, &x)); 1069 PetscCall(VecGetArrayWrite(yy, &y)); 1070 if (!m || !n) { 1071 PetscBLASInt i; 1072 if (trans) 1073 for (i = 0; i < n; i++) y[i] = 0.0; 1074 else 1075 for (i = 0; i < m; i++) y[i] = 0.0; 1076 } else { 1077 if (trans) { 1078 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1079 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1080 } else { 1081 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); 1082 } 1083 PetscCall(PetscLogFlops(2.0 * m * n - n)); 1084 } 1085 PetscCall(VecRestoreArrayRead(xx, &x)); 1086 PetscCall(VecRestoreArrayWrite(yy, &y)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1091 { 1092 PetscFunctionBegin; 1093 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1094 PetscFunctionReturn(PETSC_SUCCESS); 1095 } 1096 1097 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1098 { 1099 PetscFunctionBegin; 1100 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1101 PetscFunctionReturn(PETSC_SUCCESS); 1102 } 1103 1104 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1105 { 1106 PetscFunctionBegin; 1107 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1108 PetscFunctionReturn(PETSC_SUCCESS); 1109 } 1110 1111 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1112 { 1113 PetscFunctionBegin; 1114 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1115 PetscFunctionReturn(PETSC_SUCCESS); 1116 } 1117 1118 static PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1119 { 1120 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1121 const PetscScalar *v = mat->v, *x; 1122 PetscScalar *y, _DOne = 1.0; 1123 PetscBLASInt m, n, _One = 1; 1124 1125 PetscFunctionBegin; 1126 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1127 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1128 PetscCall(VecCopy(zz, yy)); 1129 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); 1130 PetscCall(VecGetArray(yy, &y)); 1131 PetscCall(VecGetArrayRead(xx, &x)); 1132 if (trans) { 1133 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1134 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1135 } else { 1136 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); 1137 } 1138 PetscCall(VecRestoreArrayRead(xx, &x)); 1139 PetscCall(VecRestoreArray(yy, &y)); 1140 PetscCall(PetscLogFlops(2.0 * m * n)); 1141 PetscFunctionReturn(PETSC_SUCCESS); 1142 } 1143 1144 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1145 { 1146 PetscFunctionBegin; 1147 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1152 { 1153 PetscFunctionBegin; 1154 PetscMPIInt rank; 1155 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 1156 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1161 { 1162 PetscFunctionBegin; 1163 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1164 PetscFunctionReturn(PETSC_SUCCESS); 1165 } 1166 1167 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1168 { 1169 PetscFunctionBegin; 1170 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1171 PetscFunctionReturn(PETSC_SUCCESS); 1172 } 1173 1174 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1175 { 1176 PetscFunctionBegin; 1177 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1182 { 1183 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1184 PetscInt i; 1185 1186 PetscFunctionBegin; 1187 if (ncols) *ncols = A->cmap->n; 1188 if (cols) { 1189 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1190 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1191 } 1192 if (vals) { 1193 const PetscScalar *v; 1194 1195 PetscCall(MatDenseGetArrayRead(A, &v)); 1196 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1197 v += row; 1198 for (i = 0; i < A->cmap->n; i++) { 1199 (*vals)[i] = *v; 1200 v += mat->lda; 1201 } 1202 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1203 } 1204 PetscFunctionReturn(PETSC_SUCCESS); 1205 } 1206 1207 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1208 { 1209 PetscFunctionBegin; 1210 if (cols) PetscCall(PetscFree(*cols)); 1211 if (vals) PetscCall(PetscFree(*vals)); 1212 PetscFunctionReturn(PETSC_SUCCESS); 1213 } 1214 1215 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1216 { 1217 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1218 PetscScalar *av; 1219 PetscInt i, j, idx = 0; 1220 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1221 PetscOffloadMask oldf; 1222 #endif 1223 1224 PetscFunctionBegin; 1225 PetscCall(MatDenseGetArray(A, &av)); 1226 if (!mat->roworiented) { 1227 if (addv == INSERT_VALUES) { 1228 for (j = 0; j < n; j++) { 1229 if (indexn[j] < 0) { 1230 idx += m; 1231 continue; 1232 } 1233 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1234 for (i = 0; i < m; i++) { 1235 if (indexm[i] < 0) { 1236 idx++; 1237 continue; 1238 } 1239 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1240 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1241 } 1242 } 1243 } else { 1244 for (j = 0; j < n; j++) { 1245 if (indexn[j] < 0) { 1246 idx += m; 1247 continue; 1248 } 1249 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1250 for (i = 0; i < m; i++) { 1251 if (indexm[i] < 0) { 1252 idx++; 1253 continue; 1254 } 1255 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1256 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1257 } 1258 } 1259 } 1260 } else { 1261 if (addv == INSERT_VALUES) { 1262 for (i = 0; i < m; i++) { 1263 if (indexm[i] < 0) { 1264 idx += n; 1265 continue; 1266 } 1267 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1268 for (j = 0; j < n; j++) { 1269 if (indexn[j] < 0) { 1270 idx++; 1271 continue; 1272 } 1273 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1274 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1275 } 1276 } 1277 } else { 1278 for (i = 0; i < m; i++) { 1279 if (indexm[i] < 0) { 1280 idx += n; 1281 continue; 1282 } 1283 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1284 for (j = 0; j < n; j++) { 1285 if (indexn[j] < 0) { 1286 idx++; 1287 continue; 1288 } 1289 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1290 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1291 } 1292 } 1293 } 1294 } 1295 /* hack to prevent unneeded copy to the GPU while returning the array */ 1296 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1297 oldf = A->offloadmask; 1298 A->offloadmask = PETSC_OFFLOAD_GPU; 1299 #endif 1300 PetscCall(MatDenseRestoreArray(A, &av)); 1301 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1302 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1303 #endif 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } 1306 1307 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1308 { 1309 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1310 const PetscScalar *vv; 1311 PetscInt i, j; 1312 1313 PetscFunctionBegin; 1314 PetscCall(MatDenseGetArrayRead(A, &vv)); 1315 /* row-oriented output */ 1316 for (i = 0; i < m; i++) { 1317 if (indexm[i] < 0) { 1318 v += n; 1319 continue; 1320 } 1321 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1322 for (j = 0; j < n; j++) { 1323 if (indexn[j] < 0) { 1324 v++; 1325 continue; 1326 } 1327 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1328 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1329 } 1330 } 1331 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } 1334 1335 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1336 { 1337 PetscBool skipHeader; 1338 PetscViewerFormat format; 1339 PetscInt header[4], M, N, m, lda, i, j, k; 1340 const PetscScalar *v; 1341 PetscScalar *vwork; 1342 1343 PetscFunctionBegin; 1344 PetscCall(PetscViewerSetUp(viewer)); 1345 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1346 PetscCall(PetscViewerGetFormat(viewer, &format)); 1347 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1348 1349 PetscCall(MatGetSize(mat, &M, &N)); 1350 1351 /* write matrix header */ 1352 header[0] = MAT_FILE_CLASSID; 1353 header[1] = M; 1354 header[2] = N; 1355 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1356 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1357 1358 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1359 if (format != PETSC_VIEWER_NATIVE) { 1360 PetscInt nnz = m * N, *iwork; 1361 /* store row lengths for each row */ 1362 PetscCall(PetscMalloc1(nnz, &iwork)); 1363 for (i = 0; i < m; i++) iwork[i] = N; 1364 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1365 /* store column indices (zero start index) */ 1366 for (k = 0, i = 0; i < m; i++) 1367 for (j = 0; j < N; j++, k++) iwork[k] = j; 1368 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1369 PetscCall(PetscFree(iwork)); 1370 } 1371 /* store matrix values as a dense matrix in row major order */ 1372 PetscCall(PetscMalloc1(m * N, &vwork)); 1373 PetscCall(MatDenseGetArrayRead(mat, &v)); 1374 PetscCall(MatDenseGetLDA(mat, &lda)); 1375 for (k = 0, i = 0; i < m; i++) 1376 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1377 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1378 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1379 PetscCall(PetscFree(vwork)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1384 { 1385 PetscBool skipHeader; 1386 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1387 PetscInt rows, cols; 1388 PetscScalar *v, *vwork; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscViewerSetUp(viewer)); 1392 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1393 1394 if (!skipHeader) { 1395 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1396 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1397 M = header[1]; 1398 N = header[2]; 1399 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1400 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1401 nz = header[3]; 1402 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1403 } else { 1404 PetscCall(MatGetSize(mat, &M, &N)); 1405 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1406 nz = MATRIX_BINARY_FORMAT_DENSE; 1407 } 1408 1409 /* setup global sizes if not set */ 1410 if (mat->rmap->N < 0) mat->rmap->N = M; 1411 if (mat->cmap->N < 0) mat->cmap->N = N; 1412 PetscCall(MatSetUp(mat)); 1413 /* check if global sizes are correct */ 1414 PetscCall(MatGetSize(mat, &rows, &cols)); 1415 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1416 1417 PetscCall(MatGetSize(mat, NULL, &N)); 1418 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1419 PetscCall(MatDenseGetArray(mat, &v)); 1420 PetscCall(MatDenseGetLDA(mat, &lda)); 1421 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1422 PetscInt nnz = m * N; 1423 /* read in matrix values */ 1424 PetscCall(PetscMalloc1(nnz, &vwork)); 1425 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1426 /* store values in column major order */ 1427 for (j = 0; j < N; j++) 1428 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1429 PetscCall(PetscFree(vwork)); 1430 } else { /* matrix in file is sparse format */ 1431 PetscInt nnz = 0, *rlens, *icols; 1432 /* read in row lengths */ 1433 PetscCall(PetscMalloc1(m, &rlens)); 1434 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1435 for (i = 0; i < m; i++) nnz += rlens[i]; 1436 /* read in column indices and values */ 1437 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1438 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1439 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1440 /* store values in column major order */ 1441 for (k = 0, i = 0; i < m; i++) 1442 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1443 PetscCall(PetscFree(rlens)); 1444 PetscCall(PetscFree2(icols, vwork)); 1445 } 1446 PetscCall(MatDenseRestoreArray(mat, &v)); 1447 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1448 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1449 PetscFunctionReturn(PETSC_SUCCESS); 1450 } 1451 1452 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1453 { 1454 PetscBool isbinary, ishdf5; 1455 1456 PetscFunctionBegin; 1457 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1458 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1459 /* force binary viewer to load .info file if it has not yet done so */ 1460 PetscCall(PetscViewerSetUp(viewer)); 1461 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1462 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1463 if (isbinary) { 1464 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1465 } else if (ishdf5) { 1466 #if defined(PETSC_HAVE_HDF5) 1467 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1468 #else 1469 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1470 #endif 1471 } else { 1472 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1473 } 1474 PetscFunctionReturn(PETSC_SUCCESS); 1475 } 1476 1477 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1478 { 1479 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1480 PetscInt i, j; 1481 const char *name; 1482 PetscScalar *v, *av; 1483 PetscViewerFormat format; 1484 #if defined(PETSC_USE_COMPLEX) 1485 PetscBool allreal = PETSC_TRUE; 1486 #endif 1487 1488 PetscFunctionBegin; 1489 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1490 PetscCall(PetscViewerGetFormat(viewer, &format)); 1491 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1492 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ 1493 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1494 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1495 for (i = 0; i < A->rmap->n; i++) { 1496 v = av + i; 1497 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1498 for (j = 0; j < A->cmap->n; j++) { 1499 #if defined(PETSC_USE_COMPLEX) 1500 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1501 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1502 } else if (PetscRealPart(*v)) { 1503 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1504 } 1505 #else 1506 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1507 #endif 1508 v += a->lda; 1509 } 1510 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1511 } 1512 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1513 } else { 1514 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1515 #if defined(PETSC_USE_COMPLEX) 1516 /* determine if matrix has all real values */ 1517 for (j = 0; j < A->cmap->n; j++) { 1518 v = av + j * a->lda; 1519 for (i = 0; i < A->rmap->n; i++) { 1520 if (PetscImaginaryPart(v[i])) { 1521 allreal = PETSC_FALSE; 1522 break; 1523 } 1524 } 1525 } 1526 #endif 1527 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1528 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1529 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1530 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1531 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1532 } 1533 1534 for (i = 0; i < A->rmap->n; i++) { 1535 v = av + i; 1536 for (j = 0; j < A->cmap->n; j++) { 1537 #if defined(PETSC_USE_COMPLEX) 1538 if (allreal) { 1539 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1540 } else { 1541 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1542 } 1543 #else 1544 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1545 #endif 1546 v += a->lda; 1547 } 1548 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1549 } 1550 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1551 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1552 } 1553 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1554 PetscCall(PetscViewerFlush(viewer)); 1555 PetscFunctionReturn(PETSC_SUCCESS); 1556 } 1557 1558 #include <petscdraw.h> 1559 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1560 { 1561 Mat A = (Mat)Aa; 1562 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1563 int color = PETSC_DRAW_WHITE; 1564 const PetscScalar *v; 1565 PetscViewer viewer; 1566 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1567 PetscViewerFormat format; 1568 1569 PetscFunctionBegin; 1570 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1571 PetscCall(PetscViewerGetFormat(viewer, &format)); 1572 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1573 1574 /* Loop over matrix elements drawing boxes */ 1575 PetscCall(MatDenseGetArrayRead(A, &v)); 1576 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1577 PetscDrawCollectiveBegin(draw); 1578 /* Blue for negative and Red for positive */ 1579 for (j = 0; j < n; j++) { 1580 x_l = j; 1581 x_r = x_l + 1.0; 1582 for (i = 0; i < m; i++) { 1583 y_l = m - i - 1.0; 1584 y_r = y_l + 1.0; 1585 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1586 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1587 else continue; 1588 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1589 } 1590 } 1591 PetscDrawCollectiveEnd(draw); 1592 } else { 1593 /* use contour shading to indicate magnitude of values */ 1594 /* first determine max of all nonzero values */ 1595 PetscReal minv = 0.0, maxv = 0.0; 1596 PetscDraw popup; 1597 1598 for (i = 0; i < m * n; i++) { 1599 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1600 } 1601 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1602 PetscCall(PetscDrawGetPopup(draw, &popup)); 1603 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1604 1605 PetscDrawCollectiveBegin(draw); 1606 for (j = 0; j < n; j++) { 1607 x_l = j; 1608 x_r = x_l + 1.0; 1609 for (i = 0; i < m; i++) { 1610 y_l = m - i - 1.0; 1611 y_r = y_l + 1.0; 1612 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1613 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1614 } 1615 } 1616 PetscDrawCollectiveEnd(draw); 1617 } 1618 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1619 PetscFunctionReturn(PETSC_SUCCESS); 1620 } 1621 1622 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1623 { 1624 PetscDraw draw; 1625 PetscBool isnull; 1626 PetscReal xr, yr, xl, yl, h, w; 1627 1628 PetscFunctionBegin; 1629 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1630 PetscCall(PetscDrawIsNull(draw, &isnull)); 1631 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1632 1633 xr = A->cmap->n; 1634 yr = A->rmap->n; 1635 h = yr / 10.0; 1636 w = xr / 10.0; 1637 xr += w; 1638 yr += h; 1639 xl = -w; 1640 yl = -h; 1641 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1642 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1643 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1644 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1645 PetscCall(PetscDrawSave(draw)); 1646 PetscFunctionReturn(PETSC_SUCCESS); 1647 } 1648 1649 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1650 { 1651 PetscBool iascii, isbinary, isdraw; 1652 1653 PetscFunctionBegin; 1654 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1655 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1656 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1657 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1658 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1659 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1664 { 1665 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1666 1667 PetscFunctionBegin; 1668 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1669 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1670 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); 1671 a->unplacedarray = a->v; 1672 a->unplaced_user_alloc = a->user_alloc; 1673 a->v = (PetscScalar *)array; 1674 a->user_alloc = PETSC_TRUE; 1675 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1676 A->offloadmask = PETSC_OFFLOAD_CPU; 1677 #endif 1678 PetscFunctionReturn(PETSC_SUCCESS); 1679 } 1680 1681 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1682 { 1683 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1684 1685 PetscFunctionBegin; 1686 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1687 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1688 a->v = a->unplacedarray; 1689 a->user_alloc = a->unplaced_user_alloc; 1690 a->unplacedarray = NULL; 1691 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1692 A->offloadmask = PETSC_OFFLOAD_CPU; 1693 #endif 1694 PetscFunctionReturn(PETSC_SUCCESS); 1695 } 1696 1697 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1698 { 1699 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1700 1701 PetscFunctionBegin; 1702 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1703 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1704 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1705 a->v = (PetscScalar *)array; 1706 a->user_alloc = PETSC_FALSE; 1707 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1708 A->offloadmask = PETSC_OFFLOAD_CPU; 1709 #endif 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1714 { 1715 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1716 1717 PetscFunctionBegin; 1718 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); 1719 PetscCall(VecDestroy(&l->qrrhs)); 1720 PetscCall(PetscFree(l->tau)); 1721 PetscCall(PetscFree(l->pivots)); 1722 PetscCall(PetscFree(l->fwork)); 1723 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1724 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1725 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1726 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1727 PetscCall(VecDestroy(&l->cvec)); 1728 PetscCall(MatDestroy(&l->cmat)); 1729 PetscCall(PetscFree(mat->data)); 1730 1731 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1747 #if defined(PETSC_HAVE_ELEMENTAL) 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1749 #endif 1750 #if defined(PETSC_HAVE_SCALAPACK) 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1752 #endif 1753 #if defined(PETSC_HAVE_CUDA) 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1758 #endif 1759 #if defined(PETSC_HAVE_HIP) 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1764 #endif 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1770 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 1784 PetscFunctionReturn(PETSC_SUCCESS); 1785 } 1786 1787 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1788 { 1789 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1790 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1791 PetscScalar *v, tmp; 1792 1793 PetscFunctionBegin; 1794 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1795 if (reuse == MAT_INPLACE_MATRIX) { 1796 if (m == n) { /* in place transpose */ 1797 PetscCall(MatDenseGetArray(A, &v)); 1798 for (j = 0; j < m; j++) { 1799 for (k = 0; k < j; k++) { 1800 tmp = v[j + k * M]; 1801 v[j + k * M] = v[k + j * M]; 1802 v[k + j * M] = tmp; 1803 } 1804 } 1805 PetscCall(MatDenseRestoreArray(A, &v)); 1806 } else { /* reuse memory, temporary allocates new memory */ 1807 PetscScalar *v2; 1808 PetscLayout tmplayout; 1809 1810 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1811 PetscCall(MatDenseGetArray(A, &v)); 1812 for (j = 0; j < n; j++) { 1813 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1814 } 1815 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1816 PetscCall(PetscFree(v2)); 1817 PetscCall(MatDenseRestoreArray(A, &v)); 1818 /* cleanup size dependent quantities */ 1819 PetscCall(VecDestroy(&mat->cvec)); 1820 PetscCall(MatDestroy(&mat->cmat)); 1821 PetscCall(PetscFree(mat->pivots)); 1822 PetscCall(PetscFree(mat->fwork)); 1823 /* swap row/col layouts */ 1824 PetscCall(PetscBLASIntCast(n, &mat->lda)); 1825 tmplayout = A->rmap; 1826 A->rmap = A->cmap; 1827 A->cmap = tmplayout; 1828 } 1829 } else { /* out-of-place transpose */ 1830 Mat tmat; 1831 Mat_SeqDense *tmatd; 1832 PetscScalar *v2; 1833 PetscInt M2; 1834 1835 if (reuse == MAT_INITIAL_MATRIX) { 1836 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1837 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1838 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1839 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1840 } else tmat = *matout; 1841 1842 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1843 PetscCall(MatDenseGetArray(tmat, &v2)); 1844 tmatd = (Mat_SeqDense *)tmat->data; 1845 M2 = tmatd->lda; 1846 for (j = 0; j < n; j++) { 1847 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1848 } 1849 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1850 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1851 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1852 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1853 *matout = tmat; 1854 } 1855 PetscFunctionReturn(PETSC_SUCCESS); 1856 } 1857 1858 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1859 { 1860 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1861 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1862 PetscInt i; 1863 const PetscScalar *v1, *v2; 1864 1865 PetscFunctionBegin; 1866 if (A1->rmap->n != A2->rmap->n) { 1867 *flg = PETSC_FALSE; 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 if (A1->cmap->n != A2->cmap->n) { 1871 *flg = PETSC_FALSE; 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1875 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1876 for (i = 0; i < A1->cmap->n; i++) { 1877 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1878 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); 1879 v1 += mat1->lda; 1880 v2 += mat2->lda; 1881 } 1882 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1883 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1884 *flg = PETSC_TRUE; 1885 PetscFunctionReturn(PETSC_SUCCESS); 1886 } 1887 1888 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1889 { 1890 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1891 PetscInt i, n, len; 1892 PetscScalar *x; 1893 const PetscScalar *vv; 1894 1895 PetscFunctionBegin; 1896 PetscCall(VecGetSize(v, &n)); 1897 PetscCall(VecGetArray(v, &x)); 1898 len = PetscMin(A->rmap->n, A->cmap->n); 1899 PetscCall(MatDenseGetArrayRead(A, &vv)); 1900 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1901 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1902 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1903 PetscCall(VecRestoreArray(v, &x)); 1904 PetscFunctionReturn(PETSC_SUCCESS); 1905 } 1906 1907 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1908 { 1909 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1910 const PetscScalar *l, *r; 1911 PetscScalar x, *v, *vv; 1912 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1913 1914 PetscFunctionBegin; 1915 PetscCall(MatDenseGetArray(A, &vv)); 1916 if (ll) { 1917 PetscCall(VecGetSize(ll, &m)); 1918 PetscCall(VecGetArrayRead(ll, &l)); 1919 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1920 for (i = 0; i < m; i++) { 1921 x = l[i]; 1922 v = vv + i; 1923 for (j = 0; j < n; j++) { 1924 (*v) *= x; 1925 v += mat->lda; 1926 } 1927 } 1928 PetscCall(VecRestoreArrayRead(ll, &l)); 1929 PetscCall(PetscLogFlops(1.0 * n * m)); 1930 } 1931 if (rr) { 1932 PetscCall(VecGetSize(rr, &n)); 1933 PetscCall(VecGetArrayRead(rr, &r)); 1934 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1935 for (i = 0; i < n; i++) { 1936 x = r[i]; 1937 v = vv + i * mat->lda; 1938 for (j = 0; j < m; j++) (*v++) *= x; 1939 } 1940 PetscCall(VecRestoreArrayRead(rr, &r)); 1941 PetscCall(PetscLogFlops(1.0 * n * m)); 1942 } 1943 PetscCall(MatDenseRestoreArray(A, &vv)); 1944 PetscFunctionReturn(PETSC_SUCCESS); 1945 } 1946 1947 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1948 { 1949 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1950 PetscScalar *v, *vv; 1951 PetscReal sum = 0.0; 1952 PetscInt lda, m = A->rmap->n, i, j; 1953 1954 PetscFunctionBegin; 1955 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1956 PetscCall(MatDenseGetLDA(A, &lda)); 1957 v = vv; 1958 if (type == NORM_FROBENIUS) { 1959 if (lda > m) { 1960 for (j = 0; j < A->cmap->n; j++) { 1961 v = vv + j * lda; 1962 for (i = 0; i < m; i++) { 1963 sum += PetscRealPart(PetscConj(*v) * (*v)); 1964 v++; 1965 } 1966 } 1967 } else { 1968 #if defined(PETSC_USE_REAL___FP16) 1969 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1970 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1971 } 1972 #else 1973 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1974 sum += PetscRealPart(PetscConj(*v) * (*v)); 1975 v++; 1976 } 1977 } 1978 *nrm = PetscSqrtReal(sum); 1979 #endif 1980 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1981 } else if (type == NORM_1) { 1982 *nrm = 0.0; 1983 for (j = 0; j < A->cmap->n; j++) { 1984 v = vv + j * mat->lda; 1985 sum = 0.0; 1986 for (i = 0; i < A->rmap->n; i++) { 1987 sum += PetscAbsScalar(*v); 1988 v++; 1989 } 1990 if (sum > *nrm) *nrm = sum; 1991 } 1992 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1993 } else if (type == NORM_INFINITY) { 1994 *nrm = 0.0; 1995 for (j = 0; j < A->rmap->n; j++) { 1996 v = vv + j; 1997 sum = 0.0; 1998 for (i = 0; i < A->cmap->n; i++) { 1999 sum += PetscAbsScalar(*v); 2000 v += mat->lda; 2001 } 2002 if (sum > *nrm) *nrm = sum; 2003 } 2004 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2005 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2006 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2011 { 2012 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2013 2014 PetscFunctionBegin; 2015 switch (op) { 2016 case MAT_ROW_ORIENTED: 2017 aij->roworiented = flg; 2018 break; 2019 case MAT_NEW_NONZERO_LOCATIONS: 2020 case MAT_NEW_NONZERO_LOCATION_ERR: 2021 case MAT_NEW_NONZERO_ALLOCATION_ERR: 2022 case MAT_FORCE_DIAGONAL_ENTRIES: 2023 case MAT_KEEP_NONZERO_PATTERN: 2024 case MAT_IGNORE_OFF_PROC_ENTRIES: 2025 case MAT_USE_HASH_TABLE: 2026 case MAT_IGNORE_ZERO_ENTRIES: 2027 case MAT_IGNORE_LOWER_TRIANGULAR: 2028 case MAT_SORTED_FULL: 2029 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 2030 break; 2031 case MAT_SPD: 2032 case MAT_SYMMETRIC: 2033 case MAT_STRUCTURALLY_SYMMETRIC: 2034 case MAT_HERMITIAN: 2035 case MAT_SYMMETRY_ETERNAL: 2036 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 2037 case MAT_SPD_ETERNAL: 2038 break; 2039 default: 2040 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 2041 } 2042 PetscFunctionReturn(PETSC_SUCCESS); 2043 } 2044 2045 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2046 { 2047 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2048 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2049 PetscScalar *v; 2050 2051 PetscFunctionBegin; 2052 PetscCall(MatDenseGetArrayWrite(A, &v)); 2053 if (lda > m) { 2054 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2055 } else { 2056 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2057 } 2058 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2059 PetscFunctionReturn(PETSC_SUCCESS); 2060 } 2061 2062 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2063 { 2064 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2065 PetscInt m = l->lda, n = A->cmap->n, i, j; 2066 PetscScalar *slot, *bb, *v; 2067 const PetscScalar *xx; 2068 2069 PetscFunctionBegin; 2070 if (PetscDefined(USE_DEBUG)) { 2071 for (i = 0; i < N; i++) { 2072 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2073 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2074 } 2075 } 2076 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 2077 2078 /* fix right-hand side if needed */ 2079 if (x && b) { 2080 PetscCall(VecGetArrayRead(x, &xx)); 2081 PetscCall(VecGetArray(b, &bb)); 2082 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2083 PetscCall(VecRestoreArrayRead(x, &xx)); 2084 PetscCall(VecRestoreArray(b, &bb)); 2085 } 2086 2087 PetscCall(MatDenseGetArray(A, &v)); 2088 for (i = 0; i < N; i++) { 2089 slot = v + rows[i]; 2090 for (j = 0; j < n; j++) { 2091 *slot = 0.0; 2092 slot += m; 2093 } 2094 } 2095 if (diag != 0.0) { 2096 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2097 for (i = 0; i < N; i++) { 2098 slot = v + (m + 1) * rows[i]; 2099 *slot = diag; 2100 } 2101 } 2102 PetscCall(MatDenseRestoreArray(A, &v)); 2103 PetscFunctionReturn(PETSC_SUCCESS); 2104 } 2105 2106 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2107 { 2108 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2109 2110 PetscFunctionBegin; 2111 *lda = mat->lda; 2112 PetscFunctionReturn(PETSC_SUCCESS); 2113 } 2114 2115 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2116 { 2117 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2118 2119 PetscFunctionBegin; 2120 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2121 *array = mat->v; 2122 PetscFunctionReturn(PETSC_SUCCESS); 2123 } 2124 2125 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2126 { 2127 PetscFunctionBegin; 2128 if (array) *array = NULL; 2129 PetscFunctionReturn(PETSC_SUCCESS); 2130 } 2131 2132 /*@ 2133 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2134 2135 Not Collective 2136 2137 Input Parameter: 2138 . A - a `MATDENSE` or `MATDENSECUDA` matrix 2139 2140 Output Parameter: 2141 . lda - the leading dimension 2142 2143 Level: intermediate 2144 2145 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2146 @*/ 2147 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2148 { 2149 PetscFunctionBegin; 2150 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2151 PetscAssertPointer(lda, 2); 2152 MatCheckPreallocated(A, 1); 2153 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 /*@ 2158 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2159 2160 Collective if the matrix layouts have not yet been setup 2161 2162 Input Parameters: 2163 + A - a `MATDENSE` or `MATDENSECUDA` matrix 2164 - lda - the leading dimension 2165 2166 Level: intermediate 2167 2168 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2169 @*/ 2170 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2171 { 2172 PetscFunctionBegin; 2173 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2174 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2175 PetscFunctionReturn(PETSC_SUCCESS); 2176 } 2177 2178 /*@C 2179 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2180 2181 Logically Collective 2182 2183 Input Parameter: 2184 . A - a dense matrix 2185 2186 Output Parameter: 2187 . array - pointer to the data 2188 2189 Level: intermediate 2190 2191 Fortran Notes: 2192 `MatDenseGetArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseGetArrayF90()` 2193 2194 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2195 @*/ 2196 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) 2197 { 2198 PetscFunctionBegin; 2199 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2200 PetscAssertPointer(array, 2); 2201 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2202 PetscFunctionReturn(PETSC_SUCCESS); 2203 } 2204 2205 /*@C 2206 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2207 2208 Logically Collective 2209 2210 Input Parameters: 2211 + A - a dense matrix 2212 - array - pointer to the data (may be `NULL`) 2213 2214 Level: intermediate 2215 2216 Fortran Notes: 2217 `MatDenseRestoreArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseRestoreArrayF90()` 2218 2219 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2220 @*/ 2221 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) 2222 { 2223 PetscFunctionBegin; 2224 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2225 if (array) PetscAssertPointer(array, 2); 2226 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2227 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2228 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2229 A->offloadmask = PETSC_OFFLOAD_CPU; 2230 #endif 2231 PetscFunctionReturn(PETSC_SUCCESS); 2232 } 2233 2234 /*@C 2235 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2236 2237 Not Collective 2238 2239 Input Parameter: 2240 . A - a dense matrix 2241 2242 Output Parameter: 2243 . array - pointer to the data 2244 2245 Level: intermediate 2246 2247 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2248 @*/ 2249 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) 2250 { 2251 PetscFunctionBegin; 2252 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2253 PetscAssertPointer(array, 2); 2254 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2255 PetscFunctionReturn(PETSC_SUCCESS); 2256 } 2257 2258 /*@C 2259 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2260 2261 Not Collective 2262 2263 Input Parameters: 2264 + A - a dense matrix 2265 - array - pointer to the data (may be `NULL`) 2266 2267 Level: intermediate 2268 2269 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2270 @*/ 2271 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) 2272 { 2273 PetscFunctionBegin; 2274 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2275 if (array) PetscAssertPointer(array, 2); 2276 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2277 PetscFunctionReturn(PETSC_SUCCESS); 2278 } 2279 2280 /*@C 2281 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2282 2283 Not Collective 2284 2285 Input Parameter: 2286 . A - a dense matrix 2287 2288 Output Parameter: 2289 . array - pointer to the data 2290 2291 Level: intermediate 2292 2293 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2294 @*/ 2295 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) 2296 { 2297 PetscFunctionBegin; 2298 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2299 PetscAssertPointer(array, 2); 2300 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2301 PetscFunctionReturn(PETSC_SUCCESS); 2302 } 2303 2304 /*@C 2305 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2306 2307 Not Collective 2308 2309 Input Parameters: 2310 + A - a dense matrix 2311 - array - pointer to the data (may be `NULL`) 2312 2313 Level: intermediate 2314 2315 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2316 @*/ 2317 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) 2318 { 2319 PetscFunctionBegin; 2320 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2321 if (array) PetscAssertPointer(array, 2); 2322 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2323 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2324 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2325 A->offloadmask = PETSC_OFFLOAD_CPU; 2326 #endif 2327 PetscFunctionReturn(PETSC_SUCCESS); 2328 } 2329 2330 /*@C 2331 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2332 2333 Logically Collective 2334 2335 Input Parameter: 2336 . A - a dense matrix 2337 2338 Output Parameters: 2339 + array - pointer to the data 2340 - mtype - memory type of the returned pointer 2341 2342 Level: intermediate 2343 2344 Note: 2345 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2346 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2347 2348 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, 2349 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2350 @*/ 2351 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2352 { 2353 PetscBool isMPI; 2354 2355 PetscFunctionBegin; 2356 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2357 PetscAssertPointer(array, 2); 2358 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2359 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2360 if (isMPI) { 2361 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2362 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2363 } else { 2364 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2365 2366 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); 2367 if (fptr) { 2368 PetscCall((*fptr)(A, array, mtype)); 2369 } else { 2370 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2371 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2372 } 2373 } 2374 PetscFunctionReturn(PETSC_SUCCESS); 2375 } 2376 2377 /*@C 2378 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` 2379 2380 Logically Collective 2381 2382 Input Parameters: 2383 + A - a dense matrix 2384 - array - pointer to the data 2385 2386 Level: intermediate 2387 2388 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2389 @*/ 2390 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[]) 2391 { 2392 PetscBool isMPI; 2393 2394 PetscFunctionBegin; 2395 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2396 PetscAssertPointer(array, 2); 2397 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2398 if (isMPI) { 2399 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2400 } else { 2401 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2402 2403 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); 2404 if (fptr) { 2405 PetscCall((*fptr)(A, array)); 2406 } else { 2407 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2408 } 2409 *array = NULL; 2410 } 2411 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2412 PetscFunctionReturn(PETSC_SUCCESS); 2413 } 2414 2415 /*@C 2416 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2417 2418 Logically Collective 2419 2420 Input Parameter: 2421 . A - a dense matrix 2422 2423 Output Parameters: 2424 + array - pointer to the data 2425 - mtype - memory type of the returned pointer 2426 2427 Level: intermediate 2428 2429 Note: 2430 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2431 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2432 2433 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, 2434 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2435 @*/ 2436 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype) 2437 { 2438 PetscBool isMPI; 2439 2440 PetscFunctionBegin; 2441 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2442 PetscAssertPointer(array, 2); 2443 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2444 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2445 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2446 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2447 } else { 2448 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); 2449 2450 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); 2451 if (fptr) { 2452 PetscCall((*fptr)(A, array, mtype)); 2453 } else { 2454 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2455 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2456 } 2457 } 2458 PetscFunctionReturn(PETSC_SUCCESS); 2459 } 2460 2461 /*@C 2462 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2463 2464 Logically Collective 2465 2466 Input Parameters: 2467 + A - a dense matrix 2468 - array - pointer to the data 2469 2470 Level: intermediate 2471 2472 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2473 @*/ 2474 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[]) 2475 { 2476 PetscBool isMPI; 2477 2478 PetscFunctionBegin; 2479 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2480 PetscAssertPointer(array, 2); 2481 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2482 if (isMPI) { 2483 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2484 } else { 2485 PetscErrorCode (*fptr)(Mat, const PetscScalar **); 2486 2487 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); 2488 if (fptr) { 2489 PetscCall((*fptr)(A, array)); 2490 } else { 2491 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2492 } 2493 *array = NULL; 2494 } 2495 PetscFunctionReturn(PETSC_SUCCESS); 2496 } 2497 2498 /*@C 2499 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2500 2501 Logically Collective 2502 2503 Input Parameter: 2504 . A - a dense matrix 2505 2506 Output Parameters: 2507 + array - pointer to the data 2508 - mtype - memory type of the returned pointer 2509 2510 Level: intermediate 2511 2512 Note: 2513 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2514 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2515 2516 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, 2517 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2518 @*/ 2519 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2520 { 2521 PetscBool isMPI; 2522 2523 PetscFunctionBegin; 2524 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2525 PetscAssertPointer(array, 2); 2526 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2527 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2528 if (isMPI) { 2529 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2530 } else { 2531 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2532 2533 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); 2534 if (fptr) { 2535 PetscCall((*fptr)(A, array, mtype)); 2536 } else { 2537 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2538 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2539 } 2540 } 2541 PetscFunctionReturn(PETSC_SUCCESS); 2542 } 2543 2544 /*@C 2545 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2546 2547 Logically Collective 2548 2549 Input Parameters: 2550 + A - a dense matrix 2551 - array - pointer to the data 2552 2553 Level: intermediate 2554 2555 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2556 @*/ 2557 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[]) 2558 { 2559 PetscBool isMPI; 2560 2561 PetscFunctionBegin; 2562 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2563 PetscAssertPointer(array, 2); 2564 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2565 if (isMPI) { 2566 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2567 } else { 2568 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2569 2570 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); 2571 if (fptr) { 2572 PetscCall((*fptr)(A, array)); 2573 } else { 2574 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2575 } 2576 *array = NULL; 2577 } 2578 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2579 PetscFunctionReturn(PETSC_SUCCESS); 2580 } 2581 2582 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2583 { 2584 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2585 PetscInt i, j, nrows, ncols, ldb; 2586 const PetscInt *irow, *icol; 2587 PetscScalar *av, *bv, *v = mat->v; 2588 Mat newmat; 2589 2590 PetscFunctionBegin; 2591 PetscCall(ISGetIndices(isrow, &irow)); 2592 PetscCall(ISGetIndices(iscol, &icol)); 2593 PetscCall(ISGetLocalSize(isrow, &nrows)); 2594 PetscCall(ISGetLocalSize(iscol, &ncols)); 2595 2596 /* Check submatrixcall */ 2597 if (scall == MAT_REUSE_MATRIX) { 2598 PetscInt n_cols, n_rows; 2599 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2600 if (n_rows != nrows || n_cols != ncols) { 2601 /* resize the result matrix to match number of requested rows/columns */ 2602 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2603 } 2604 newmat = *B; 2605 } else { 2606 /* Create and fill new matrix */ 2607 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2608 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2609 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2610 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2611 } 2612 2613 /* Now extract the data pointers and do the copy,column at a time */ 2614 PetscCall(MatDenseGetArray(newmat, &bv)); 2615 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2616 for (i = 0; i < ncols; i++) { 2617 av = v + mat->lda * icol[i]; 2618 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2619 bv += ldb; 2620 } 2621 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2622 2623 /* Assemble the matrices so that the correct flags are set */ 2624 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2625 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2626 2627 /* Free work space */ 2628 PetscCall(ISRestoreIndices(isrow, &irow)); 2629 PetscCall(ISRestoreIndices(iscol, &icol)); 2630 *B = newmat; 2631 PetscFunctionReturn(PETSC_SUCCESS); 2632 } 2633 2634 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2635 { 2636 PetscInt i; 2637 2638 PetscFunctionBegin; 2639 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2640 2641 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2642 PetscFunctionReturn(PETSC_SUCCESS); 2643 } 2644 2645 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) 2646 { 2647 PetscFunctionBegin; 2648 PetscFunctionReturn(PETSC_SUCCESS); 2649 } 2650 2651 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) 2652 { 2653 PetscFunctionBegin; 2654 PetscFunctionReturn(PETSC_SUCCESS); 2655 } 2656 2657 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2658 { 2659 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2660 const PetscScalar *va; 2661 PetscScalar *vb; 2662 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2663 2664 PetscFunctionBegin; 2665 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2666 if (A->ops->copy != B->ops->copy) { 2667 PetscCall(MatCopy_Basic(A, B, str)); 2668 PetscFunctionReturn(PETSC_SUCCESS); 2669 } 2670 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2671 PetscCall(MatDenseGetArrayRead(A, &va)); 2672 PetscCall(MatDenseGetArray(B, &vb)); 2673 if (lda1 > m || lda2 > m) { 2674 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2675 } else { 2676 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2677 } 2678 PetscCall(MatDenseRestoreArray(B, &vb)); 2679 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2680 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2681 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684 2685 PetscErrorCode MatSetUp_SeqDense(Mat A) 2686 { 2687 PetscFunctionBegin; 2688 PetscCall(PetscLayoutSetUp(A->rmap)); 2689 PetscCall(PetscLayoutSetUp(A->cmap)); 2690 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 static PetscErrorCode MatConjugate_SeqDense(Mat A) 2695 { 2696 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2697 PetscInt i, j; 2698 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2699 PetscScalar *aa; 2700 2701 PetscFunctionBegin; 2702 PetscCall(MatDenseGetArray(A, &aa)); 2703 for (j = 0; j < A->cmap->n; j++) { 2704 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2705 } 2706 PetscCall(MatDenseRestoreArray(A, &aa)); 2707 if (mat->tau) 2708 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2713 { 2714 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2715 PetscInt i, j; 2716 PetscScalar *aa; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatDenseGetArray(A, &aa)); 2720 for (j = 0; j < A->cmap->n; j++) { 2721 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2722 } 2723 PetscCall(MatDenseRestoreArray(A, &aa)); 2724 PetscFunctionReturn(PETSC_SUCCESS); 2725 } 2726 2727 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2728 { 2729 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2730 PetscInt i, j; 2731 PetscScalar *aa; 2732 2733 PetscFunctionBegin; 2734 PetscCall(MatDenseGetArray(A, &aa)); 2735 for (j = 0; j < A->cmap->n; j++) { 2736 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2737 } 2738 PetscCall(MatDenseRestoreArray(A, &aa)); 2739 PetscFunctionReturn(PETSC_SUCCESS); 2740 } 2741 2742 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2743 { 2744 PetscInt m = A->rmap->n, n = B->cmap->n; 2745 PetscBool cisdense = PETSC_FALSE; 2746 2747 PetscFunctionBegin; 2748 PetscCall(MatSetSizes(C, m, n, m, n)); 2749 #if defined(PETSC_HAVE_CUDA) 2750 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2751 #endif 2752 #if defined(PETSC_HAVE_HIP) 2753 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2754 #endif 2755 if (!cisdense) { 2756 PetscBool flg; 2757 2758 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2759 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2760 } 2761 PetscCall(MatSetUp(C)); 2762 PetscFunctionReturn(PETSC_SUCCESS); 2763 } 2764 2765 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2766 { 2767 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2768 PetscBLASInt m, n, k; 2769 const PetscScalar *av, *bv; 2770 PetscScalar *cv; 2771 PetscScalar _DOne = 1.0, _DZero = 0.0; 2772 2773 PetscFunctionBegin; 2774 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2775 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2776 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2777 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2778 PetscCall(MatDenseGetArrayRead(A, &av)); 2779 PetscCall(MatDenseGetArrayRead(B, &bv)); 2780 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2781 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2782 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2783 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2784 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2785 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2786 PetscFunctionReturn(PETSC_SUCCESS); 2787 } 2788 2789 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2790 { 2791 PetscInt m = A->rmap->n, n = B->rmap->n; 2792 PetscBool cisdense = PETSC_FALSE; 2793 2794 PetscFunctionBegin; 2795 PetscCall(MatSetSizes(C, m, n, m, n)); 2796 #if defined(PETSC_HAVE_CUDA) 2797 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2798 #endif 2799 #if defined(PETSC_HAVE_HIP) 2800 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2801 #endif 2802 if (!cisdense) { 2803 PetscBool flg; 2804 2805 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2806 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2807 } 2808 PetscCall(MatSetUp(C)); 2809 PetscFunctionReturn(PETSC_SUCCESS); 2810 } 2811 2812 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2813 { 2814 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2815 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2816 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2817 const PetscScalar *av, *bv; 2818 PetscScalar *cv; 2819 PetscBLASInt m, n, k; 2820 PetscScalar _DOne = 1.0, _DZero = 0.0; 2821 2822 PetscFunctionBegin; 2823 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2824 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2825 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2826 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2827 PetscCall(MatDenseGetArrayRead(A, &av)); 2828 PetscCall(MatDenseGetArrayRead(B, &bv)); 2829 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2830 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2831 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2832 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2833 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2834 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2835 PetscFunctionReturn(PETSC_SUCCESS); 2836 } 2837 2838 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2839 { 2840 PetscInt m = A->cmap->n, n = B->cmap->n; 2841 PetscBool cisdense = PETSC_FALSE; 2842 2843 PetscFunctionBegin; 2844 PetscCall(MatSetSizes(C, m, n, m, n)); 2845 #if defined(PETSC_HAVE_CUDA) 2846 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2847 #endif 2848 #if defined(PETSC_HAVE_HIP) 2849 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2850 #endif 2851 if (!cisdense) { 2852 PetscBool flg; 2853 2854 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2855 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2856 } 2857 PetscCall(MatSetUp(C)); 2858 PetscFunctionReturn(PETSC_SUCCESS); 2859 } 2860 2861 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2862 { 2863 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2864 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2865 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2866 const PetscScalar *av, *bv; 2867 PetscScalar *cv; 2868 PetscBLASInt m, n, k; 2869 PetscScalar _DOne = 1.0, _DZero = 0.0; 2870 2871 PetscFunctionBegin; 2872 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2873 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2874 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2875 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2876 PetscCall(MatDenseGetArrayRead(A, &av)); 2877 PetscCall(MatDenseGetArrayRead(B, &bv)); 2878 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2879 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2880 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2881 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2882 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2883 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2884 PetscFunctionReturn(PETSC_SUCCESS); 2885 } 2886 2887 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2888 { 2889 PetscFunctionBegin; 2890 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2891 C->ops->productsymbolic = MatProductSymbolic_AB; 2892 PetscFunctionReturn(PETSC_SUCCESS); 2893 } 2894 2895 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2896 { 2897 PetscFunctionBegin; 2898 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2899 C->ops->productsymbolic = MatProductSymbolic_AtB; 2900 PetscFunctionReturn(PETSC_SUCCESS); 2901 } 2902 2903 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2904 { 2905 PetscFunctionBegin; 2906 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2907 C->ops->productsymbolic = MatProductSymbolic_ABt; 2908 PetscFunctionReturn(PETSC_SUCCESS); 2909 } 2910 2911 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2912 { 2913 Mat_Product *product = C->product; 2914 2915 PetscFunctionBegin; 2916 switch (product->type) { 2917 case MATPRODUCT_AB: 2918 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2919 break; 2920 case MATPRODUCT_AtB: 2921 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2922 break; 2923 case MATPRODUCT_ABt: 2924 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2925 break; 2926 default: 2927 break; 2928 } 2929 PetscFunctionReturn(PETSC_SUCCESS); 2930 } 2931 2932 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2933 { 2934 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2935 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2936 PetscScalar *x; 2937 const PetscScalar *aa; 2938 2939 PetscFunctionBegin; 2940 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2941 PetscCall(VecGetArray(v, &x)); 2942 PetscCall(VecGetLocalSize(v, &p)); 2943 PetscCall(MatDenseGetArrayRead(A, &aa)); 2944 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2945 for (i = 0; i < m; i++) { 2946 x[i] = aa[i]; 2947 if (idx) idx[i] = 0; 2948 for (j = 1; j < n; j++) { 2949 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2950 x[i] = aa[i + a->lda * j]; 2951 if (idx) idx[i] = j; 2952 } 2953 } 2954 } 2955 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2956 PetscCall(VecRestoreArray(v, &x)); 2957 PetscFunctionReturn(PETSC_SUCCESS); 2958 } 2959 2960 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2961 { 2962 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2963 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2964 PetscScalar *x; 2965 PetscReal atmp; 2966 const PetscScalar *aa; 2967 2968 PetscFunctionBegin; 2969 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2970 PetscCall(VecGetArray(v, &x)); 2971 PetscCall(VecGetLocalSize(v, &p)); 2972 PetscCall(MatDenseGetArrayRead(A, &aa)); 2973 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2974 for (i = 0; i < m; i++) { 2975 x[i] = PetscAbsScalar(aa[i]); 2976 for (j = 1; j < n; j++) { 2977 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2978 if (PetscAbsScalar(x[i]) < atmp) { 2979 x[i] = atmp; 2980 if (idx) idx[i] = j; 2981 } 2982 } 2983 } 2984 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2985 PetscCall(VecRestoreArray(v, &x)); 2986 PetscFunctionReturn(PETSC_SUCCESS); 2987 } 2988 2989 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2990 { 2991 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2992 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2993 PetscScalar *x; 2994 const PetscScalar *aa; 2995 2996 PetscFunctionBegin; 2997 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2998 PetscCall(MatDenseGetArrayRead(A, &aa)); 2999 PetscCall(VecGetArray(v, &x)); 3000 PetscCall(VecGetLocalSize(v, &p)); 3001 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 3002 for (i = 0; i < m; i++) { 3003 x[i] = aa[i]; 3004 if (idx) idx[i] = 0; 3005 for (j = 1; j < n; j++) { 3006 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 3007 x[i] = aa[i + a->lda * j]; 3008 if (idx) idx[i] = j; 3009 } 3010 } 3011 } 3012 PetscCall(VecRestoreArray(v, &x)); 3013 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3014 PetscFunctionReturn(PETSC_SUCCESS); 3015 } 3016 3017 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 3018 { 3019 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3020 PetscScalar *x; 3021 const PetscScalar *aa; 3022 3023 PetscFunctionBegin; 3024 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3025 PetscCall(MatDenseGetArrayRead(A, &aa)); 3026 PetscCall(VecGetArray(v, &x)); 3027 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 3028 PetscCall(VecRestoreArray(v, &x)); 3029 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3030 PetscFunctionReturn(PETSC_SUCCESS); 3031 } 3032 3033 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 3034 { 3035 PetscInt i, j, m, n; 3036 const PetscScalar *a; 3037 3038 PetscFunctionBegin; 3039 PetscCall(MatGetSize(A, &m, &n)); 3040 PetscCall(PetscArrayzero(reductions, n)); 3041 PetscCall(MatDenseGetArrayRead(A, &a)); 3042 if (type == NORM_2) { 3043 for (i = 0; i < n; i++) { 3044 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 3045 a = PetscSafePointerPlusOffset(a, m); 3046 } 3047 } else if (type == NORM_1) { 3048 for (i = 0; i < n; i++) { 3049 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 3050 a = PetscSafePointerPlusOffset(a, m); 3051 } 3052 } else if (type == NORM_INFINITY) { 3053 for (i = 0; i < n; i++) { 3054 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 3055 a = PetscSafePointerPlusOffset(a, m); 3056 } 3057 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 3058 for (i = 0; i < n; i++) { 3059 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 3060 a = PetscSafePointerPlusOffset(a, m); 3061 } 3062 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3063 for (i = 0; i < n; i++) { 3064 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 3065 a = PetscSafePointerPlusOffset(a, m); 3066 } 3067 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3068 PetscCall(MatDenseRestoreArrayRead(A, &a)); 3069 if (type == NORM_2) { 3070 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 3071 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3072 for (i = 0; i < n; i++) reductions[i] /= m; 3073 } 3074 PetscFunctionReturn(PETSC_SUCCESS); 3075 } 3076 3077 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 3078 { 3079 PetscScalar *a; 3080 PetscInt lda, m, n, i, j; 3081 3082 PetscFunctionBegin; 3083 PetscCall(MatGetSize(x, &m, &n)); 3084 PetscCall(MatDenseGetLDA(x, &lda)); 3085 PetscCall(MatDenseGetArrayWrite(x, &a)); 3086 for (j = 0; j < n; j++) { 3087 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 3088 } 3089 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 3090 PetscFunctionReturn(PETSC_SUCCESS); 3091 } 3092 3093 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 3094 { 3095 PetscFunctionBegin; 3096 *missing = PETSC_FALSE; 3097 PetscFunctionReturn(PETSC_SUCCESS); 3098 } 3099 3100 /* vals is not const */ 3101 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 3102 { 3103 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3104 PetscScalar *v; 3105 3106 PetscFunctionBegin; 3107 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3108 PetscCall(MatDenseGetArray(A, &v)); 3109 *vals = v + col * a->lda; 3110 PetscCall(MatDenseRestoreArray(A, &v)); 3111 PetscFunctionReturn(PETSC_SUCCESS); 3112 } 3113 3114 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 3115 { 3116 PetscFunctionBegin; 3117 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 3118 PetscFunctionReturn(PETSC_SUCCESS); 3119 } 3120 3121 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 3122 MatGetRow_SeqDense, 3123 MatRestoreRow_SeqDense, 3124 MatMult_SeqDense, 3125 /* 4*/ MatMultAdd_SeqDense, 3126 MatMultTranspose_SeqDense, 3127 MatMultTransposeAdd_SeqDense, 3128 NULL, 3129 NULL, 3130 NULL, 3131 /* 10*/ NULL, 3132 MatLUFactor_SeqDense, 3133 MatCholeskyFactor_SeqDense, 3134 MatSOR_SeqDense, 3135 MatTranspose_SeqDense, 3136 /* 15*/ MatGetInfo_SeqDense, 3137 MatEqual_SeqDense, 3138 MatGetDiagonal_SeqDense, 3139 MatDiagonalScale_SeqDense, 3140 MatNorm_SeqDense, 3141 /* 20*/ MatAssemblyBegin_SeqDense, 3142 MatAssemblyEnd_SeqDense, 3143 MatSetOption_SeqDense, 3144 MatZeroEntries_SeqDense, 3145 /* 24*/ MatZeroRows_SeqDense, 3146 NULL, 3147 NULL, 3148 NULL, 3149 NULL, 3150 /* 29*/ MatSetUp_SeqDense, 3151 NULL, 3152 NULL, 3153 NULL, 3154 NULL, 3155 /* 34*/ MatDuplicate_SeqDense, 3156 NULL, 3157 NULL, 3158 NULL, 3159 NULL, 3160 /* 39*/ MatAXPY_SeqDense, 3161 MatCreateSubMatrices_SeqDense, 3162 NULL, 3163 MatGetValues_SeqDense, 3164 MatCopy_SeqDense, 3165 /* 44*/ MatGetRowMax_SeqDense, 3166 MatScale_SeqDense, 3167 MatShift_SeqDense, 3168 NULL, 3169 MatZeroRowsColumns_SeqDense, 3170 /* 49*/ MatSetRandom_SeqDense, 3171 NULL, 3172 NULL, 3173 NULL, 3174 NULL, 3175 /* 54*/ NULL, 3176 NULL, 3177 NULL, 3178 NULL, 3179 NULL, 3180 /* 59*/ MatCreateSubMatrix_SeqDense, 3181 MatDestroy_SeqDense, 3182 MatView_SeqDense, 3183 NULL, 3184 NULL, 3185 /* 64*/ NULL, 3186 NULL, 3187 NULL, 3188 NULL, 3189 NULL, 3190 /* 69*/ MatGetRowMaxAbs_SeqDense, 3191 NULL, 3192 NULL, 3193 NULL, 3194 NULL, 3195 /* 74*/ NULL, 3196 NULL, 3197 NULL, 3198 NULL, 3199 NULL, 3200 /* 79*/ NULL, 3201 NULL, 3202 NULL, 3203 NULL, 3204 /* 83*/ MatLoad_SeqDense, 3205 MatIsSymmetric_SeqDense, 3206 MatIsHermitian_SeqDense, 3207 NULL, 3208 NULL, 3209 NULL, 3210 /* 89*/ NULL, 3211 NULL, 3212 MatMatMultNumeric_SeqDense_SeqDense, 3213 NULL, 3214 NULL, 3215 /* 94*/ NULL, 3216 NULL, 3217 NULL, 3218 MatMatTransposeMultNumeric_SeqDense_SeqDense, 3219 NULL, 3220 /* 99*/ MatProductSetFromOptions_SeqDense, 3221 NULL, 3222 NULL, 3223 MatConjugate_SeqDense, 3224 NULL, 3225 /*104*/ NULL, 3226 MatRealPart_SeqDense, 3227 MatImaginaryPart_SeqDense, 3228 NULL, 3229 NULL, 3230 /*109*/ NULL, 3231 NULL, 3232 MatGetRowMin_SeqDense, 3233 MatGetColumnVector_SeqDense, 3234 MatMissingDiagonal_SeqDense, 3235 /*114*/ NULL, 3236 NULL, 3237 NULL, 3238 NULL, 3239 NULL, 3240 /*119*/ NULL, 3241 NULL, 3242 MatMultHermitianTranspose_SeqDense, 3243 MatMultHermitianTransposeAdd_SeqDense, 3244 NULL, 3245 /*124*/ NULL, 3246 MatGetColumnReductions_SeqDense, 3247 NULL, 3248 NULL, 3249 NULL, 3250 /*129*/ NULL, 3251 NULL, 3252 NULL, 3253 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3254 NULL, 3255 /*134*/ NULL, 3256 NULL, 3257 NULL, 3258 NULL, 3259 NULL, 3260 /*139*/ NULL, 3261 NULL, 3262 NULL, 3263 NULL, 3264 NULL, 3265 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3266 /*145*/ NULL, 3267 NULL, 3268 NULL, 3269 NULL, 3270 NULL, 3271 /*150*/ NULL, 3272 NULL, 3273 NULL, 3274 NULL, 3275 NULL, 3276 NULL}; 3277 3278 /*@ 3279 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3280 is stored in column major order (the usual Fortran format). 3281 3282 Collective 3283 3284 Input Parameters: 3285 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3286 . m - number of rows 3287 . n - number of columns 3288 - data - optional location of matrix data in column major order. Use `NULL` for PETSc 3289 to control all matrix memory allocation. 3290 3291 Output Parameter: 3292 . A - the matrix 3293 3294 Level: intermediate 3295 3296 Note: 3297 The data input variable is intended primarily for Fortran programmers 3298 who wish to allocate their own matrix memory space. Most users should 3299 set `data` = `NULL`. 3300 3301 Developer Note: 3302 Many of the matrix operations for this variant use the BLAS and LAPACK routines. 3303 3304 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3305 @*/ 3306 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A) 3307 { 3308 PetscFunctionBegin; 3309 PetscCall(MatCreate(comm, A)); 3310 PetscCall(MatSetSizes(*A, m, n, m, n)); 3311 PetscCall(MatSetType(*A, MATSEQDENSE)); 3312 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3313 PetscFunctionReturn(PETSC_SUCCESS); 3314 } 3315 3316 /*@ 3317 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3318 3319 Collective 3320 3321 Input Parameters: 3322 + B - the matrix 3323 - data - the array (or `NULL`) 3324 3325 Level: intermediate 3326 3327 Note: 3328 The data input variable is intended primarily for Fortran programmers 3329 who wish to allocate their own matrix memory space. Most users should 3330 need not call this routine. 3331 3332 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3333 @*/ 3334 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3335 { 3336 PetscFunctionBegin; 3337 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3338 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3339 PetscFunctionReturn(PETSC_SUCCESS); 3340 } 3341 3342 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3343 { 3344 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3345 3346 PetscFunctionBegin; 3347 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3348 B->preallocated = PETSC_TRUE; 3349 3350 PetscCall(PetscLayoutSetUp(B->rmap)); 3351 PetscCall(PetscLayoutSetUp(B->cmap)); 3352 3353 if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda)); 3354 3355 if (!data) { /* petsc-allocated storage */ 3356 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3357 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3358 3359 b->user_alloc = PETSC_FALSE; 3360 } else { /* user-allocated storage */ 3361 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3362 b->v = data; 3363 b->user_alloc = PETSC_TRUE; 3364 } 3365 B->assembled = PETSC_TRUE; 3366 PetscFunctionReturn(PETSC_SUCCESS); 3367 } 3368 3369 #if defined(PETSC_HAVE_ELEMENTAL) 3370 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3371 { 3372 Mat mat_elemental; 3373 const PetscScalar *array; 3374 PetscScalar *v_colwise; 3375 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3376 3377 PetscFunctionBegin; 3378 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3379 PetscCall(MatDenseGetArrayRead(A, &array)); 3380 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3381 k = 0; 3382 for (j = 0; j < N; j++) { 3383 cols[j] = j; 3384 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3385 } 3386 for (i = 0; i < M; i++) rows[i] = i; 3387 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3388 3389 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3390 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3391 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3392 PetscCall(MatSetUp(mat_elemental)); 3393 3394 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3395 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3396 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3397 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3398 PetscCall(PetscFree3(v_colwise, rows, cols)); 3399 3400 if (reuse == MAT_INPLACE_MATRIX) { 3401 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3402 } else { 3403 *newmat = mat_elemental; 3404 } 3405 PetscFunctionReturn(PETSC_SUCCESS); 3406 } 3407 #endif 3408 3409 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3410 { 3411 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3412 PetscBool data; 3413 3414 PetscFunctionBegin; 3415 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3416 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3417 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3418 PetscCall(PetscBLASIntCast(lda, &b->lda)); 3419 PetscFunctionReturn(PETSC_SUCCESS); 3420 } 3421 3422 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3423 { 3424 PetscFunctionBegin; 3425 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } 3428 3429 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) 3430 { 3431 PetscBool isstd, iskok, iscuda, iship; 3432 PetscMPIInt size; 3433 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) 3434 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ 3435 const PetscScalar *a; 3436 #endif 3437 3438 PetscFunctionBegin; 3439 *v = NULL; 3440 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); 3441 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); 3442 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); 3443 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); 3444 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3445 if (isstd) { 3446 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3447 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3448 } else if (iskok) { 3449 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); 3450 #if PetscDefined(HAVE_KOKKOS_KERNELS) 3451 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3452 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3453 #endif 3454 } else if (iscuda) { 3455 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); 3456 #if PetscDefined(HAVE_CUDA) 3457 PetscCall(MatDenseCUDAGetArrayRead(A, &a)); 3458 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3459 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3460 #endif 3461 } else if (iship) { 3462 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); 3463 #if PetscDefined(HAVE_HIP) 3464 PetscCall(MatDenseHIPGetArrayRead(A, &a)); 3465 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3466 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3467 #endif 3468 } 3469 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); 3470 PetscFunctionReturn(PETSC_SUCCESS); 3471 } 3472 3473 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3474 { 3475 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3476 3477 PetscFunctionBegin; 3478 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3479 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3480 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3481 a->vecinuse = col + 1; 3482 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3483 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3484 *v = a->cvec; 3485 PetscFunctionReturn(PETSC_SUCCESS); 3486 } 3487 3488 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3489 { 3490 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3491 3492 PetscFunctionBegin; 3493 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3494 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3495 VecCheckAssembled(a->cvec); 3496 a->vecinuse = 0; 3497 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3498 PetscCall(VecResetArray(a->cvec)); 3499 if (v) *v = NULL; 3500 PetscFunctionReturn(PETSC_SUCCESS); 3501 } 3502 3503 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3504 { 3505 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3506 3507 PetscFunctionBegin; 3508 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3509 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3510 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3511 a->vecinuse = col + 1; 3512 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3513 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3514 PetscCall(VecLockReadPush(a->cvec)); 3515 *v = a->cvec; 3516 PetscFunctionReturn(PETSC_SUCCESS); 3517 } 3518 3519 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3520 { 3521 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3522 3523 PetscFunctionBegin; 3524 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3525 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3526 VecCheckAssembled(a->cvec); 3527 a->vecinuse = 0; 3528 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3529 PetscCall(VecLockReadPop(a->cvec)); 3530 PetscCall(VecResetArray(a->cvec)); 3531 if (v) *v = NULL; 3532 PetscFunctionReturn(PETSC_SUCCESS); 3533 } 3534 3535 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3536 { 3537 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3538 3539 PetscFunctionBegin; 3540 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3541 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3542 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3543 a->vecinuse = col + 1; 3544 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3545 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3546 *v = a->cvec; 3547 PetscFunctionReturn(PETSC_SUCCESS); 3548 } 3549 3550 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3551 { 3552 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3553 3554 PetscFunctionBegin; 3555 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3556 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3557 VecCheckAssembled(a->cvec); 3558 a->vecinuse = 0; 3559 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3560 PetscCall(VecResetArray(a->cvec)); 3561 if (v) *v = NULL; 3562 PetscFunctionReturn(PETSC_SUCCESS); 3563 } 3564 3565 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3566 { 3567 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3568 3569 PetscFunctionBegin; 3570 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3571 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3572 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3573 if (!a->cmat) { 3574 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); 3575 } else { 3576 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); 3577 } 3578 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3579 a->matinuse = cbegin + 1; 3580 *v = a->cmat; 3581 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3582 A->offloadmask = PETSC_OFFLOAD_CPU; 3583 #endif 3584 PetscFunctionReturn(PETSC_SUCCESS); 3585 } 3586 3587 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3588 { 3589 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3590 3591 PetscFunctionBegin; 3592 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3593 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3594 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3595 a->matinuse = 0; 3596 PetscCall(MatDenseResetArray(a->cmat)); 3597 if (v) *v = NULL; 3598 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3599 A->offloadmask = PETSC_OFFLOAD_CPU; 3600 #endif 3601 PetscFunctionReturn(PETSC_SUCCESS); 3602 } 3603 3604 /*MC 3605 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3606 3607 Options Database Key: 3608 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3609 3610 Level: beginner 3611 3612 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` 3613 M*/ 3614 PetscErrorCode MatCreate_SeqDense(Mat B) 3615 { 3616 Mat_SeqDense *b; 3617 PetscMPIInt size; 3618 3619 PetscFunctionBegin; 3620 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3621 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3622 3623 PetscCall(PetscNew(&b)); 3624 B->data = (void *)b; 3625 B->ops[0] = MatOps_Values; 3626 3627 b->roworiented = PETSC_TRUE; 3628 3629 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3633 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3635 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3636 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3638 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3639 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3641 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3642 #if defined(PETSC_HAVE_ELEMENTAL) 3643 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3644 #endif 3645 #if defined(PETSC_HAVE_SCALAPACK) 3646 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3647 #endif 3648 #if defined(PETSC_HAVE_CUDA) 3649 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3650 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3652 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3653 #endif 3654 #if defined(PETSC_HAVE_HIP) 3655 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3656 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3657 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3658 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3659 #endif 3660 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3661 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3662 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3663 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3664 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3665 3666 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3667 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3668 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3669 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3670 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3671 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3672 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3674 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3675 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3676 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); 3677 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); 3678 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); 3679 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3680 PetscFunctionReturn(PETSC_SUCCESS); 3681 } 3682 3683 /*@C 3684 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3685 3686 Not Collective 3687 3688 Input Parameters: 3689 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3690 - col - column index 3691 3692 Output Parameter: 3693 . vals - pointer to the data 3694 3695 Level: intermediate 3696 3697 Note: 3698 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3699 3700 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3701 @*/ 3702 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[]) 3703 { 3704 PetscFunctionBegin; 3705 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3706 PetscValidLogicalCollectiveInt(A, col, 2); 3707 PetscAssertPointer(vals, 3); 3708 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3709 PetscFunctionReturn(PETSC_SUCCESS); 3710 } 3711 3712 /*@C 3713 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3714 3715 Not Collective 3716 3717 Input Parameters: 3718 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3719 - vals - pointer to the data (may be `NULL`) 3720 3721 Level: intermediate 3722 3723 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` 3724 @*/ 3725 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[]) 3726 { 3727 PetscFunctionBegin; 3728 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3729 PetscAssertPointer(vals, 2); 3730 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3731 PetscFunctionReturn(PETSC_SUCCESS); 3732 } 3733 3734 /*@ 3735 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3736 3737 Collective 3738 3739 Input Parameters: 3740 + A - the `Mat` object 3741 - col - the column index 3742 3743 Output Parameter: 3744 . v - the vector 3745 3746 Level: intermediate 3747 3748 Notes: 3749 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3750 3751 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3752 3753 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3754 @*/ 3755 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3756 { 3757 PetscFunctionBegin; 3758 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3759 PetscValidType(A, 1); 3760 PetscValidLogicalCollectiveInt(A, col, 2); 3761 PetscAssertPointer(v, 3); 3762 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3763 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3764 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3765 PetscFunctionReturn(PETSC_SUCCESS); 3766 } 3767 3768 /*@ 3769 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. 3770 3771 Collective 3772 3773 Input Parameters: 3774 + A - the `Mat` object 3775 . col - the column index 3776 - v - the `Vec` object (may be `NULL`) 3777 3778 Level: intermediate 3779 3780 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3781 @*/ 3782 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3783 { 3784 PetscFunctionBegin; 3785 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3786 PetscValidType(A, 1); 3787 PetscValidLogicalCollectiveInt(A, col, 2); 3788 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3789 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3790 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3791 PetscFunctionReturn(PETSC_SUCCESS); 3792 } 3793 3794 /*@ 3795 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. 3796 3797 Collective 3798 3799 Input Parameters: 3800 + A - the `Mat` object 3801 - col - the column index 3802 3803 Output Parameter: 3804 . v - the vector 3805 3806 Level: intermediate 3807 3808 Notes: 3809 The vector is owned by PETSc and users cannot modify it. 3810 3811 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. 3812 3813 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. 3814 3815 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3816 @*/ 3817 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3818 { 3819 PetscFunctionBegin; 3820 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3821 PetscValidType(A, 1); 3822 PetscValidLogicalCollectiveInt(A, col, 2); 3823 PetscAssertPointer(v, 3); 3824 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3825 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3826 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3827 PetscFunctionReturn(PETSC_SUCCESS); 3828 } 3829 3830 /*@ 3831 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. 3832 3833 Collective 3834 3835 Input Parameters: 3836 + A - the `Mat` object 3837 . col - the column index 3838 - v - the `Vec` object (may be `NULL`) 3839 3840 Level: intermediate 3841 3842 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3843 @*/ 3844 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3845 { 3846 PetscFunctionBegin; 3847 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3848 PetscValidType(A, 1); 3849 PetscValidLogicalCollectiveInt(A, col, 2); 3850 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3851 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3852 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3853 PetscFunctionReturn(PETSC_SUCCESS); 3854 } 3855 3856 /*@ 3857 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. 3858 3859 Collective 3860 3861 Input Parameters: 3862 + A - the `Mat` object 3863 - col - the column index 3864 3865 Output Parameter: 3866 . v - the vector 3867 3868 Level: intermediate 3869 3870 Notes: 3871 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. 3872 3873 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. 3874 3875 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3876 @*/ 3877 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3878 { 3879 PetscFunctionBegin; 3880 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3881 PetscValidType(A, 1); 3882 PetscValidLogicalCollectiveInt(A, col, 2); 3883 PetscAssertPointer(v, 3); 3884 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3885 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3886 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3887 PetscFunctionReturn(PETSC_SUCCESS); 3888 } 3889 3890 /*@ 3891 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. 3892 3893 Collective 3894 3895 Input Parameters: 3896 + A - the `Mat` object 3897 . col - the column index 3898 - v - the `Vec` object (may be `NULL`) 3899 3900 Level: intermediate 3901 3902 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3903 @*/ 3904 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3905 { 3906 PetscFunctionBegin; 3907 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3908 PetscValidType(A, 1); 3909 PetscValidLogicalCollectiveInt(A, col, 2); 3910 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3911 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3912 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3913 PetscFunctionReturn(PETSC_SUCCESS); 3914 } 3915 3916 /*@ 3917 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. 3918 3919 Collective 3920 3921 Input Parameters: 3922 + A - the `Mat` object 3923 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) 3924 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) 3925 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) 3926 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) 3927 3928 Output Parameter: 3929 . v - the matrix 3930 3931 Level: intermediate 3932 3933 Notes: 3934 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. 3935 3936 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. 3937 3938 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3939 @*/ 3940 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3941 { 3942 PetscFunctionBegin; 3943 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3944 PetscValidType(A, 1); 3945 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3946 PetscValidLogicalCollectiveInt(A, rend, 3); 3947 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3948 PetscValidLogicalCollectiveInt(A, cend, 5); 3949 PetscAssertPointer(v, 6); 3950 if (rbegin == PETSC_DECIDE) rbegin = 0; 3951 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3952 if (cbegin == PETSC_DECIDE) cbegin = 0; 3953 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3954 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3955 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3956 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3957 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3958 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3959 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3960 PetscFunctionReturn(PETSC_SUCCESS); 3961 } 3962 3963 /*@ 3964 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. 3965 3966 Collective 3967 3968 Input Parameters: 3969 + A - the `Mat` object 3970 - v - the `Mat` object (may be `NULL`) 3971 3972 Level: intermediate 3973 3974 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3975 @*/ 3976 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3977 { 3978 PetscFunctionBegin; 3979 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3980 PetscValidType(A, 1); 3981 PetscAssertPointer(v, 2); 3982 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3983 PetscFunctionReturn(PETSC_SUCCESS); 3984 } 3985 3986 #include <petscblaslapack.h> 3987 #include <petsc/private/kernels/blockinvert.h> 3988 3989 PetscErrorCode MatSeqDenseInvert(Mat A) 3990 { 3991 PetscInt m; 3992 const PetscReal shift = 0.0; 3993 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 3994 PetscScalar *values; 3995 3996 PetscFunctionBegin; 3997 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3998 PetscCall(MatDenseGetArray(A, &values)); 3999 PetscCall(MatGetLocalSize(A, &m, NULL)); 4000 allowzeropivot = PetscNot(A->erroriffailure); 4001 /* factor and invert each block */ 4002 switch (m) { 4003 case 1: 4004 values[0] = (PetscScalar)1.0 / (values[0] + shift); 4005 break; 4006 case 2: 4007 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 4008 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4009 break; 4010 case 3: 4011 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 4012 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4013 break; 4014 case 4: 4015 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 4016 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4017 break; 4018 case 5: { 4019 PetscScalar work[25]; 4020 PetscInt ipvt[5]; 4021 4022 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4023 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4024 } break; 4025 case 6: 4026 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 4027 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4028 break; 4029 case 7: 4030 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 4031 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4032 break; 4033 default: { 4034 PetscInt *v_pivots, *IJ, j; 4035 PetscScalar *v_work; 4036 4037 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); 4038 for (j = 0; j < m; j++) IJ[j] = j; 4039 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4040 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4041 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 4042 } 4043 } 4044 PetscCall(MatDenseRestoreArray(A, &values)); 4045 PetscFunctionReturn(PETSC_SUCCESS); 4046 } 4047