1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/dense/mpi/mpidense.h> 9 #include <petscblaslapack.h> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <petsc/private/vecimpl.h> 12 13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 PetscInt j, k, n = A->rmap->n; 17 PetscScalar *v; 18 19 PetscFunctionBegin; 20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 21 PetscCall(MatDenseGetArray(A, &v)); 22 if (!hermitian) { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 25 } 26 } else { 27 for (k = 0; k < n; k++) { 28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 29 } 30 } 31 PetscCall(MatDenseRestoreArray(A, &v)); 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 36 { 37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 38 PetscBLASInt info, n; 39 40 PetscFunctionBegin; 41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 42 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 43 if (A->factortype == MAT_FACTOR_LU) { 44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 45 if (!mat->fwork) { 46 mat->lfwork = n; 47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 48 } 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 54 if (A->spd == PETSC_BOOL3_TRUE) { 55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 57 PetscCall(PetscFPTrapPop()); 58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 59 #if defined(PETSC_USE_COMPLEX) 60 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 65 PetscCall(PetscFPTrapPop()); 66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 67 #endif 68 } else { /* symmetric case */ 69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 73 PetscCall(PetscFPTrapPop()); 74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 75 } 76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1); 77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 79 80 A->ops->solve = NULL; 81 A->ops->matsolve = NULL; 82 A->ops->solvetranspose = NULL; 83 A->ops->matsolvetranspose = NULL; 84 A->ops->solveadd = NULL; 85 A->ops->solvetransposeadd = NULL; 86 A->factortype = MAT_FACTOR_NONE; 87 PetscCall(PetscFree(A->solvertype)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 92 { 93 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 95 PetscScalar *slot, *bb, *v; 96 const PetscScalar *xx; 97 98 PetscFunctionBegin; 99 if (PetscDefined(USE_DEBUG)) { 100 for (i = 0; i < N; i++) { 101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 104 } 105 } 106 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 107 108 /* fix right-hand side if needed */ 109 if (x && b) { 110 Vec xt; 111 112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 113 PetscCall(VecDuplicate(x, &xt)); 114 PetscCall(VecCopy(x, xt)); 115 PetscCall(VecScale(xt, -1.0)); 116 PetscCall(MatMultAdd(A, xt, b, b)); 117 PetscCall(VecDestroy(&xt)); 118 PetscCall(VecGetArrayRead(x, &xx)); 119 PetscCall(VecGetArray(b, &bb)); 120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 121 PetscCall(VecRestoreArrayRead(x, &xx)); 122 PetscCall(VecRestoreArray(b, &bb)); 123 } 124 125 PetscCall(MatDenseGetArray(A, &v)); 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i] * m; 128 PetscCall(PetscArrayzero(slot, r)); 129 } 130 for (i = 0; i < N; i++) { 131 slot = v + rows[i]; 132 for (j = 0; j < n; j++) { 133 *slot = 0.0; 134 slot += m; 135 } 136 } 137 if (diag != 0.0) { 138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 139 for (i = 0; i < N; i++) { 140 slot = v + (m + 1) * rows[i]; 141 *slot = diag; 142 } 143 } 144 PetscCall(MatDenseRestoreArray(A, &v)); 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 149 { 150 Mat B = NULL; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 152 Mat_SeqDense *b; 153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 154 const MatScalar *av; 155 PetscBool isseqdense; 156 157 PetscFunctionBegin; 158 if (reuse == MAT_REUSE_MATRIX) { 159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); 161 } 162 if (reuse != MAT_REUSE_MATRIX) { 163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 164 PetscCall(MatSetSizes(B, m, n, m, n)); 165 PetscCall(MatSetType(B, MATSEQDENSE)); 166 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 167 b = (Mat_SeqDense *)B->data; 168 } else { 169 b = (Mat_SeqDense *)((*newmat)->data); 170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); 171 } 172 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 173 for (i = 0; i < m; i++) { 174 PetscInt j; 175 for (j = 0; j < ai[1] - ai[0]; j++) { 176 b->v[*aj * b->lda + i] = *av; 177 aj++; 178 av++; 179 } 180 ai++; 181 } 182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 183 184 if (reuse == MAT_INPLACE_MATRIX) { 185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 187 PetscCall(MatHeaderReplace(A, &B)); 188 } else { 189 if (B) *newmat = B; 190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 197 { 198 Mat B = NULL; 199 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 200 PetscInt i, j; 201 PetscInt *rows, *nnz; 202 MatScalar *aa = a->v, *vals; 203 204 PetscFunctionBegin; 205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 206 if (reuse != MAT_REUSE_MATRIX) { 207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 209 PetscCall(MatSetType(B, MATSEQAIJ)); 210 for (j = 0; j < A->cmap->n; j++) { 211 for (i = 0; i < A->rmap->n; i++) 212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 213 aa += a->lda; 214 } 215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 216 } else B = *newmat; 217 aa = a->v; 218 for (j = 0; j < A->cmap->n; j++) { 219 PetscInt numRows = 0; 220 for (i = 0; i < A->rmap->n; i++) 221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 222 rows[numRows] = i; 223 vals[numRows++] = aa[i]; 224 } 225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 226 aa += a->lda; 227 } 228 PetscCall(PetscFree3(rows, nnz, vals)); 229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 231 232 if (reuse == MAT_INPLACE_MATRIX) { 233 PetscCall(MatHeaderReplace(A, &B)); 234 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 239 { 240 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 241 const PetscScalar *xv; 242 PetscScalar *yv; 243 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 244 245 PetscFunctionBegin; 246 PetscCall(MatDenseGetArrayRead(X, &xv)); 247 PetscCall(MatDenseGetArray(Y, &yv)); 248 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 249 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 250 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 251 PetscCall(PetscBLASIntCast(y->lda, &lday)); 252 if (ldax > m || lday > m) { 253 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); 254 } else { 255 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 256 } 257 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 258 PetscCall(MatDenseRestoreArray(Y, &yv)); 259 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 264 { 265 PetscLogDouble N = A->rmap->n * A->cmap->n; 266 267 PetscFunctionBegin; 268 info->block_size = 1.0; 269 info->nz_allocated = N; 270 info->nz_used = N; 271 info->nz_unneeded = 0; 272 info->assemblies = A->num_ass; 273 info->mallocs = 0; 274 info->memory = 0; /* REVIEW ME */ 275 info->fill_ratio_given = 0; 276 info->fill_ratio_needed = 0; 277 info->factor_mallocs = 0; 278 PetscFunctionReturn(PETSC_SUCCESS); 279 } 280 281 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 282 { 283 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 284 PetscScalar *v; 285 PetscBLASInt one = 1, j, nz, lda = 0; 286 287 PetscFunctionBegin; 288 PetscCall(MatDenseGetArray(A, &v)); 289 PetscCall(PetscBLASIntCast(a->lda, &lda)); 290 if (lda > A->rmap->n) { 291 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 292 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 293 } else { 294 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 295 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 296 } 297 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 298 PetscCall(MatDenseRestoreArray(A, &v)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 303 { 304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 305 PetscScalar *v; 306 PetscInt j, k; 307 308 PetscFunctionBegin; 309 PetscCall(MatDenseGetArray(A, &v)); 310 k = PetscMin(A->rmap->n, A->cmap->n); 311 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 312 PetscCall(PetscLogFlops(k)); 313 PetscCall(MatDenseRestoreArray(A, &v)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 318 { 319 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 320 PetscInt i, j, m = A->rmap->n, N = a->lda; 321 const PetscScalar *v; 322 323 PetscFunctionBegin; 324 *fl = PETSC_FALSE; 325 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 326 PetscCall(MatDenseGetArrayRead(A, &v)); 327 for (i = 0; i < m; i++) { 328 for (j = i; j < m; j++) { 329 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 330 } 331 } 332 *fl = PETSC_TRUE; 333 restore: 334 PetscCall(MatDenseRestoreArrayRead(A, &v)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 339 { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 360 { 361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 362 PetscInt lda = mat->lda, j, m, nlda = lda; 363 PetscBool isdensecpu; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 367 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 368 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 369 PetscCall(MatDenseSetLDA(newi, lda)); 370 } 371 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 372 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 373 if (cpvalues == MAT_COPY_VALUES) { 374 const PetscScalar *av; 375 PetscScalar *v; 376 377 PetscCall(MatDenseGetArrayRead(A, &av)); 378 PetscCall(MatDenseGetArrayWrite(newi, &v)); 379 PetscCall(MatDenseGetLDA(newi, &nlda)); 380 m = A->rmap->n; 381 if (lda > m || nlda > m) { 382 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); 383 } else { 384 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 385 } 386 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 387 PetscCall(MatDenseRestoreArrayRead(A, &av)); 388 PetscCall(MatPropagateSymmetryOptions(A, newi)); 389 } 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 394 { 395 PetscFunctionBegin; 396 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 397 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 398 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 399 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 404 { 405 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 406 PetscBLASInt info; 407 408 PetscFunctionBegin; 409 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 410 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 411 PetscCall(PetscFPTrapPop()); 412 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info); 413 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 418 { 419 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 420 PetscBLASInt info; 421 422 PetscFunctionBegin; 423 if (A->spd == PETSC_BOOL3_TRUE) { 424 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 425 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 426 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 427 PetscCall(PetscFPTrapPop()); 428 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info); 429 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 430 #if defined(PETSC_USE_COMPLEX) 431 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 432 if (T) PetscCall(MatConjugate_SeqDense(A)); 433 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 434 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 435 PetscCall(PetscFPTrapPop()); 436 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info); 437 if (T) PetscCall(MatConjugate_SeqDense(A)); 438 #endif 439 } else { /* symmetric case */ 440 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 441 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 442 PetscCall(PetscFPTrapPop()); 443 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info); 444 } 445 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 446 PetscFunctionReturn(PETSC_SUCCESS); 447 } 448 449 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 450 { 451 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 452 PetscBLASInt info; 453 char trans; 454 455 PetscFunctionBegin; 456 if (PetscDefined(USE_COMPLEX)) { 457 trans = 'C'; 458 } else { 459 trans = 'T'; 460 } 461 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 462 { /* lwork depends on the number of right-hand sides */ 463 PetscBLASInt nlfwork, lfwork = -1; 464 PetscScalar fwork; 465 466 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 467 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 468 if (nlfwork > mat->lfwork) { 469 mat->lfwork = nlfwork; 470 PetscCall(PetscFree(mat->fwork)); 471 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 472 } 473 } 474 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 475 PetscCall(PetscFPTrapPop()); 476 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 477 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 478 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 479 PetscCall(PetscFPTrapPop()); 480 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 481 for (PetscInt j = 0; j < nrhs; j++) { 482 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 483 } 484 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 489 { 490 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 491 PetscBLASInt info; 492 493 PetscFunctionBegin; 494 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 495 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 496 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 497 PetscCall(PetscFPTrapPop()); 498 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 499 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 500 { /* lwork depends on the number of right-hand sides */ 501 PetscBLASInt nlfwork, lfwork = -1; 502 PetscScalar fwork; 503 504 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 505 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 506 if (nlfwork > mat->lfwork) { 507 mat->lfwork = nlfwork; 508 PetscCall(PetscFree(mat->fwork)); 509 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 510 } 511 } 512 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 513 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 514 PetscCall(PetscFPTrapPop()); 515 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 516 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 517 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 518 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 519 PetscFunctionReturn(PETSC_SUCCESS); 520 } 521 522 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 523 { 524 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 525 PetscScalar *y; 526 PetscBLASInt m = 0, k = 0; 527 528 PetscFunctionBegin; 529 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 530 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 531 if (k < m) { 532 PetscCall(VecCopy(xx, mat->qrrhs)); 533 PetscCall(VecGetArray(mat->qrrhs, &y)); 534 } else { 535 PetscCall(VecCopy(xx, yy)); 536 PetscCall(VecGetArray(yy, &y)); 537 } 538 *_y = y; 539 *_k = k; 540 *_m = m; 541 PetscFunctionReturn(PETSC_SUCCESS); 542 } 543 544 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 545 { 546 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 547 PetscScalar *y = NULL; 548 PetscBLASInt m, k; 549 550 PetscFunctionBegin; 551 y = *_y; 552 *_y = NULL; 553 k = *_k; 554 m = *_m; 555 if (k < m) { 556 PetscScalar *yv; 557 PetscCall(VecGetArray(yy, &yv)); 558 PetscCall(PetscArraycpy(yv, y, k)); 559 PetscCall(VecRestoreArray(yy, &yv)); 560 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 561 } else { 562 PetscCall(VecRestoreArray(yy, &y)); 563 } 564 PetscFunctionReturn(PETSC_SUCCESS); 565 } 566 567 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 568 { 569 PetscScalar *y = NULL; 570 PetscBLASInt m = 0, k = 0; 571 572 PetscFunctionBegin; 573 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 574 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 575 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 576 PetscFunctionReturn(PETSC_SUCCESS); 577 } 578 579 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 580 { 581 PetscScalar *y = NULL; 582 PetscBLASInt m = 0, k = 0; 583 584 PetscFunctionBegin; 585 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 586 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 587 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 592 { 593 PetscScalar *y = NULL; 594 PetscBLASInt m = 0, k = 0; 595 596 PetscFunctionBegin; 597 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 598 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 599 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 604 { 605 PetscScalar *y = NULL; 606 PetscBLASInt m = 0, k = 0; 607 608 PetscFunctionBegin; 609 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 610 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 611 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 612 PetscFunctionReturn(PETSC_SUCCESS); 613 } 614 615 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 616 { 617 PetscScalar *y = NULL; 618 PetscBLASInt m = 0, k = 0; 619 620 PetscFunctionBegin; 621 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 622 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 623 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 628 { 629 PetscScalar *y = NULL; 630 PetscBLASInt m = 0, k = 0; 631 632 PetscFunctionBegin; 633 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 634 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 635 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 636 PetscFunctionReturn(PETSC_SUCCESS); 637 } 638 639 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 640 { 641 const PetscScalar *b; 642 PetscScalar *y; 643 PetscInt n, _ldb, _ldx; 644 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 645 646 PetscFunctionBegin; 647 *_ldy = 0; 648 *_m = 0; 649 *_nrhs = 0; 650 *_k = 0; 651 *_y = NULL; 652 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 653 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 654 PetscCall(MatGetSize(B, NULL, &n)); 655 PetscCall(PetscBLASIntCast(n, &nrhs)); 656 PetscCall(MatDenseGetLDA(B, &_ldb)); 657 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 658 PetscCall(MatDenseGetLDA(X, &_ldx)); 659 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 660 if (ldx < m) { 661 PetscCall(MatDenseGetArrayRead(B, &b)); 662 PetscCall(PetscMalloc1(nrhs * m, &y)); 663 if (ldb == m) { 664 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 665 } else { 666 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 667 } 668 ldy = m; 669 PetscCall(MatDenseRestoreArrayRead(B, &b)); 670 } else { 671 if (ldb == ldx) { 672 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 673 PetscCall(MatDenseGetArray(X, &y)); 674 } else { 675 PetscCall(MatDenseGetArray(X, &y)); 676 PetscCall(MatDenseGetArrayRead(B, &b)); 677 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 678 PetscCall(MatDenseRestoreArrayRead(B, &b)); 679 } 680 ldy = ldx; 681 } 682 *_y = y; 683 *_ldy = ldy; 684 *_k = k; 685 *_m = m; 686 *_nrhs = nrhs; 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 691 { 692 PetscScalar *y; 693 PetscInt _ldx; 694 PetscBLASInt k, ldy, nrhs, ldx = 0; 695 696 PetscFunctionBegin; 697 y = *_y; 698 *_y = NULL; 699 k = *_k; 700 ldy = *_ldy; 701 nrhs = *_nrhs; 702 PetscCall(MatDenseGetLDA(X, &_ldx)); 703 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 704 if (ldx != ldy) { 705 PetscScalar *xv; 706 PetscCall(MatDenseGetArray(X, &xv)); 707 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 708 PetscCall(MatDenseRestoreArray(X, &xv)); 709 PetscCall(PetscFree(y)); 710 } else { 711 PetscCall(MatDenseRestoreArray(X, &y)); 712 } 713 PetscFunctionReturn(PETSC_SUCCESS); 714 } 715 716 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 717 { 718 PetscScalar *y; 719 PetscBLASInt m, k, ldy, nrhs; 720 721 PetscFunctionBegin; 722 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 723 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 724 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 725 PetscFunctionReturn(PETSC_SUCCESS); 726 } 727 728 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 729 { 730 PetscScalar *y; 731 PetscBLASInt m, k, ldy, nrhs; 732 733 PetscFunctionBegin; 734 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 735 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 736 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 741 { 742 PetscScalar *y; 743 PetscBLASInt m, k, ldy, nrhs; 744 745 PetscFunctionBegin; 746 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 747 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 748 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 753 { 754 PetscScalar *y; 755 PetscBLASInt m, k, ldy, nrhs; 756 757 PetscFunctionBegin; 758 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 759 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 760 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 761 PetscFunctionReturn(PETSC_SUCCESS); 762 } 763 764 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 765 { 766 PetscScalar *y; 767 PetscBLASInt m, k, ldy, nrhs; 768 769 PetscFunctionBegin; 770 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 771 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 772 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 773 PetscFunctionReturn(PETSC_SUCCESS); 774 } 775 776 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 777 { 778 PetscScalar *y; 779 PetscBLASInt m, k, ldy, nrhs; 780 781 PetscFunctionBegin; 782 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 783 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 784 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscFunctionReturn(PETSC_SUCCESS); 786 } 787 788 /* COMMENT: I have chosen to hide row permutation in the pivots, 789 rather than put it in the Mat->row slot.*/ 790 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 791 { 792 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 793 PetscBLASInt n, m, info; 794 795 PetscFunctionBegin; 796 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 797 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 798 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 799 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 800 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 801 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 802 PetscCall(PetscFPTrapPop()); 803 804 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info); 805 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info); 806 807 A->ops->solve = MatSolve_SeqDense_LU; 808 A->ops->matsolve = MatMatSolve_SeqDense_LU; 809 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 810 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 811 A->factortype = MAT_FACTOR_LU; 812 813 PetscCall(PetscFree(A->solvertype)); 814 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 815 816 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 817 PetscFunctionReturn(PETSC_SUCCESS); 818 } 819 820 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 821 { 822 MatFactorInfo info; 823 824 PetscFunctionBegin; 825 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 826 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 827 PetscFunctionReturn(PETSC_SUCCESS); 828 } 829 830 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 831 { 832 PetscFunctionBegin; 833 fact->preallocated = PETSC_TRUE; 834 fact->assembled = PETSC_TRUE; 835 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 836 PetscFunctionReturn(PETSC_SUCCESS); 837 } 838 839 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 840 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 841 { 842 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 843 PetscBLASInt info, n; 844 845 PetscFunctionBegin; 846 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 847 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 848 if (A->spd == PETSC_BOOL3_TRUE) { 849 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 850 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 851 PetscCall(PetscFPTrapPop()); 852 #if defined(PETSC_USE_COMPLEX) 853 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 854 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 855 if (!mat->fwork) { 856 PetscScalar dummy; 857 858 mat->lfwork = -1; 859 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 860 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 861 PetscCall(PetscFPTrapPop()); 862 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 863 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 864 } 865 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 866 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 867 PetscCall(PetscFPTrapPop()); 868 #endif 869 } else { /* symmetric case */ 870 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 871 if (!mat->fwork) { 872 PetscScalar dummy; 873 874 mat->lfwork = -1; 875 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 876 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 877 PetscCall(PetscFPTrapPop()); 878 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 879 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 880 } 881 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 882 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 883 PetscCall(PetscFPTrapPop()); 884 } 885 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1); 886 887 A->ops->solve = MatSolve_SeqDense_Cholesky; 888 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 889 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 890 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 891 A->factortype = MAT_FACTOR_CHOLESKY; 892 893 PetscCall(PetscFree(A->solvertype)); 894 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 895 896 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 901 { 902 MatFactorInfo info; 903 904 PetscFunctionBegin; 905 info.fill = 1.0; 906 907 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 908 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 913 { 914 PetscFunctionBegin; 915 fact->assembled = PETSC_TRUE; 916 fact->preallocated = PETSC_TRUE; 917 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 918 PetscFunctionReturn(PETSC_SUCCESS); 919 } 920 921 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 922 { 923 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 924 PetscBLASInt n, m, info, min, max; 925 926 PetscFunctionBegin; 927 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 928 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 929 max = PetscMax(m, n); 930 min = PetscMin(m, n); 931 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 932 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 933 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); 934 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 935 if (!mat->fwork) { 936 PetscScalar dummy; 937 938 mat->lfwork = -1; 939 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 940 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 941 PetscCall(PetscFPTrapPop()); 942 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 943 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 944 } 945 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 946 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 947 PetscCall(PetscFPTrapPop()); 948 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info); 949 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 950 mat->rank = min; 951 952 A->ops->solve = MatSolve_SeqDense_QR; 953 A->ops->matsolve = MatMatSolve_SeqDense_QR; 954 A->factortype = MAT_FACTOR_QR; 955 if (m == n) { 956 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 957 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 958 } 959 960 PetscCall(PetscFree(A->solvertype)); 961 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 962 963 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 964 PetscFunctionReturn(PETSC_SUCCESS); 965 } 966 967 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 968 { 969 MatFactorInfo info; 970 971 PetscFunctionBegin; 972 info.fill = 1.0; 973 974 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 975 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 976 PetscFunctionReturn(PETSC_SUCCESS); 977 } 978 979 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 980 { 981 PetscFunctionBegin; 982 fact->assembled = PETSC_TRUE; 983 fact->preallocated = PETSC_TRUE; 984 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 985 PetscFunctionReturn(PETSC_SUCCESS); 986 } 987 988 /* uses LAPACK */ 989 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 990 { 991 PetscFunctionBegin; 992 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 993 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 994 PetscCall(MatSetType(*fact, MATDENSE)); 995 (*fact)->trivialsymbolic = PETSC_TRUE; 996 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 997 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 998 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 999 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1000 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1001 } else if (ftype == MAT_FACTOR_QR) { 1002 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1003 } 1004 (*fact)->factortype = ftype; 1005 1006 PetscCall(PetscFree((*fact)->solvertype)); 1007 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1008 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1009 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1010 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1011 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1012 PetscFunctionReturn(PETSC_SUCCESS); 1013 } 1014 1015 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1016 { 1017 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1018 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1019 const PetscScalar *b; 1020 PetscInt m = A->rmap->n, i; 1021 PetscBLASInt o = 1, bm = 0; 1022 1023 PetscFunctionBegin; 1024 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1025 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1026 #endif 1027 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1028 PetscCall(PetscBLASIntCast(m, &bm)); 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 /* this is a hack fix, should have another version without the second BLASdotu */ 1031 PetscCall(VecSet(xx, zero)); 1032 } 1033 PetscCall(VecGetArray(xx, &x)); 1034 PetscCall(VecGetArrayRead(bb, &b)); 1035 its = its * lits; 1036 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1037 while (its--) { 1038 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1039 for (i = 0; i < m; i++) { 1040 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1041 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); 1042 } 1043 } 1044 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1045 for (i = m - 1; i >= 0; i--) { 1046 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1047 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); 1048 } 1049 } 1050 } 1051 PetscCall(VecRestoreArrayRead(bb, &b)); 1052 PetscCall(VecRestoreArray(xx, &x)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 static PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1057 { 1058 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1059 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1060 PetscBLASInt m, n, _One = 1; 1061 const PetscScalar *v = mat->v, *x; 1062 1063 PetscFunctionBegin; 1064 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1065 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1066 PetscCall(VecGetArrayRead(xx, &x)); 1067 PetscCall(VecGetArrayWrite(yy, &y)); 1068 if (!m || !n) { 1069 PetscBLASInt i; 1070 if (trans) 1071 for (i = 0; i < n; i++) y[i] = 0.0; 1072 else 1073 for (i = 0; i < m; i++) y[i] = 0.0; 1074 } else { 1075 if (trans) { 1076 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1077 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1078 } else { 1079 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); 1080 } 1081 PetscCall(PetscLogFlops(2.0 * m * n - n)); 1082 } 1083 PetscCall(VecRestoreArrayRead(xx, &x)); 1084 PetscCall(VecRestoreArrayWrite(yy, &y)); 1085 PetscFunctionReturn(PETSC_SUCCESS); 1086 } 1087 1088 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1089 { 1090 PetscFunctionBegin; 1091 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1092 PetscFunctionReturn(PETSC_SUCCESS); 1093 } 1094 1095 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1096 { 1097 PetscFunctionBegin; 1098 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1099 PetscFunctionReturn(PETSC_SUCCESS); 1100 } 1101 1102 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1103 { 1104 PetscFunctionBegin; 1105 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1106 PetscFunctionReturn(PETSC_SUCCESS); 1107 } 1108 1109 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1110 { 1111 PetscFunctionBegin; 1112 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1113 PetscFunctionReturn(PETSC_SUCCESS); 1114 } 1115 1116 static PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1117 { 1118 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1119 const PetscScalar *v = mat->v, *x; 1120 PetscScalar *y, _DOne = 1.0; 1121 PetscBLASInt m, n, _One = 1; 1122 1123 PetscFunctionBegin; 1124 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1125 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1126 PetscCall(VecCopy(zz, yy)); 1127 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); 1128 PetscCall(VecGetArray(yy, &y)); 1129 PetscCall(VecGetArrayRead(xx, &x)); 1130 if (trans) { 1131 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1132 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1133 } else { 1134 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); 1135 } 1136 PetscCall(VecRestoreArrayRead(xx, &x)); 1137 PetscCall(VecRestoreArray(yy, &y)); 1138 PetscCall(PetscLogFlops(2.0 * m * n)); 1139 PetscFunctionReturn(PETSC_SUCCESS); 1140 } 1141 1142 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1143 { 1144 PetscFunctionBegin; 1145 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1146 PetscFunctionReturn(PETSC_SUCCESS); 1147 } 1148 1149 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1150 { 1151 PetscFunctionBegin; 1152 PetscMPIInt rank; 1153 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 1154 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1155 PetscFunctionReturn(PETSC_SUCCESS); 1156 } 1157 1158 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1159 { 1160 PetscFunctionBegin; 1161 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1166 { 1167 PetscFunctionBegin; 1168 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1173 { 1174 PetscFunctionBegin; 1175 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1176 PetscFunctionReturn(PETSC_SUCCESS); 1177 } 1178 1179 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1180 { 1181 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1182 PetscInt i; 1183 1184 PetscFunctionBegin; 1185 if (ncols) *ncols = A->cmap->n; 1186 if (cols) { 1187 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1188 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1189 } 1190 if (vals) { 1191 const PetscScalar *v; 1192 1193 PetscCall(MatDenseGetArrayRead(A, &v)); 1194 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1195 v += row; 1196 for (i = 0; i < A->cmap->n; i++) { 1197 (*vals)[i] = *v; 1198 v += mat->lda; 1199 } 1200 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1201 } 1202 PetscFunctionReturn(PETSC_SUCCESS); 1203 } 1204 1205 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1206 { 1207 PetscFunctionBegin; 1208 if (cols) PetscCall(PetscFree(*cols)); 1209 if (vals) PetscCall(PetscFree(*vals)); 1210 PetscFunctionReturn(PETSC_SUCCESS); 1211 } 1212 1213 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1214 { 1215 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1216 PetscScalar *av; 1217 PetscInt i, j, idx = 0; 1218 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1219 PetscOffloadMask oldf; 1220 #endif 1221 1222 PetscFunctionBegin; 1223 PetscCall(MatDenseGetArray(A, &av)); 1224 if (!mat->roworiented) { 1225 if (addv == INSERT_VALUES) { 1226 for (j = 0; j < n; j++) { 1227 if (indexn[j] < 0) { 1228 idx += m; 1229 continue; 1230 } 1231 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1232 for (i = 0; i < m; i++) { 1233 if (indexm[i] < 0) { 1234 idx++; 1235 continue; 1236 } 1237 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1238 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1239 } 1240 } 1241 } else { 1242 for (j = 0; j < n; j++) { 1243 if (indexn[j] < 0) { 1244 idx += m; 1245 continue; 1246 } 1247 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1248 for (i = 0; i < m; i++) { 1249 if (indexm[i] < 0) { 1250 idx++; 1251 continue; 1252 } 1253 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1254 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1255 } 1256 } 1257 } 1258 } else { 1259 if (addv == INSERT_VALUES) { 1260 for (i = 0; i < m; i++) { 1261 if (indexm[i] < 0) { 1262 idx += n; 1263 continue; 1264 } 1265 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1266 for (j = 0; j < n; j++) { 1267 if (indexn[j] < 0) { 1268 idx++; 1269 continue; 1270 } 1271 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1272 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1273 } 1274 } 1275 } else { 1276 for (i = 0; i < m; i++) { 1277 if (indexm[i] < 0) { 1278 idx += n; 1279 continue; 1280 } 1281 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1282 for (j = 0; j < n; j++) { 1283 if (indexn[j] < 0) { 1284 idx++; 1285 continue; 1286 } 1287 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1288 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1289 } 1290 } 1291 } 1292 } 1293 /* hack to prevent unneeded copy to the GPU while returning the array */ 1294 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1295 oldf = A->offloadmask; 1296 A->offloadmask = PETSC_OFFLOAD_GPU; 1297 #endif 1298 PetscCall(MatDenseRestoreArray(A, &av)); 1299 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1300 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1301 #endif 1302 PetscFunctionReturn(PETSC_SUCCESS); 1303 } 1304 1305 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1306 { 1307 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1308 const PetscScalar *vv; 1309 PetscInt i, j; 1310 1311 PetscFunctionBegin; 1312 PetscCall(MatDenseGetArrayRead(A, &vv)); 1313 /* row-oriented output */ 1314 for (i = 0; i < m; i++) { 1315 if (indexm[i] < 0) { 1316 v += n; 1317 continue; 1318 } 1319 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1320 for (j = 0; j < n; j++) { 1321 if (indexn[j] < 0) { 1322 v++; 1323 continue; 1324 } 1325 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1326 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1327 } 1328 } 1329 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1330 PetscFunctionReturn(PETSC_SUCCESS); 1331 } 1332 1333 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1334 { 1335 PetscBool skipHeader; 1336 PetscViewerFormat format; 1337 PetscInt header[4], M, N, m, lda, i, j; 1338 PetscCount k; 1339 const PetscScalar *v; 1340 PetscScalar *vwork; 1341 1342 PetscFunctionBegin; 1343 PetscCall(PetscViewerSetUp(viewer)); 1344 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1345 PetscCall(PetscViewerGetFormat(viewer, &format)); 1346 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1347 1348 PetscCall(MatGetSize(mat, &M, &N)); 1349 1350 /* write matrix header */ 1351 header[0] = MAT_FILE_CLASSID; 1352 header[1] = M; 1353 header[2] = N; 1354 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1355 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1356 1357 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1358 if (format != PETSC_VIEWER_NATIVE) { 1359 PetscInt nnz = m * N, *iwork; 1360 /* store row lengths for each row */ 1361 PetscCall(PetscMalloc1(nnz, &iwork)); 1362 for (i = 0; i < m; i++) iwork[i] = N; 1363 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1364 /* store column indices (zero start index) */ 1365 for (k = 0, i = 0; i < m; i++) 1366 for (j = 0; j < N; j++, k++) iwork[k] = j; 1367 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1368 PetscCall(PetscFree(iwork)); 1369 } 1370 /* store matrix values as a dense matrix in row major order */ 1371 PetscCall(PetscMalloc1(m * N, &vwork)); 1372 PetscCall(MatDenseGetArrayRead(mat, &v)); 1373 PetscCall(MatDenseGetLDA(mat, &lda)); 1374 for (k = 0, i = 0; i < m; i++) 1375 for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j]; 1376 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1377 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1378 PetscCall(PetscFree(vwork)); 1379 PetscFunctionReturn(PETSC_SUCCESS); 1380 } 1381 1382 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1383 { 1384 PetscBool skipHeader; 1385 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1386 PetscInt rows, cols; 1387 PetscScalar *v, *vwork; 1388 1389 PetscFunctionBegin; 1390 PetscCall(PetscViewerSetUp(viewer)); 1391 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1392 1393 if (!skipHeader) { 1394 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1395 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1396 M = header[1]; 1397 N = header[2]; 1398 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1399 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1400 nz = header[3]; 1401 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1402 } else { 1403 PetscCall(MatGetSize(mat, &M, &N)); 1404 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1405 nz = MATRIX_BINARY_FORMAT_DENSE; 1406 } 1407 1408 /* setup global sizes if not set */ 1409 if (mat->rmap->N < 0) mat->rmap->N = M; 1410 if (mat->cmap->N < 0) mat->cmap->N = N; 1411 PetscCall(MatSetUp(mat)); 1412 /* check if global sizes are correct */ 1413 PetscCall(MatGetSize(mat, &rows, &cols)); 1414 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1415 1416 PetscCall(MatGetSize(mat, NULL, &N)); 1417 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1418 PetscCall(MatDenseGetArray(mat, &v)); 1419 PetscCall(MatDenseGetLDA(mat, &lda)); 1420 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1421 PetscCount nnz = (size_t)m * N; 1422 /* read in matrix values */ 1423 PetscCall(PetscMalloc1(nnz, &vwork)); 1424 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1425 /* store values in column major order */ 1426 for (j = 0; j < N; j++) 1427 for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j]; 1428 PetscCall(PetscFree(vwork)); 1429 } else { /* matrix in file is sparse format */ 1430 PetscInt nnz = 0, *rlens, *icols; 1431 /* read in row lengths */ 1432 PetscCall(PetscMalloc1(m, &rlens)); 1433 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1434 for (i = 0; i < m; i++) nnz += rlens[i]; 1435 /* read in column indices and values */ 1436 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1437 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1438 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1439 /* store values in column major order */ 1440 for (k = 0, i = 0; i < m; i++) 1441 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1442 PetscCall(PetscFree(rlens)); 1443 PetscCall(PetscFree2(icols, vwork)); 1444 } 1445 PetscCall(MatDenseRestoreArray(mat, &v)); 1446 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1447 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1448 PetscFunctionReturn(PETSC_SUCCESS); 1449 } 1450 1451 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1452 { 1453 PetscBool isbinary, ishdf5; 1454 1455 PetscFunctionBegin; 1456 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1457 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1458 /* force binary viewer to load .info file if it has not yet done so */ 1459 PetscCall(PetscViewerSetUp(viewer)); 1460 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1461 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1462 if (isbinary) { 1463 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1464 } else if (ishdf5) { 1465 #if defined(PETSC_HAVE_HDF5) 1466 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1467 #else 1468 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1469 #endif 1470 } else { 1471 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1472 } 1473 PetscFunctionReturn(PETSC_SUCCESS); 1474 } 1475 1476 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1477 { 1478 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1479 PetscInt i, j; 1480 const char *name; 1481 PetscScalar *v, *av; 1482 PetscViewerFormat format; 1483 #if defined(PETSC_USE_COMPLEX) 1484 PetscBool allreal = PETSC_TRUE; 1485 #endif 1486 1487 PetscFunctionBegin; 1488 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1489 PetscCall(PetscViewerGetFormat(viewer, &format)); 1490 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1491 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ 1492 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1493 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1494 for (i = 0; i < A->rmap->n; i++) { 1495 v = av + i; 1496 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1497 for (j = 0; j < A->cmap->n; j++) { 1498 #if defined(PETSC_USE_COMPLEX) 1499 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1500 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1501 } else if (PetscRealPart(*v)) { 1502 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1503 } 1504 #else 1505 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1506 #endif 1507 v += a->lda; 1508 } 1509 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1510 } 1511 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1512 } else { 1513 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1514 #if defined(PETSC_USE_COMPLEX) 1515 /* determine if matrix has all real values */ 1516 for (j = 0; j < A->cmap->n; j++) { 1517 v = av + j * a->lda; 1518 for (i = 0; i < A->rmap->n; i++) { 1519 if (PetscImaginaryPart(v[i])) { 1520 allreal = PETSC_FALSE; 1521 break; 1522 } 1523 } 1524 } 1525 #endif 1526 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1527 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1528 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1529 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1530 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1531 } 1532 1533 for (i = 0; i < A->rmap->n; i++) { 1534 v = av + i; 1535 for (j = 0; j < A->cmap->n; j++) { 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (allreal) { 1538 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1539 } else { 1540 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1541 } 1542 #else 1543 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1544 #endif 1545 v += a->lda; 1546 } 1547 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1548 } 1549 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1550 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1551 } 1552 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1553 PetscCall(PetscViewerFlush(viewer)); 1554 PetscFunctionReturn(PETSC_SUCCESS); 1555 } 1556 1557 #include <petscdraw.h> 1558 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1559 { 1560 Mat A = (Mat)Aa; 1561 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1562 int color = PETSC_DRAW_WHITE; 1563 const PetscScalar *v; 1564 PetscViewer viewer; 1565 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1566 PetscViewerFormat format; 1567 1568 PetscFunctionBegin; 1569 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1570 PetscCall(PetscViewerGetFormat(viewer, &format)); 1571 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1572 1573 /* Loop over matrix elements drawing boxes */ 1574 PetscCall(MatDenseGetArrayRead(A, &v)); 1575 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1576 PetscDrawCollectiveBegin(draw); 1577 /* Blue for negative and Red for positive */ 1578 for (j = 0; j < n; j++) { 1579 x_l = j; 1580 x_r = x_l + 1.0; 1581 for (i = 0; i < m; i++) { 1582 y_l = m - i - 1.0; 1583 y_r = y_l + 1.0; 1584 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1585 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1586 else continue; 1587 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1588 } 1589 } 1590 PetscDrawCollectiveEnd(draw); 1591 } else { 1592 /* use contour shading to indicate magnitude of values */ 1593 /* first determine max of all nonzero values */ 1594 PetscReal minv = 0.0, maxv = 0.0; 1595 PetscDraw popup; 1596 1597 for (i = 0; i < m * n; i++) { 1598 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1599 } 1600 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1601 PetscCall(PetscDrawGetPopup(draw, &popup)); 1602 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1603 1604 PetscDrawCollectiveBegin(draw); 1605 for (j = 0; j < n; j++) { 1606 x_l = j; 1607 x_r = x_l + 1.0; 1608 for (i = 0; i < m; i++) { 1609 y_l = m - i - 1.0; 1610 y_r = y_l + 1.0; 1611 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1612 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1613 } 1614 } 1615 PetscDrawCollectiveEnd(draw); 1616 } 1617 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1618 PetscFunctionReturn(PETSC_SUCCESS); 1619 } 1620 1621 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1622 { 1623 PetscDraw draw; 1624 PetscBool isnull; 1625 PetscReal xr, yr, xl, yl, h, w; 1626 1627 PetscFunctionBegin; 1628 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1629 PetscCall(PetscDrawIsNull(draw, &isnull)); 1630 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1631 1632 xr = A->cmap->n; 1633 yr = A->rmap->n; 1634 h = yr / 10.0; 1635 w = xr / 10.0; 1636 xr += w; 1637 yr += h; 1638 xl = -w; 1639 yl = -h; 1640 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1641 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1642 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1643 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1644 PetscCall(PetscDrawSave(draw)); 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1649 { 1650 PetscBool iascii, isbinary, isdraw; 1651 1652 PetscFunctionBegin; 1653 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1654 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1655 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1656 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1657 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1658 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1659 PetscFunctionReturn(PETSC_SUCCESS); 1660 } 1661 1662 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1663 { 1664 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1665 1666 PetscFunctionBegin; 1667 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1668 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1669 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); 1670 a->unplacedarray = a->v; 1671 a->unplaced_user_alloc = a->user_alloc; 1672 a->v = (PetscScalar *)array; 1673 a->user_alloc = PETSC_TRUE; 1674 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1675 A->offloadmask = PETSC_OFFLOAD_CPU; 1676 #endif 1677 PetscFunctionReturn(PETSC_SUCCESS); 1678 } 1679 1680 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1681 { 1682 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1683 1684 PetscFunctionBegin; 1685 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1686 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1687 a->v = a->unplacedarray; 1688 a->user_alloc = a->unplaced_user_alloc; 1689 a->unplacedarray = NULL; 1690 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1691 A->offloadmask = PETSC_OFFLOAD_CPU; 1692 #endif 1693 PetscFunctionReturn(PETSC_SUCCESS); 1694 } 1695 1696 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1697 { 1698 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1699 1700 PetscFunctionBegin; 1701 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1702 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1703 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1704 a->v = (PetscScalar *)array; 1705 a->user_alloc = PETSC_FALSE; 1706 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1707 A->offloadmask = PETSC_OFFLOAD_CPU; 1708 #endif 1709 PetscFunctionReturn(PETSC_SUCCESS); 1710 } 1711 1712 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1713 { 1714 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1715 1716 PetscFunctionBegin; 1717 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); 1718 PetscCall(VecDestroy(&l->qrrhs)); 1719 PetscCall(PetscFree(l->tau)); 1720 PetscCall(PetscFree(l->pivots)); 1721 PetscCall(PetscFree(l->fwork)); 1722 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1723 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1724 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1725 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1726 PetscCall(VecDestroy(&l->cvec)); 1727 PetscCall(MatDestroy(&l->cmat)); 1728 PetscCall(PetscFree(mat->data)); 1729 1730 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1746 #if defined(PETSC_HAVE_ELEMENTAL) 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1748 #endif 1749 #if defined(PETSC_HAVE_SCALAPACK) 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1751 #endif 1752 #if defined(PETSC_HAVE_CUDA) 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1757 #endif 1758 #if defined(PETSC_HAVE_HIP) 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1763 #endif 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1769 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1787 { 1788 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1789 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1790 PetscScalar *v, tmp; 1791 1792 PetscFunctionBegin; 1793 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1794 if (reuse == MAT_INPLACE_MATRIX) { 1795 if (m == n) { /* in place transpose */ 1796 PetscCall(MatDenseGetArray(A, &v)); 1797 for (j = 0; j < m; j++) { 1798 for (k = 0; k < j; k++) { 1799 tmp = v[j + k * M]; 1800 v[j + k * M] = v[k + j * M]; 1801 v[k + j * M] = tmp; 1802 } 1803 } 1804 PetscCall(MatDenseRestoreArray(A, &v)); 1805 } else { /* reuse memory, temporary allocates new memory */ 1806 PetscScalar *v2; 1807 PetscLayout tmplayout; 1808 1809 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1810 PetscCall(MatDenseGetArray(A, &v)); 1811 for (j = 0; j < n; j++) { 1812 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1813 } 1814 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1815 PetscCall(PetscFree(v2)); 1816 PetscCall(MatDenseRestoreArray(A, &v)); 1817 /* cleanup size dependent quantities */ 1818 PetscCall(VecDestroy(&mat->cvec)); 1819 PetscCall(MatDestroy(&mat->cmat)); 1820 PetscCall(PetscFree(mat->pivots)); 1821 PetscCall(PetscFree(mat->fwork)); 1822 /* swap row/col layouts */ 1823 PetscCall(PetscBLASIntCast(n, &mat->lda)); 1824 tmplayout = A->rmap; 1825 A->rmap = A->cmap; 1826 A->cmap = tmplayout; 1827 } 1828 } else { /* out-of-place transpose */ 1829 Mat tmat; 1830 Mat_SeqDense *tmatd; 1831 PetscScalar *v2; 1832 PetscInt M2; 1833 1834 if (reuse == MAT_INITIAL_MATRIX) { 1835 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1836 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1837 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1838 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1839 } else tmat = *matout; 1840 1841 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1842 PetscCall(MatDenseGetArray(tmat, &v2)); 1843 tmatd = (Mat_SeqDense *)tmat->data; 1844 M2 = tmatd->lda; 1845 for (j = 0; j < n; j++) { 1846 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1847 } 1848 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1849 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1850 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1851 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1852 *matout = tmat; 1853 } 1854 PetscFunctionReturn(PETSC_SUCCESS); 1855 } 1856 1857 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1858 { 1859 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1860 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1861 PetscInt i; 1862 const PetscScalar *v1, *v2; 1863 1864 PetscFunctionBegin; 1865 if (A1->rmap->n != A2->rmap->n) { 1866 *flg = PETSC_FALSE; 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 if (A1->cmap->n != A2->cmap->n) { 1870 *flg = PETSC_FALSE; 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1874 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1875 for (i = 0; i < A1->cmap->n; i++) { 1876 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1877 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); 1878 v1 += mat1->lda; 1879 v2 += mat2->lda; 1880 } 1881 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1882 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1883 *flg = PETSC_TRUE; 1884 PetscFunctionReturn(PETSC_SUCCESS); 1885 } 1886 1887 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1888 { 1889 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1890 PetscInt i, n, len; 1891 PetscScalar *x; 1892 const PetscScalar *vv; 1893 1894 PetscFunctionBegin; 1895 PetscCall(VecGetSize(v, &n)); 1896 PetscCall(VecGetArray(v, &x)); 1897 len = PetscMin(A->rmap->n, A->cmap->n); 1898 PetscCall(MatDenseGetArrayRead(A, &vv)); 1899 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1900 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1901 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1902 PetscCall(VecRestoreArray(v, &x)); 1903 PetscFunctionReturn(PETSC_SUCCESS); 1904 } 1905 1906 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1907 { 1908 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1909 const PetscScalar *l, *r; 1910 PetscScalar x, *v, *vv; 1911 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1912 1913 PetscFunctionBegin; 1914 PetscCall(MatDenseGetArray(A, &vv)); 1915 if (ll) { 1916 PetscCall(VecGetSize(ll, &m)); 1917 PetscCall(VecGetArrayRead(ll, &l)); 1918 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1919 for (i = 0; i < m; i++) { 1920 x = l[i]; 1921 v = vv + i; 1922 for (j = 0; j < n; j++) { 1923 (*v) *= x; 1924 v += mat->lda; 1925 } 1926 } 1927 PetscCall(VecRestoreArrayRead(ll, &l)); 1928 PetscCall(PetscLogFlops(1.0 * n * m)); 1929 } 1930 if (rr) { 1931 PetscCall(VecGetSize(rr, &n)); 1932 PetscCall(VecGetArrayRead(rr, &r)); 1933 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1934 for (i = 0; i < n; i++) { 1935 x = r[i]; 1936 v = vv + i * mat->lda; 1937 for (j = 0; j < m; j++) (*v++) *= x; 1938 } 1939 PetscCall(VecRestoreArrayRead(rr, &r)); 1940 PetscCall(PetscLogFlops(1.0 * n * m)); 1941 } 1942 PetscCall(MatDenseRestoreArray(A, &vv)); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1947 { 1948 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1949 PetscScalar *v, *vv; 1950 PetscReal sum = 0.0; 1951 PetscInt lda, m = A->rmap->n, i, j; 1952 1953 PetscFunctionBegin; 1954 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1955 PetscCall(MatDenseGetLDA(A, &lda)); 1956 v = vv; 1957 if (type == NORM_FROBENIUS) { 1958 if (lda > m) { 1959 for (j = 0; j < A->cmap->n; j++) { 1960 v = vv + j * lda; 1961 for (i = 0; i < m; i++) { 1962 sum += PetscRealPart(PetscConj(*v) * (*v)); 1963 v++; 1964 } 1965 } 1966 } else { 1967 #if defined(PETSC_USE_REAL___FP16) 1968 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1969 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1970 } 1971 #else 1972 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1973 sum += PetscRealPart(PetscConj(*v) * (*v)); 1974 v++; 1975 } 1976 } 1977 *nrm = PetscSqrtReal(sum); 1978 #endif 1979 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1980 } else if (type == NORM_1) { 1981 *nrm = 0.0; 1982 for (j = 0; j < A->cmap->n; j++) { 1983 v = vv + j * mat->lda; 1984 sum = 0.0; 1985 for (i = 0; i < A->rmap->n; i++) { 1986 sum += PetscAbsScalar(*v); 1987 v++; 1988 } 1989 if (sum > *nrm) *nrm = sum; 1990 } 1991 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1992 } else if (type == NORM_INFINITY) { 1993 *nrm = 0.0; 1994 for (j = 0; j < A->rmap->n; j++) { 1995 v = vv + j; 1996 sum = 0.0; 1997 for (i = 0; i < A->cmap->n; i++) { 1998 sum += PetscAbsScalar(*v); 1999 v += mat->lda; 2000 } 2001 if (sum > *nrm) *nrm = sum; 2002 } 2003 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2004 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2005 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 2009 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2010 { 2011 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2012 2013 PetscFunctionBegin; 2014 switch (op) { 2015 case MAT_ROW_ORIENTED: 2016 aij->roworiented = flg; 2017 break; 2018 default: 2019 break; 2020 } 2021 PetscFunctionReturn(PETSC_SUCCESS); 2022 } 2023 2024 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2025 { 2026 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2027 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2028 PetscScalar *v; 2029 2030 PetscFunctionBegin; 2031 PetscCall(MatDenseGetArrayWrite(A, &v)); 2032 if (lda > m) { 2033 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2034 } else { 2035 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2036 } 2037 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2038 PetscFunctionReturn(PETSC_SUCCESS); 2039 } 2040 2041 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2042 { 2043 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2044 PetscInt m = l->lda, n = A->cmap->n, i, j; 2045 PetscScalar *slot, *bb, *v; 2046 const PetscScalar *xx; 2047 2048 PetscFunctionBegin; 2049 if (PetscDefined(USE_DEBUG)) { 2050 for (i = 0; i < N; i++) { 2051 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2052 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2053 } 2054 } 2055 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 2056 2057 /* fix right-hand side if needed */ 2058 if (x && b) { 2059 PetscCall(VecGetArrayRead(x, &xx)); 2060 PetscCall(VecGetArray(b, &bb)); 2061 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2062 PetscCall(VecRestoreArrayRead(x, &xx)); 2063 PetscCall(VecRestoreArray(b, &bb)); 2064 } 2065 2066 PetscCall(MatDenseGetArray(A, &v)); 2067 for (i = 0; i < N; i++) { 2068 slot = v + rows[i]; 2069 for (j = 0; j < n; j++) { 2070 *slot = 0.0; 2071 slot += m; 2072 } 2073 } 2074 if (diag != 0.0) { 2075 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2076 for (i = 0; i < N; i++) { 2077 slot = v + (m + 1) * rows[i]; 2078 *slot = diag; 2079 } 2080 } 2081 PetscCall(MatDenseRestoreArray(A, &v)); 2082 PetscFunctionReturn(PETSC_SUCCESS); 2083 } 2084 2085 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2086 { 2087 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2088 2089 PetscFunctionBegin; 2090 *lda = mat->lda; 2091 PetscFunctionReturn(PETSC_SUCCESS); 2092 } 2093 2094 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2095 { 2096 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2097 2098 PetscFunctionBegin; 2099 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2100 *array = mat->v; 2101 PetscFunctionReturn(PETSC_SUCCESS); 2102 } 2103 2104 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2105 { 2106 PetscFunctionBegin; 2107 if (array) *array = NULL; 2108 PetscFunctionReturn(PETSC_SUCCESS); 2109 } 2110 2111 /*@ 2112 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2113 2114 Not Collective 2115 2116 Input Parameter: 2117 . A - a `MATDENSE` or `MATDENSECUDA` matrix 2118 2119 Output Parameter: 2120 . lda - the leading dimension 2121 2122 Level: intermediate 2123 2124 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2125 @*/ 2126 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2127 { 2128 PetscFunctionBegin; 2129 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2130 PetscAssertPointer(lda, 2); 2131 MatCheckPreallocated(A, 1); 2132 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 /*@ 2137 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2138 2139 Collective if the matrix layouts have not yet been setup 2140 2141 Input Parameters: 2142 + A - a `MATDENSE` or `MATDENSECUDA` matrix 2143 - lda - the leading dimension 2144 2145 Level: intermediate 2146 2147 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2148 @*/ 2149 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2150 { 2151 PetscFunctionBegin; 2152 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2153 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 /*@C 2158 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2159 2160 Logically Collective 2161 2162 Input Parameter: 2163 . A - a dense matrix 2164 2165 Output Parameter: 2166 . array - pointer to the data 2167 2168 Level: intermediate 2169 2170 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2171 @*/ 2172 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) PeNS 2173 { 2174 PetscFunctionBegin; 2175 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2176 PetscAssertPointer(array, 2); 2177 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 /*@C 2182 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2183 2184 Logically Collective 2185 2186 Input Parameters: 2187 + A - a dense matrix 2188 - array - pointer to the data (may be `NULL`) 2189 2190 Level: intermediate 2191 2192 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2193 @*/ 2194 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) PeNS 2195 { 2196 PetscFunctionBegin; 2197 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2198 if (array) PetscAssertPointer(array, 2); 2199 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2200 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2201 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2202 A->offloadmask = PETSC_OFFLOAD_CPU; 2203 #endif 2204 PetscFunctionReturn(PETSC_SUCCESS); 2205 } 2206 2207 /*@C 2208 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2209 2210 Not Collective 2211 2212 Input Parameter: 2213 . A - a dense matrix 2214 2215 Output Parameter: 2216 . array - pointer to the data 2217 2218 Level: intermediate 2219 2220 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2221 @*/ 2222 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) PeNS 2223 { 2224 PetscFunctionBegin; 2225 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2226 PetscAssertPointer(array, 2); 2227 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 /*@C 2232 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2233 2234 Not Collective 2235 2236 Input Parameters: 2237 + A - a dense matrix 2238 - array - pointer to the data (may be `NULL`) 2239 2240 Level: intermediate 2241 2242 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2243 @*/ 2244 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) PeNS 2245 { 2246 PetscFunctionBegin; 2247 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2248 if (array) PetscAssertPointer(array, 2); 2249 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 /*@C 2254 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2255 2256 Not Collective 2257 2258 Input Parameter: 2259 . A - a dense matrix 2260 2261 Output Parameter: 2262 . array - pointer to the data 2263 2264 Level: intermediate 2265 2266 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2267 @*/ 2268 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) PeNS 2269 { 2270 PetscFunctionBegin; 2271 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2272 PetscAssertPointer(array, 2); 2273 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2274 PetscFunctionReturn(PETSC_SUCCESS); 2275 } 2276 2277 /*@C 2278 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2279 2280 Not Collective 2281 2282 Input Parameters: 2283 + A - a dense matrix 2284 - array - pointer to the data (may be `NULL`) 2285 2286 Level: intermediate 2287 2288 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2289 @*/ 2290 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) PeNS 2291 { 2292 PetscFunctionBegin; 2293 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2294 if (array) PetscAssertPointer(array, 2); 2295 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2296 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2297 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2298 A->offloadmask = PETSC_OFFLOAD_CPU; 2299 #endif 2300 PetscFunctionReturn(PETSC_SUCCESS); 2301 } 2302 2303 /*@C 2304 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2305 2306 Logically Collective 2307 2308 Input Parameter: 2309 . A - a dense matrix 2310 2311 Output Parameters: 2312 + array - pointer to the data 2313 - mtype - memory type of the returned pointer 2314 2315 Level: intermediate 2316 2317 Note: 2318 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2319 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2320 2321 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, 2322 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2323 @*/ 2324 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2325 { 2326 PetscBool isMPI; 2327 2328 PetscFunctionBegin; 2329 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2330 PetscAssertPointer(array, 2); 2331 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2332 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2333 if (isMPI) { 2334 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2335 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2336 } else { 2337 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2338 2339 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); 2340 if (fptr) { 2341 PetscCall((*fptr)(A, array, mtype)); 2342 } else { 2343 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2344 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2345 } 2346 } 2347 PetscFunctionReturn(PETSC_SUCCESS); 2348 } 2349 2350 /*@C 2351 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` 2352 2353 Logically Collective 2354 2355 Input Parameters: 2356 + A - a dense matrix 2357 - array - pointer to the data 2358 2359 Level: intermediate 2360 2361 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2362 @*/ 2363 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[]) 2364 { 2365 PetscBool isMPI; 2366 2367 PetscFunctionBegin; 2368 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2369 PetscAssertPointer(array, 2); 2370 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2371 if (isMPI) { 2372 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2373 } else { 2374 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2375 2376 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); 2377 if (fptr) { 2378 PetscCall((*fptr)(A, array)); 2379 } else { 2380 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2381 } 2382 *array = NULL; 2383 } 2384 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2385 PetscFunctionReturn(PETSC_SUCCESS); 2386 } 2387 2388 /*@C 2389 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2390 2391 Logically Collective 2392 2393 Input Parameter: 2394 . A - a dense matrix 2395 2396 Output Parameters: 2397 + array - pointer to the data 2398 - mtype - memory type of the returned pointer 2399 2400 Level: intermediate 2401 2402 Note: 2403 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2404 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2405 2406 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, 2407 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2408 @*/ 2409 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype) 2410 { 2411 PetscBool isMPI; 2412 2413 PetscFunctionBegin; 2414 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2415 PetscAssertPointer(array, 2); 2416 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2417 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2418 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2419 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2420 } else { 2421 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); 2422 2423 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); 2424 if (fptr) { 2425 PetscCall((*fptr)(A, array, mtype)); 2426 } else { 2427 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2428 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2429 } 2430 } 2431 PetscFunctionReturn(PETSC_SUCCESS); 2432 } 2433 2434 /*@C 2435 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2436 2437 Logically Collective 2438 2439 Input Parameters: 2440 + A - a dense matrix 2441 - array - pointer to the data 2442 2443 Level: intermediate 2444 2445 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2446 @*/ 2447 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[]) 2448 { 2449 PetscBool isMPI; 2450 2451 PetscFunctionBegin; 2452 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2453 PetscAssertPointer(array, 2); 2454 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2455 if (isMPI) { 2456 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2457 } else { 2458 PetscErrorCode (*fptr)(Mat, const PetscScalar **); 2459 2460 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); 2461 if (fptr) { 2462 PetscCall((*fptr)(A, array)); 2463 } else { 2464 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2465 } 2466 *array = NULL; 2467 } 2468 PetscFunctionReturn(PETSC_SUCCESS); 2469 } 2470 2471 /*@C 2472 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2473 2474 Logically Collective 2475 2476 Input Parameter: 2477 . A - a dense matrix 2478 2479 Output Parameters: 2480 + array - pointer to the data 2481 - mtype - memory type of the returned pointer 2482 2483 Level: intermediate 2484 2485 Note: 2486 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2487 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2488 2489 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, 2490 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2491 @*/ 2492 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2493 { 2494 PetscBool isMPI; 2495 2496 PetscFunctionBegin; 2497 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2498 PetscAssertPointer(array, 2); 2499 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2500 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2501 if (isMPI) { 2502 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2503 } else { 2504 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2505 2506 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); 2507 if (fptr) { 2508 PetscCall((*fptr)(A, array, mtype)); 2509 } else { 2510 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2511 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2512 } 2513 } 2514 PetscFunctionReturn(PETSC_SUCCESS); 2515 } 2516 2517 /*@C 2518 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2519 2520 Logically Collective 2521 2522 Input Parameters: 2523 + A - a dense matrix 2524 - array - pointer to the data 2525 2526 Level: intermediate 2527 2528 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2529 @*/ 2530 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[]) 2531 { 2532 PetscBool isMPI; 2533 2534 PetscFunctionBegin; 2535 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2536 PetscAssertPointer(array, 2); 2537 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2538 if (isMPI) { 2539 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2540 } else { 2541 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2542 2543 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); 2544 if (fptr) { 2545 PetscCall((*fptr)(A, array)); 2546 } else { 2547 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2548 } 2549 *array = NULL; 2550 } 2551 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2552 PetscFunctionReturn(PETSC_SUCCESS); 2553 } 2554 2555 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2556 { 2557 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2558 PetscInt i, j, nrows, ncols, ldb; 2559 const PetscInt *irow, *icol; 2560 PetscScalar *av, *bv, *v = mat->v; 2561 Mat newmat; 2562 2563 PetscFunctionBegin; 2564 PetscCall(ISGetIndices(isrow, &irow)); 2565 PetscCall(ISGetIndices(iscol, &icol)); 2566 PetscCall(ISGetLocalSize(isrow, &nrows)); 2567 PetscCall(ISGetLocalSize(iscol, &ncols)); 2568 2569 /* Check submatrixcall */ 2570 if (scall == MAT_REUSE_MATRIX) { 2571 PetscInt n_cols, n_rows; 2572 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2573 if (n_rows != nrows || n_cols != ncols) { 2574 /* resize the result matrix to match number of requested rows/columns */ 2575 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2576 } 2577 newmat = *B; 2578 } else { 2579 /* Create and fill new matrix */ 2580 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2581 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2582 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2583 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2584 } 2585 2586 /* Now extract the data pointers and do the copy,column at a time */ 2587 PetscCall(MatDenseGetArray(newmat, &bv)); 2588 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2589 for (i = 0; i < ncols; i++) { 2590 av = v + mat->lda * icol[i]; 2591 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2592 bv += ldb; 2593 } 2594 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2595 2596 /* Assemble the matrices so that the correct flags are set */ 2597 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2598 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2599 2600 /* Free work space */ 2601 PetscCall(ISRestoreIndices(isrow, &irow)); 2602 PetscCall(ISRestoreIndices(iscol, &icol)); 2603 *B = newmat; 2604 PetscFunctionReturn(PETSC_SUCCESS); 2605 } 2606 2607 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2608 { 2609 PetscInt i; 2610 2611 PetscFunctionBegin; 2612 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2613 2614 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2615 PetscFunctionReturn(PETSC_SUCCESS); 2616 } 2617 2618 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2619 { 2620 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2621 const PetscScalar *va; 2622 PetscScalar *vb; 2623 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2624 2625 PetscFunctionBegin; 2626 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2627 if (A->ops->copy != B->ops->copy) { 2628 PetscCall(MatCopy_Basic(A, B, str)); 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2632 PetscCall(MatDenseGetArrayRead(A, &va)); 2633 PetscCall(MatDenseGetArray(B, &vb)); 2634 if (lda1 > m || lda2 > m) { 2635 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2636 } else { 2637 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2638 } 2639 PetscCall(MatDenseRestoreArray(B, &vb)); 2640 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2641 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2642 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2643 PetscFunctionReturn(PETSC_SUCCESS); 2644 } 2645 2646 PetscErrorCode MatSetUp_SeqDense(Mat A) 2647 { 2648 PetscFunctionBegin; 2649 PetscCall(PetscLayoutSetUp(A->rmap)); 2650 PetscCall(PetscLayoutSetUp(A->cmap)); 2651 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 PetscErrorCode MatConjugate_SeqDense(Mat A) 2656 { 2657 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2658 PetscInt i, j; 2659 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2660 PetscScalar *aa; 2661 2662 PetscFunctionBegin; 2663 PetscCall(MatDenseGetArray(A, &aa)); 2664 for (j = 0; j < A->cmap->n; j++) { 2665 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2666 } 2667 PetscCall(MatDenseRestoreArray(A, &aa)); 2668 if (mat->tau) 2669 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2674 { 2675 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2676 PetscInt i, j; 2677 PetscScalar *aa; 2678 2679 PetscFunctionBegin; 2680 PetscCall(MatDenseGetArray(A, &aa)); 2681 for (j = 0; j < A->cmap->n; j++) { 2682 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2683 } 2684 PetscCall(MatDenseRestoreArray(A, &aa)); 2685 PetscFunctionReturn(PETSC_SUCCESS); 2686 } 2687 2688 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2689 { 2690 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2691 PetscInt i, j; 2692 PetscScalar *aa; 2693 2694 PetscFunctionBegin; 2695 PetscCall(MatDenseGetArray(A, &aa)); 2696 for (j = 0; j < A->cmap->n; j++) { 2697 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2698 } 2699 PetscCall(MatDenseRestoreArray(A, &aa)); 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2704 { 2705 PetscInt m = A->rmap->n, n = B->cmap->n; 2706 PetscBool cisdense = PETSC_FALSE; 2707 2708 PetscFunctionBegin; 2709 PetscCall(MatSetSizes(C, m, n, m, n)); 2710 #if defined(PETSC_HAVE_CUDA) 2711 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2712 #endif 2713 #if defined(PETSC_HAVE_HIP) 2714 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2715 #endif 2716 if (!cisdense) { 2717 PetscBool flg; 2718 2719 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2720 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2721 } 2722 PetscCall(MatSetUp(C)); 2723 PetscFunctionReturn(PETSC_SUCCESS); 2724 } 2725 2726 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2727 { 2728 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2729 PetscBLASInt m, n, k; 2730 const PetscScalar *av, *bv; 2731 PetscScalar *cv; 2732 PetscScalar _DOne = 1.0, _DZero = 0.0; 2733 2734 PetscFunctionBegin; 2735 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2736 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2737 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2738 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2739 PetscCall(MatDenseGetArrayRead(A, &av)); 2740 PetscCall(MatDenseGetArrayRead(B, &bv)); 2741 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2742 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2743 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2744 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2745 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2746 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2747 PetscFunctionReturn(PETSC_SUCCESS); 2748 } 2749 2750 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2751 { 2752 PetscInt m = A->rmap->n, n = B->rmap->n; 2753 PetscBool cisdense = PETSC_FALSE; 2754 2755 PetscFunctionBegin; 2756 PetscCall(MatSetSizes(C, m, n, m, n)); 2757 #if defined(PETSC_HAVE_CUDA) 2758 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2759 #endif 2760 #if defined(PETSC_HAVE_HIP) 2761 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2762 #endif 2763 if (!cisdense) { 2764 PetscBool flg; 2765 2766 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2767 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2768 } 2769 PetscCall(MatSetUp(C)); 2770 PetscFunctionReturn(PETSC_SUCCESS); 2771 } 2772 2773 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2774 { 2775 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2776 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2777 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2778 const PetscScalar *av, *bv; 2779 PetscScalar *cv; 2780 PetscBLASInt m, n, k; 2781 PetscScalar _DOne = 1.0, _DZero = 0.0; 2782 2783 PetscFunctionBegin; 2784 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2785 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2786 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2787 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2788 PetscCall(MatDenseGetArrayRead(A, &av)); 2789 PetscCall(MatDenseGetArrayRead(B, &bv)); 2790 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2791 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2792 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2793 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2794 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2795 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2796 PetscFunctionReturn(PETSC_SUCCESS); 2797 } 2798 2799 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2800 { 2801 PetscInt m = A->cmap->n, n = B->cmap->n; 2802 PetscBool cisdense = PETSC_FALSE; 2803 2804 PetscFunctionBegin; 2805 PetscCall(MatSetSizes(C, m, n, m, n)); 2806 #if defined(PETSC_HAVE_CUDA) 2807 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2808 #endif 2809 #if defined(PETSC_HAVE_HIP) 2810 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2811 #endif 2812 if (!cisdense) { 2813 PetscBool flg; 2814 2815 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2816 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2817 } 2818 PetscCall(MatSetUp(C)); 2819 PetscFunctionReturn(PETSC_SUCCESS); 2820 } 2821 2822 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2823 { 2824 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2825 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2826 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2827 const PetscScalar *av, *bv; 2828 PetscScalar *cv; 2829 PetscBLASInt m, n, k; 2830 PetscScalar _DOne = 1.0, _DZero = 0.0; 2831 2832 PetscFunctionBegin; 2833 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2834 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2835 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2836 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2837 PetscCall(MatDenseGetArrayRead(A, &av)); 2838 PetscCall(MatDenseGetArrayRead(B, &bv)); 2839 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2840 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2841 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2842 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2843 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2844 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2845 PetscFunctionReturn(PETSC_SUCCESS); 2846 } 2847 2848 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2849 { 2850 PetscFunctionBegin; 2851 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2852 C->ops->productsymbolic = MatProductSymbolic_AB; 2853 PetscFunctionReturn(PETSC_SUCCESS); 2854 } 2855 2856 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2857 { 2858 PetscFunctionBegin; 2859 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2860 C->ops->productsymbolic = MatProductSymbolic_AtB; 2861 PetscFunctionReturn(PETSC_SUCCESS); 2862 } 2863 2864 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2865 { 2866 PetscFunctionBegin; 2867 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2868 C->ops->productsymbolic = MatProductSymbolic_ABt; 2869 PetscFunctionReturn(PETSC_SUCCESS); 2870 } 2871 2872 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2873 { 2874 Mat_Product *product = C->product; 2875 2876 PetscFunctionBegin; 2877 switch (product->type) { 2878 case MATPRODUCT_AB: 2879 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2880 break; 2881 case MATPRODUCT_AtB: 2882 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2883 break; 2884 case MATPRODUCT_ABt: 2885 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2886 break; 2887 default: 2888 break; 2889 } 2890 PetscFunctionReturn(PETSC_SUCCESS); 2891 } 2892 2893 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2894 { 2895 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2896 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2897 PetscScalar *x; 2898 const PetscScalar *aa; 2899 2900 PetscFunctionBegin; 2901 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2902 PetscCall(VecGetArray(v, &x)); 2903 PetscCall(VecGetLocalSize(v, &p)); 2904 PetscCall(MatDenseGetArrayRead(A, &aa)); 2905 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2906 for (i = 0; i < m; i++) { 2907 x[i] = aa[i]; 2908 if (idx) idx[i] = 0; 2909 for (j = 1; j < n; j++) { 2910 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2911 x[i] = aa[i + a->lda * j]; 2912 if (idx) idx[i] = j; 2913 } 2914 } 2915 } 2916 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2917 PetscCall(VecRestoreArray(v, &x)); 2918 PetscFunctionReturn(PETSC_SUCCESS); 2919 } 2920 2921 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2922 { 2923 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2924 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2925 PetscScalar *x; 2926 PetscReal atmp; 2927 const PetscScalar *aa; 2928 2929 PetscFunctionBegin; 2930 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2931 PetscCall(VecGetArray(v, &x)); 2932 PetscCall(VecGetLocalSize(v, &p)); 2933 PetscCall(MatDenseGetArrayRead(A, &aa)); 2934 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2935 for (i = 0; i < m; i++) { 2936 x[i] = PetscAbsScalar(aa[i]); 2937 for (j = 1; j < n; j++) { 2938 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2939 if (PetscAbsScalar(x[i]) < atmp) { 2940 x[i] = atmp; 2941 if (idx) idx[i] = j; 2942 } 2943 } 2944 } 2945 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2946 PetscCall(VecRestoreArray(v, &x)); 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2951 { 2952 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2953 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2954 PetscScalar *x; 2955 const PetscScalar *aa; 2956 2957 PetscFunctionBegin; 2958 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2959 PetscCall(MatDenseGetArrayRead(A, &aa)); 2960 PetscCall(VecGetArray(v, &x)); 2961 PetscCall(VecGetLocalSize(v, &p)); 2962 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2963 for (i = 0; i < m; i++) { 2964 x[i] = aa[i]; 2965 if (idx) idx[i] = 0; 2966 for (j = 1; j < n; j++) { 2967 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2968 x[i] = aa[i + a->lda * j]; 2969 if (idx) idx[i] = j; 2970 } 2971 } 2972 } 2973 PetscCall(VecRestoreArray(v, &x)); 2974 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2975 PetscFunctionReturn(PETSC_SUCCESS); 2976 } 2977 2978 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 2979 { 2980 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2981 PetscScalar *x; 2982 const PetscScalar *aa; 2983 2984 PetscFunctionBegin; 2985 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2986 PetscCall(MatDenseGetArrayRead(A, &aa)); 2987 PetscCall(VecGetArray(v, &x)); 2988 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2989 PetscCall(VecRestoreArray(v, &x)); 2990 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2991 PetscFunctionReturn(PETSC_SUCCESS); 2992 } 2993 2994 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 2995 { 2996 PetscInt i, j, m, n; 2997 const PetscScalar *a; 2998 2999 PetscFunctionBegin; 3000 PetscCall(MatGetSize(A, &m, &n)); 3001 PetscCall(PetscArrayzero(reductions, n)); 3002 PetscCall(MatDenseGetArrayRead(A, &a)); 3003 if (type == NORM_2) { 3004 for (i = 0; i < n; i++) { 3005 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 3006 a = PetscSafePointerPlusOffset(a, m); 3007 } 3008 } else if (type == NORM_1) { 3009 for (i = 0; i < n; i++) { 3010 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 3011 a = PetscSafePointerPlusOffset(a, m); 3012 } 3013 } else if (type == NORM_INFINITY) { 3014 for (i = 0; i < n; i++) { 3015 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 3016 a = PetscSafePointerPlusOffset(a, m); 3017 } 3018 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 3019 for (i = 0; i < n; i++) { 3020 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 3021 a = PetscSafePointerPlusOffset(a, m); 3022 } 3023 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3024 for (i = 0; i < n; i++) { 3025 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 3026 a = PetscSafePointerPlusOffset(a, m); 3027 } 3028 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3029 PetscCall(MatDenseRestoreArrayRead(A, &a)); 3030 if (type == NORM_2) { 3031 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 3032 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3033 for (i = 0; i < n; i++) reductions[i] /= m; 3034 } 3035 PetscFunctionReturn(PETSC_SUCCESS); 3036 } 3037 3038 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 3039 { 3040 PetscScalar *a; 3041 PetscInt lda, m, n, i, j; 3042 3043 PetscFunctionBegin; 3044 PetscCall(MatGetSize(x, &m, &n)); 3045 PetscCall(MatDenseGetLDA(x, &lda)); 3046 PetscCall(MatDenseGetArrayWrite(x, &a)); 3047 for (j = 0; j < n; j++) { 3048 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 3049 } 3050 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 3051 PetscFunctionReturn(PETSC_SUCCESS); 3052 } 3053 3054 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 3055 { 3056 PetscFunctionBegin; 3057 *missing = PETSC_FALSE; 3058 PetscFunctionReturn(PETSC_SUCCESS); 3059 } 3060 3061 /* vals is not const */ 3062 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 3063 { 3064 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3065 PetscScalar *v; 3066 3067 PetscFunctionBegin; 3068 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3069 PetscCall(MatDenseGetArray(A, &v)); 3070 *vals = v + col * a->lda; 3071 PetscCall(MatDenseRestoreArray(A, &v)); 3072 PetscFunctionReturn(PETSC_SUCCESS); 3073 } 3074 3075 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 3076 { 3077 PetscFunctionBegin; 3078 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 3079 PetscFunctionReturn(PETSC_SUCCESS); 3080 } 3081 3082 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 3083 MatGetRow_SeqDense, 3084 MatRestoreRow_SeqDense, 3085 MatMult_SeqDense, 3086 /* 4*/ MatMultAdd_SeqDense, 3087 MatMultTranspose_SeqDense, 3088 MatMultTransposeAdd_SeqDense, 3089 NULL, 3090 NULL, 3091 NULL, 3092 /* 10*/ NULL, 3093 MatLUFactor_SeqDense, 3094 MatCholeskyFactor_SeqDense, 3095 MatSOR_SeqDense, 3096 MatTranspose_SeqDense, 3097 /* 15*/ MatGetInfo_SeqDense, 3098 MatEqual_SeqDense, 3099 MatGetDiagonal_SeqDense, 3100 MatDiagonalScale_SeqDense, 3101 MatNorm_SeqDense, 3102 /* 20*/ NULL, 3103 NULL, 3104 MatSetOption_SeqDense, 3105 MatZeroEntries_SeqDense, 3106 /* 24*/ MatZeroRows_SeqDense, 3107 NULL, 3108 NULL, 3109 NULL, 3110 NULL, 3111 /* 29*/ MatSetUp_SeqDense, 3112 NULL, 3113 NULL, 3114 NULL, 3115 NULL, 3116 /* 34*/ MatDuplicate_SeqDense, 3117 NULL, 3118 NULL, 3119 NULL, 3120 NULL, 3121 /* 39*/ MatAXPY_SeqDense, 3122 MatCreateSubMatrices_SeqDense, 3123 NULL, 3124 MatGetValues_SeqDense, 3125 MatCopy_SeqDense, 3126 /* 44*/ MatGetRowMax_SeqDense, 3127 MatScale_SeqDense, 3128 MatShift_SeqDense, 3129 NULL, 3130 MatZeroRowsColumns_SeqDense, 3131 /* 49*/ MatSetRandom_SeqDense, 3132 NULL, 3133 NULL, 3134 NULL, 3135 NULL, 3136 /* 54*/ NULL, 3137 NULL, 3138 NULL, 3139 NULL, 3140 NULL, 3141 /* 59*/ MatCreateSubMatrix_SeqDense, 3142 MatDestroy_SeqDense, 3143 MatView_SeqDense, 3144 NULL, 3145 NULL, 3146 /* 64*/ NULL, 3147 NULL, 3148 NULL, 3149 NULL, 3150 NULL, 3151 /* 69*/ MatGetRowMaxAbs_SeqDense, 3152 NULL, 3153 NULL, 3154 NULL, 3155 NULL, 3156 /* 74*/ NULL, 3157 NULL, 3158 NULL, 3159 NULL, 3160 NULL, 3161 /* 79*/ NULL, 3162 NULL, 3163 NULL, 3164 NULL, 3165 /* 83*/ MatLoad_SeqDense, 3166 MatIsSymmetric_SeqDense, 3167 MatIsHermitian_SeqDense, 3168 NULL, 3169 NULL, 3170 NULL, 3171 /* 89*/ NULL, 3172 NULL, 3173 MatMatMultNumeric_SeqDense_SeqDense, 3174 NULL, 3175 NULL, 3176 /* 94*/ NULL, 3177 NULL, 3178 NULL, 3179 MatMatTransposeMultNumeric_SeqDense_SeqDense, 3180 NULL, 3181 /* 99*/ MatProductSetFromOptions_SeqDense, 3182 NULL, 3183 NULL, 3184 MatConjugate_SeqDense, 3185 NULL, 3186 /*104*/ NULL, 3187 MatRealPart_SeqDense, 3188 MatImaginaryPart_SeqDense, 3189 NULL, 3190 NULL, 3191 /*109*/ NULL, 3192 NULL, 3193 MatGetRowMin_SeqDense, 3194 MatGetColumnVector_SeqDense, 3195 MatMissingDiagonal_SeqDense, 3196 /*114*/ NULL, 3197 NULL, 3198 NULL, 3199 NULL, 3200 NULL, 3201 /*119*/ NULL, 3202 NULL, 3203 MatMultHermitianTranspose_SeqDense, 3204 MatMultHermitianTransposeAdd_SeqDense, 3205 NULL, 3206 /*124*/ NULL, 3207 MatGetColumnReductions_SeqDense, 3208 NULL, 3209 NULL, 3210 NULL, 3211 /*129*/ NULL, 3212 NULL, 3213 NULL, 3214 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3215 NULL, 3216 /*134*/ NULL, 3217 NULL, 3218 NULL, 3219 NULL, 3220 NULL, 3221 /*139*/ NULL, 3222 NULL, 3223 NULL, 3224 NULL, 3225 NULL, 3226 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3227 /*145*/ NULL, 3228 NULL, 3229 NULL, 3230 NULL, 3231 NULL, 3232 /*150*/ NULL, 3233 NULL, 3234 NULL, 3235 NULL, 3236 NULL, 3237 /*155*/ NULL, 3238 NULL}; 3239 3240 /*@ 3241 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3242 is stored in column major order (the usual Fortran format). 3243 3244 Collective 3245 3246 Input Parameters: 3247 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3248 . m - number of rows 3249 . n - number of columns 3250 - data - optional location of matrix data in column major order. Use `NULL` for PETSc 3251 to control all matrix memory allocation. 3252 3253 Output Parameter: 3254 . A - the matrix 3255 3256 Level: intermediate 3257 3258 Note: 3259 The data input variable is intended primarily for Fortran programmers 3260 who wish to allocate their own matrix memory space. Most users should 3261 set `data` = `NULL`. 3262 3263 Developer Note: 3264 Many of the matrix operations for this variant use the BLAS and LAPACK routines. 3265 3266 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3267 @*/ 3268 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A) 3269 { 3270 PetscFunctionBegin; 3271 PetscCall(MatCreate(comm, A)); 3272 PetscCall(MatSetSizes(*A, m, n, m, n)); 3273 PetscCall(MatSetType(*A, MATSEQDENSE)); 3274 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3275 PetscFunctionReturn(PETSC_SUCCESS); 3276 } 3277 3278 /*@ 3279 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3280 3281 Collective 3282 3283 Input Parameters: 3284 + B - the matrix 3285 - data - the array (or `NULL`) 3286 3287 Level: intermediate 3288 3289 Note: 3290 The data input variable is intended primarily for Fortran programmers 3291 who wish to allocate their own matrix memory space. Most users should 3292 need not call this routine. 3293 3294 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3295 @*/ 3296 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3297 { 3298 PetscFunctionBegin; 3299 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3300 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3301 PetscFunctionReturn(PETSC_SUCCESS); 3302 } 3303 3304 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3305 { 3306 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3307 3308 PetscFunctionBegin; 3309 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3310 B->preallocated = PETSC_TRUE; 3311 3312 PetscCall(PetscLayoutSetUp(B->rmap)); 3313 PetscCall(PetscLayoutSetUp(B->cmap)); 3314 3315 if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda)); 3316 3317 if (!data) { /* petsc-allocated storage */ 3318 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3319 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3320 3321 b->user_alloc = PETSC_FALSE; 3322 } else { /* user-allocated storage */ 3323 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3324 b->v = data; 3325 b->user_alloc = PETSC_TRUE; 3326 } 3327 B->assembled = PETSC_TRUE; 3328 PetscFunctionReturn(PETSC_SUCCESS); 3329 } 3330 3331 #if defined(PETSC_HAVE_ELEMENTAL) 3332 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3333 { 3334 Mat mat_elemental; 3335 const PetscScalar *array; 3336 PetscScalar *v_colwise; 3337 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3338 3339 PetscFunctionBegin; 3340 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3341 PetscCall(MatDenseGetArrayRead(A, &array)); 3342 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3343 k = 0; 3344 for (j = 0; j < N; j++) { 3345 cols[j] = j; 3346 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3347 } 3348 for (i = 0; i < M; i++) rows[i] = i; 3349 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3350 3351 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3352 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3353 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3354 PetscCall(MatSetUp(mat_elemental)); 3355 3356 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3357 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3358 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3359 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3360 PetscCall(PetscFree3(v_colwise, rows, cols)); 3361 3362 if (reuse == MAT_INPLACE_MATRIX) { 3363 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3364 } else { 3365 *newmat = mat_elemental; 3366 } 3367 PetscFunctionReturn(PETSC_SUCCESS); 3368 } 3369 #endif 3370 3371 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3372 { 3373 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3374 PetscBool data; 3375 3376 PetscFunctionBegin; 3377 data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 3378 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3379 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3380 PetscCall(PetscBLASIntCast(lda, &b->lda)); 3381 PetscFunctionReturn(PETSC_SUCCESS); 3382 } 3383 3384 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3385 { 3386 PetscFunctionBegin; 3387 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3388 PetscFunctionReturn(PETSC_SUCCESS); 3389 } 3390 3391 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) 3392 { 3393 PetscBool isstd, iskok, iscuda, iship; 3394 PetscMPIInt size; 3395 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) 3396 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ 3397 const PetscScalar *a; 3398 #endif 3399 3400 PetscFunctionBegin; 3401 *v = NULL; 3402 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); 3403 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); 3404 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); 3405 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); 3406 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3407 if (isstd) { 3408 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3409 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3410 } else if (iskok) { 3411 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); 3412 #if PetscDefined(HAVE_KOKKOS_KERNELS) 3413 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3414 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3415 #endif 3416 } else if (iscuda) { 3417 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); 3418 #if PetscDefined(HAVE_CUDA) 3419 PetscCall(MatDenseCUDAGetArrayRead(A, &a)); 3420 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3421 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3422 #endif 3423 } else if (iship) { 3424 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); 3425 #if PetscDefined(HAVE_HIP) 3426 PetscCall(MatDenseHIPGetArrayRead(A, &a)); 3427 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3428 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3429 #endif 3430 } 3431 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); 3432 PetscFunctionReturn(PETSC_SUCCESS); 3433 } 3434 3435 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3436 { 3437 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3438 3439 PetscFunctionBegin; 3440 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3441 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3442 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3443 a->vecinuse = col + 1; 3444 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3445 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3446 *v = a->cvec; 3447 PetscFunctionReturn(PETSC_SUCCESS); 3448 } 3449 3450 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3451 { 3452 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3453 3454 PetscFunctionBegin; 3455 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3456 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3457 VecCheckAssembled(a->cvec); 3458 a->vecinuse = 0; 3459 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3460 PetscCall(VecResetArray(a->cvec)); 3461 if (v) *v = NULL; 3462 PetscFunctionReturn(PETSC_SUCCESS); 3463 } 3464 3465 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3466 { 3467 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3468 3469 PetscFunctionBegin; 3470 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3471 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3472 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3473 a->vecinuse = col + 1; 3474 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3475 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3476 PetscCall(VecLockReadPush(a->cvec)); 3477 *v = a->cvec; 3478 PetscFunctionReturn(PETSC_SUCCESS); 3479 } 3480 3481 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3482 { 3483 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3484 3485 PetscFunctionBegin; 3486 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3487 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3488 VecCheckAssembled(a->cvec); 3489 a->vecinuse = 0; 3490 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3491 PetscCall(VecLockReadPop(a->cvec)); 3492 PetscCall(VecResetArray(a->cvec)); 3493 if (v) *v = NULL; 3494 PetscFunctionReturn(PETSC_SUCCESS); 3495 } 3496 3497 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3498 { 3499 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3500 3501 PetscFunctionBegin; 3502 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3503 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3504 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3505 a->vecinuse = col + 1; 3506 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3507 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3508 *v = a->cvec; 3509 PetscFunctionReturn(PETSC_SUCCESS); 3510 } 3511 3512 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3513 { 3514 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3515 3516 PetscFunctionBegin; 3517 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3518 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3519 VecCheckAssembled(a->cvec); 3520 a->vecinuse = 0; 3521 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3522 PetscCall(VecResetArray(a->cvec)); 3523 if (v) *v = NULL; 3524 PetscFunctionReturn(PETSC_SUCCESS); 3525 } 3526 3527 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3528 { 3529 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3530 3531 PetscFunctionBegin; 3532 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3533 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3534 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3535 if (!a->cmat) { 3536 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); 3537 } else { 3538 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); 3539 } 3540 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3541 a->matinuse = cbegin + 1; 3542 *v = a->cmat; 3543 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3544 A->offloadmask = PETSC_OFFLOAD_CPU; 3545 #endif 3546 PetscFunctionReturn(PETSC_SUCCESS); 3547 } 3548 3549 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3550 { 3551 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3552 3553 PetscFunctionBegin; 3554 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3555 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3556 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3557 a->matinuse = 0; 3558 PetscCall(MatDenseResetArray(a->cmat)); 3559 if (v) *v = NULL; 3560 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3561 A->offloadmask = PETSC_OFFLOAD_CPU; 3562 #endif 3563 PetscFunctionReturn(PETSC_SUCCESS); 3564 } 3565 3566 /*MC 3567 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3568 3569 Options Database Key: 3570 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3571 3572 Level: beginner 3573 3574 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` 3575 M*/ 3576 PetscErrorCode MatCreate_SeqDense(Mat B) 3577 { 3578 Mat_SeqDense *b; 3579 PetscMPIInt size; 3580 3581 PetscFunctionBegin; 3582 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3583 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3584 3585 PetscCall(PetscNew(&b)); 3586 B->data = (void *)b; 3587 B->ops[0] = MatOps_Values; 3588 3589 b->roworiented = PETSC_TRUE; 3590 3591 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3592 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3593 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3594 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3604 #if defined(PETSC_HAVE_ELEMENTAL) 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3606 #endif 3607 #if defined(PETSC_HAVE_SCALAPACK) 3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3609 #endif 3610 #if defined(PETSC_HAVE_CUDA) 3611 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3612 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3613 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3614 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3615 #endif 3616 #if defined(PETSC_HAVE_HIP) 3617 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3618 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3619 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3620 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3621 #endif 3622 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3623 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3624 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3626 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3627 3628 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3629 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3633 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3635 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3636 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3638 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); 3639 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); 3640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); 3641 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3642 PetscFunctionReturn(PETSC_SUCCESS); 3643 } 3644 3645 /*@C 3646 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3647 3648 Not Collective 3649 3650 Input Parameters: 3651 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3652 - col - column index 3653 3654 Output Parameter: 3655 . vals - pointer to the data 3656 3657 Level: intermediate 3658 3659 Note: 3660 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3661 3662 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3663 @*/ 3664 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[]) 3665 { 3666 PetscFunctionBegin; 3667 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3668 PetscValidLogicalCollectiveInt(A, col, 2); 3669 PetscAssertPointer(vals, 3); 3670 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3671 PetscFunctionReturn(PETSC_SUCCESS); 3672 } 3673 3674 /*@C 3675 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3676 3677 Not Collective 3678 3679 Input Parameters: 3680 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3681 - vals - pointer to the data (may be `NULL`) 3682 3683 Level: intermediate 3684 3685 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` 3686 @*/ 3687 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[]) 3688 { 3689 PetscFunctionBegin; 3690 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3691 PetscAssertPointer(vals, 2); 3692 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3693 PetscFunctionReturn(PETSC_SUCCESS); 3694 } 3695 3696 /*@ 3697 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3698 3699 Collective 3700 3701 Input Parameters: 3702 + A - the `Mat` object 3703 - col - the column index 3704 3705 Output Parameter: 3706 . v - the vector 3707 3708 Level: intermediate 3709 3710 Notes: 3711 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3712 3713 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3714 3715 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3716 @*/ 3717 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3718 { 3719 PetscFunctionBegin; 3720 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3721 PetscValidType(A, 1); 3722 PetscValidLogicalCollectiveInt(A, col, 2); 3723 PetscAssertPointer(v, 3); 3724 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3725 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3726 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3727 PetscFunctionReturn(PETSC_SUCCESS); 3728 } 3729 3730 /*@ 3731 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. 3732 3733 Collective 3734 3735 Input Parameters: 3736 + A - the `Mat` object 3737 . col - the column index 3738 - v - the `Vec` object (may be `NULL`) 3739 3740 Level: intermediate 3741 3742 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3743 @*/ 3744 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3745 { 3746 PetscFunctionBegin; 3747 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3748 PetscValidType(A, 1); 3749 PetscValidLogicalCollectiveInt(A, col, 2); 3750 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3751 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3752 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3753 PetscFunctionReturn(PETSC_SUCCESS); 3754 } 3755 3756 /*@ 3757 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. 3758 3759 Collective 3760 3761 Input Parameters: 3762 + A - the `Mat` object 3763 - col - the column index 3764 3765 Output Parameter: 3766 . v - the vector 3767 3768 Level: intermediate 3769 3770 Notes: 3771 The vector is owned by PETSc and users cannot modify it. 3772 3773 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. 3774 3775 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. 3776 3777 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3778 @*/ 3779 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3780 { 3781 PetscFunctionBegin; 3782 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3783 PetscValidType(A, 1); 3784 PetscValidLogicalCollectiveInt(A, col, 2); 3785 PetscAssertPointer(v, 3); 3786 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3787 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3788 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3789 PetscFunctionReturn(PETSC_SUCCESS); 3790 } 3791 3792 /*@ 3793 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. 3794 3795 Collective 3796 3797 Input Parameters: 3798 + A - the `Mat` object 3799 . col - the column index 3800 - v - the `Vec` object (may be `NULL`) 3801 3802 Level: intermediate 3803 3804 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3805 @*/ 3806 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3807 { 3808 PetscFunctionBegin; 3809 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3810 PetscValidType(A, 1); 3811 PetscValidLogicalCollectiveInt(A, col, 2); 3812 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3813 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3814 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3815 PetscFunctionReturn(PETSC_SUCCESS); 3816 } 3817 3818 /*@ 3819 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. 3820 3821 Collective 3822 3823 Input Parameters: 3824 + A - the `Mat` object 3825 - col - the column index 3826 3827 Output Parameter: 3828 . v - the vector 3829 3830 Level: intermediate 3831 3832 Notes: 3833 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. 3834 3835 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. 3836 3837 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3838 @*/ 3839 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3840 { 3841 PetscFunctionBegin; 3842 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3843 PetscValidType(A, 1); 3844 PetscValidLogicalCollectiveInt(A, col, 2); 3845 PetscAssertPointer(v, 3); 3846 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3847 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3848 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3849 PetscFunctionReturn(PETSC_SUCCESS); 3850 } 3851 3852 /*@ 3853 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. 3854 3855 Collective 3856 3857 Input Parameters: 3858 + A - the `Mat` object 3859 . col - the column index 3860 - v - the `Vec` object (may be `NULL`) 3861 3862 Level: intermediate 3863 3864 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3865 @*/ 3866 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3867 { 3868 PetscFunctionBegin; 3869 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3870 PetscValidType(A, 1); 3871 PetscValidLogicalCollectiveInt(A, col, 2); 3872 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3873 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3874 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3875 PetscFunctionReturn(PETSC_SUCCESS); 3876 } 3877 3878 /*@ 3879 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. 3880 3881 Collective 3882 3883 Input Parameters: 3884 + A - the `Mat` object 3885 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) 3886 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) 3887 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) 3888 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) 3889 3890 Output Parameter: 3891 . v - the matrix 3892 3893 Level: intermediate 3894 3895 Notes: 3896 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. 3897 3898 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. 3899 3900 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3901 @*/ 3902 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3903 { 3904 PetscFunctionBegin; 3905 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3906 PetscValidType(A, 1); 3907 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3908 PetscValidLogicalCollectiveInt(A, rend, 3); 3909 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3910 PetscValidLogicalCollectiveInt(A, cend, 5); 3911 PetscAssertPointer(v, 6); 3912 if (rbegin == PETSC_DECIDE) rbegin = 0; 3913 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3914 if (cbegin == PETSC_DECIDE) cbegin = 0; 3915 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3916 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3917 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3918 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3919 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3920 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3921 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3922 PetscFunctionReturn(PETSC_SUCCESS); 3923 } 3924 3925 /*@ 3926 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. 3927 3928 Collective 3929 3930 Input Parameters: 3931 + A - the `Mat` object 3932 - v - the `Mat` object (may be `NULL`) 3933 3934 Level: intermediate 3935 3936 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3937 @*/ 3938 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3939 { 3940 PetscFunctionBegin; 3941 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3942 PetscValidType(A, 1); 3943 PetscAssertPointer(v, 2); 3944 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3945 PetscFunctionReturn(PETSC_SUCCESS); 3946 } 3947 3948 #include <petscblaslapack.h> 3949 #include <petsc/private/kernels/blockinvert.h> 3950 3951 PetscErrorCode MatSeqDenseInvert(Mat A) 3952 { 3953 PetscInt m; 3954 const PetscReal shift = 0.0; 3955 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 3956 PetscScalar *values; 3957 3958 PetscFunctionBegin; 3959 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3960 PetscCall(MatDenseGetArray(A, &values)); 3961 PetscCall(MatGetLocalSize(A, &m, NULL)); 3962 allowzeropivot = PetscNot(A->erroriffailure); 3963 /* factor and invert each block */ 3964 switch (m) { 3965 case 1: 3966 values[0] = (PetscScalar)1.0 / (values[0] + shift); 3967 break; 3968 case 2: 3969 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3970 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3971 break; 3972 case 3: 3973 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3974 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3975 break; 3976 case 4: 3977 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3978 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3979 break; 3980 case 5: { 3981 PetscScalar work[25]; 3982 PetscInt ipvt[5]; 3983 3984 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3985 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3986 } break; 3987 case 6: 3988 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3989 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3990 break; 3991 case 7: 3992 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3993 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3994 break; 3995 default: { 3996 PetscInt *v_pivots, *IJ, j; 3997 PetscScalar *v_work; 3998 3999 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); 4000 for (j = 0; j < m; j++) IJ[j] = j; 4001 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4002 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4003 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 4004 } 4005 } 4006 PetscCall(MatDenseRestoreArray(A, &values)); 4007 PetscFunctionReturn(PETSC_SUCCESS); 4008 } 4009