1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/dense/mpi/mpidense.h> 9 #include <petscblaslapack.h> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <petsc/private/vecimpl.h> 12 13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 PetscInt j, k, n = A->rmap->n; 17 PetscScalar *v; 18 19 PetscFunctionBegin; 20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 21 PetscCall(MatDenseGetArray(A, &v)); 22 if (!hermitian) { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 25 } 26 } else { 27 for (k = 0; k < n; k++) { 28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 29 } 30 } 31 PetscCall(MatDenseRestoreArray(A, &v)); 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 36 { 37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 38 PetscBLASInt info, n; 39 40 PetscFunctionBegin; 41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 42 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 43 if (A->factortype == MAT_FACTOR_LU) { 44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 45 if (!mat->fwork) { 46 mat->lfwork = n; 47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 48 } 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 54 if (A->spd == PETSC_BOOL3_TRUE) { 55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 57 PetscCall(PetscFPTrapPop()); 58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 59 #if defined(PETSC_USE_COMPLEX) 60 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 65 PetscCall(PetscFPTrapPop()); 66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 67 #endif 68 } else { /* symmetric case */ 69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 73 PetscCall(PetscFPTrapPop()); 74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 75 } 76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1); 77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 79 80 A->ops->solve = NULL; 81 A->ops->matsolve = NULL; 82 A->ops->solvetranspose = NULL; 83 A->ops->matsolvetranspose = NULL; 84 A->ops->solveadd = NULL; 85 A->ops->solvetransposeadd = NULL; 86 A->factortype = MAT_FACTOR_NONE; 87 PetscCall(PetscFree(A->solvertype)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 92 { 93 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 95 PetscScalar *slot, *bb, *v; 96 const PetscScalar *xx; 97 98 PetscFunctionBegin; 99 if (PetscDefined(USE_DEBUG)) { 100 for (i = 0; i < N; i++) { 101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 104 } 105 } 106 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 107 108 /* fix right-hand side if needed */ 109 if (x && b) { 110 Vec xt; 111 112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 113 PetscCall(VecDuplicate(x, &xt)); 114 PetscCall(VecCopy(x, xt)); 115 PetscCall(VecScale(xt, -1.0)); 116 PetscCall(MatMultAdd(A, xt, b, b)); 117 PetscCall(VecDestroy(&xt)); 118 PetscCall(VecGetArrayRead(x, &xx)); 119 PetscCall(VecGetArray(b, &bb)); 120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 121 PetscCall(VecRestoreArrayRead(x, &xx)); 122 PetscCall(VecRestoreArray(b, &bb)); 123 } 124 125 PetscCall(MatDenseGetArray(A, &v)); 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i] * m; 128 PetscCall(PetscArrayzero(slot, r)); 129 } 130 for (i = 0; i < N; i++) { 131 slot = v + rows[i]; 132 for (j = 0; j < n; j++) { 133 *slot = 0.0; 134 slot += m; 135 } 136 } 137 if (diag != 0.0) { 138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 139 for (i = 0; i < N; i++) { 140 slot = v + (m + 1) * rows[i]; 141 *slot = diag; 142 } 143 } 144 PetscCall(MatDenseRestoreArray(A, &v)); 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 149 { 150 Mat B = NULL; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 152 Mat_SeqDense *b; 153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 154 const MatScalar *av; 155 PetscBool isseqdense; 156 157 PetscFunctionBegin; 158 if (reuse == MAT_REUSE_MATRIX) { 159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); 161 } 162 if (reuse != MAT_REUSE_MATRIX) { 163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 164 PetscCall(MatSetSizes(B, m, n, m, n)); 165 PetscCall(MatSetType(B, MATSEQDENSE)); 166 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 167 b = (Mat_SeqDense *)B->data; 168 } else { 169 b = (Mat_SeqDense *)((*newmat)->data); 170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); 171 } 172 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 173 for (i = 0; i < m; i++) { 174 PetscInt j; 175 for (j = 0; j < ai[1] - ai[0]; j++) { 176 b->v[*aj * b->lda + i] = *av; 177 aj++; 178 av++; 179 } 180 ai++; 181 } 182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 183 184 if (reuse == MAT_INPLACE_MATRIX) { 185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 187 PetscCall(MatHeaderReplace(A, &B)); 188 } else { 189 if (B) *newmat = B; 190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 197 { 198 Mat B = NULL; 199 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 200 PetscInt i, j; 201 PetscInt *rows, *nnz; 202 MatScalar *aa = a->v, *vals; 203 204 PetscFunctionBegin; 205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 206 if (reuse != MAT_REUSE_MATRIX) { 207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 209 PetscCall(MatSetType(B, MATSEQAIJ)); 210 for (j = 0; j < A->cmap->n; j++) { 211 for (i = 0; i < A->rmap->n; i++) 212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 213 aa += a->lda; 214 } 215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 216 } else B = *newmat; 217 aa = a->v; 218 for (j = 0; j < A->cmap->n; j++) { 219 PetscInt numRows = 0; 220 for (i = 0; i < A->rmap->n; i++) 221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 222 rows[numRows] = i; 223 vals[numRows++] = aa[i]; 224 } 225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 226 aa += a->lda; 227 } 228 PetscCall(PetscFree3(rows, nnz, vals)); 229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 231 232 if (reuse == MAT_INPLACE_MATRIX) { 233 PetscCall(MatHeaderReplace(A, &B)); 234 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 239 { 240 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 241 const PetscScalar *xv; 242 PetscScalar *yv; 243 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 244 245 PetscFunctionBegin; 246 PetscCall(MatDenseGetArrayRead(X, &xv)); 247 PetscCall(MatDenseGetArray(Y, &yv)); 248 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 249 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 250 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 251 PetscCall(PetscBLASIntCast(y->lda, &lday)); 252 if (ldax > m || lday > m) { 253 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); 254 } else { 255 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 256 } 257 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 258 PetscCall(MatDenseRestoreArray(Y, &yv)); 259 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 264 { 265 PetscLogDouble N = A->rmap->n * A->cmap->n; 266 267 PetscFunctionBegin; 268 info->block_size = 1.0; 269 info->nz_allocated = N; 270 info->nz_used = N; 271 info->nz_unneeded = 0; 272 info->assemblies = A->num_ass; 273 info->mallocs = 0; 274 info->memory = 0; /* REVIEW ME */ 275 info->fill_ratio_given = 0; 276 info->fill_ratio_needed = 0; 277 info->factor_mallocs = 0; 278 PetscFunctionReturn(PETSC_SUCCESS); 279 } 280 281 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 282 { 283 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 284 PetscScalar *v; 285 PetscBLASInt one = 1, j, nz, lda = 0; 286 287 PetscFunctionBegin; 288 PetscCall(MatDenseGetArray(A, &v)); 289 PetscCall(PetscBLASIntCast(a->lda, &lda)); 290 if (lda > A->rmap->n) { 291 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 292 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 293 } else { 294 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 295 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 296 } 297 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 298 PetscCall(MatDenseRestoreArray(A, &v)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 303 { 304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 305 PetscScalar *v; 306 PetscInt j, k; 307 308 PetscFunctionBegin; 309 PetscCall(MatDenseGetArray(A, &v)); 310 k = PetscMin(A->rmap->n, A->cmap->n); 311 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 312 PetscCall(PetscLogFlops(k)); 313 PetscCall(MatDenseRestoreArray(A, &v)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 318 { 319 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 320 PetscInt i, j, m = A->rmap->n, N = a->lda; 321 const PetscScalar *v; 322 323 PetscFunctionBegin; 324 *fl = PETSC_FALSE; 325 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 326 PetscCall(MatDenseGetArrayRead(A, &v)); 327 for (i = 0; i < m; i++) { 328 for (j = i; j < m; j++) { 329 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 330 } 331 } 332 *fl = PETSC_TRUE; 333 restore: 334 PetscCall(MatDenseRestoreArrayRead(A, &v)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 339 { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 360 { 361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 362 PetscInt lda = mat->lda, j, m, nlda = lda; 363 PetscBool isdensecpu; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 367 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 368 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 369 PetscCall(MatDenseSetLDA(newi, lda)); 370 } 371 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 372 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 373 if (cpvalues == MAT_COPY_VALUES) { 374 const PetscScalar *av; 375 PetscScalar *v; 376 377 PetscCall(MatDenseGetArrayRead(A, &av)); 378 PetscCall(MatDenseGetArrayWrite(newi, &v)); 379 PetscCall(MatDenseGetLDA(newi, &nlda)); 380 m = A->rmap->n; 381 if (lda > m || nlda > m) { 382 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); 383 } else { 384 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 385 } 386 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 387 PetscCall(MatDenseRestoreArrayRead(A, &av)); 388 PetscCall(MatPropagateSymmetryOptions(A, newi)); 389 } 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 394 { 395 PetscFunctionBegin; 396 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 397 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 398 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 399 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 404 { 405 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 406 PetscBLASInt info; 407 408 PetscFunctionBegin; 409 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 410 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 411 PetscCall(PetscFPTrapPop()); 412 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info); 413 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 418 { 419 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 420 PetscBLASInt info; 421 422 PetscFunctionBegin; 423 if (A->spd == PETSC_BOOL3_TRUE) { 424 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 425 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 426 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 427 PetscCall(PetscFPTrapPop()); 428 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info); 429 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 430 #if defined(PETSC_USE_COMPLEX) 431 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 432 if (T) PetscCall(MatConjugate_SeqDense(A)); 433 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 434 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 435 PetscCall(PetscFPTrapPop()); 436 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info); 437 if (T) PetscCall(MatConjugate_SeqDense(A)); 438 #endif 439 } else { /* symmetric case */ 440 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 441 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 442 PetscCall(PetscFPTrapPop()); 443 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info); 444 } 445 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 446 PetscFunctionReturn(PETSC_SUCCESS); 447 } 448 449 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 450 { 451 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 452 PetscBLASInt info; 453 char trans; 454 455 PetscFunctionBegin; 456 if (PetscDefined(USE_COMPLEX)) { 457 trans = 'C'; 458 } else { 459 trans = 'T'; 460 } 461 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 462 { /* lwork depends on the number of right-hand sides */ 463 PetscBLASInt nlfwork, lfwork = -1; 464 PetscScalar fwork; 465 466 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 467 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 468 if (nlfwork > mat->lfwork) { 469 mat->lfwork = nlfwork; 470 PetscCall(PetscFree(mat->fwork)); 471 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 472 } 473 } 474 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 475 PetscCall(PetscFPTrapPop()); 476 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 477 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 478 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 479 PetscCall(PetscFPTrapPop()); 480 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 481 for (PetscInt j = 0; j < nrhs; j++) { 482 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 483 } 484 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 489 { 490 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 491 PetscBLASInt info; 492 493 PetscFunctionBegin; 494 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 495 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 496 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 497 PetscCall(PetscFPTrapPop()); 498 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info); 499 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 500 { /* lwork depends on the number of right-hand sides */ 501 PetscBLASInt nlfwork, lfwork = -1; 502 PetscScalar fwork; 503 504 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 505 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 506 if (nlfwork > mat->lfwork) { 507 mat->lfwork = nlfwork; 508 PetscCall(PetscFree(mat->fwork)); 509 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 510 } 511 } 512 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 513 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 514 PetscCall(PetscFPTrapPop()); 515 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info); 516 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 517 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 518 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 519 PetscFunctionReturn(PETSC_SUCCESS); 520 } 521 522 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 523 { 524 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 525 PetscScalar *y; 526 PetscBLASInt m = 0, k = 0; 527 528 PetscFunctionBegin; 529 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 530 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 531 if (k < m) { 532 PetscCall(VecCopy(xx, mat->qrrhs)); 533 PetscCall(VecGetArray(mat->qrrhs, &y)); 534 } else { 535 PetscCall(VecCopy(xx, yy)); 536 PetscCall(VecGetArray(yy, &y)); 537 } 538 *_y = y; 539 *_k = k; 540 *_m = m; 541 PetscFunctionReturn(PETSC_SUCCESS); 542 } 543 544 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 545 { 546 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 547 PetscScalar *y = NULL; 548 PetscBLASInt m, k; 549 550 PetscFunctionBegin; 551 y = *_y; 552 *_y = NULL; 553 k = *_k; 554 m = *_m; 555 if (k < m) { 556 PetscScalar *yv; 557 PetscCall(VecGetArray(yy, &yv)); 558 PetscCall(PetscArraycpy(yv, y, k)); 559 PetscCall(VecRestoreArray(yy, &yv)); 560 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 561 } else { 562 PetscCall(VecRestoreArray(yy, &y)); 563 } 564 PetscFunctionReturn(PETSC_SUCCESS); 565 } 566 567 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 568 { 569 PetscScalar *y = NULL; 570 PetscBLASInt m = 0, k = 0; 571 572 PetscFunctionBegin; 573 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 574 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 575 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 576 PetscFunctionReturn(PETSC_SUCCESS); 577 } 578 579 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 580 { 581 PetscScalar *y = NULL; 582 PetscBLASInt m = 0, k = 0; 583 584 PetscFunctionBegin; 585 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 586 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 587 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 592 { 593 PetscScalar *y = NULL; 594 PetscBLASInt m = 0, k = 0; 595 596 PetscFunctionBegin; 597 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 598 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 599 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 604 { 605 PetscScalar *y = NULL; 606 PetscBLASInt m = 0, k = 0; 607 608 PetscFunctionBegin; 609 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 610 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 611 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 612 PetscFunctionReturn(PETSC_SUCCESS); 613 } 614 615 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 616 { 617 PetscScalar *y = NULL; 618 PetscBLASInt m = 0, k = 0; 619 620 PetscFunctionBegin; 621 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 622 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 623 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 628 { 629 PetscScalar *y = NULL; 630 PetscBLASInt m = 0, k = 0; 631 632 PetscFunctionBegin; 633 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 634 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 635 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 636 PetscFunctionReturn(PETSC_SUCCESS); 637 } 638 639 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 640 { 641 const PetscScalar *b; 642 PetscScalar *y; 643 PetscInt n, _ldb, _ldx; 644 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 645 646 PetscFunctionBegin; 647 *_ldy = 0; 648 *_m = 0; 649 *_nrhs = 0; 650 *_k = 0; 651 *_y = NULL; 652 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 653 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 654 PetscCall(MatGetSize(B, NULL, &n)); 655 PetscCall(PetscBLASIntCast(n, &nrhs)); 656 PetscCall(MatDenseGetLDA(B, &_ldb)); 657 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 658 PetscCall(MatDenseGetLDA(X, &_ldx)); 659 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 660 if (ldx < m) { 661 PetscCall(MatDenseGetArrayRead(B, &b)); 662 PetscCall(PetscMalloc1(nrhs * m, &y)); 663 if (ldb == m) { 664 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 665 } else { 666 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 667 } 668 ldy = m; 669 PetscCall(MatDenseRestoreArrayRead(B, &b)); 670 } else { 671 if (ldb == ldx) { 672 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 673 PetscCall(MatDenseGetArray(X, &y)); 674 } else { 675 PetscCall(MatDenseGetArray(X, &y)); 676 PetscCall(MatDenseGetArrayRead(B, &b)); 677 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 678 PetscCall(MatDenseRestoreArrayRead(B, &b)); 679 } 680 ldy = ldx; 681 } 682 *_y = y; 683 *_ldy = ldy; 684 *_k = k; 685 *_m = m; 686 *_nrhs = nrhs; 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 691 { 692 PetscScalar *y; 693 PetscInt _ldx; 694 PetscBLASInt k, ldy, nrhs, ldx = 0; 695 696 PetscFunctionBegin; 697 y = *_y; 698 *_y = NULL; 699 k = *_k; 700 ldy = *_ldy; 701 nrhs = *_nrhs; 702 PetscCall(MatDenseGetLDA(X, &_ldx)); 703 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 704 if (ldx != ldy) { 705 PetscScalar *xv; 706 PetscCall(MatDenseGetArray(X, &xv)); 707 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 708 PetscCall(MatDenseRestoreArray(X, &xv)); 709 PetscCall(PetscFree(y)); 710 } else { 711 PetscCall(MatDenseRestoreArray(X, &y)); 712 } 713 PetscFunctionReturn(PETSC_SUCCESS); 714 } 715 716 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 717 { 718 PetscScalar *y; 719 PetscBLASInt m, k, ldy, nrhs; 720 721 PetscFunctionBegin; 722 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 723 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 724 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 725 PetscFunctionReturn(PETSC_SUCCESS); 726 } 727 728 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 729 { 730 PetscScalar *y; 731 PetscBLASInt m, k, ldy, nrhs; 732 733 PetscFunctionBegin; 734 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 735 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 736 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 741 { 742 PetscScalar *y; 743 PetscBLASInt m, k, ldy, nrhs; 744 745 PetscFunctionBegin; 746 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 747 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 748 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 753 { 754 PetscScalar *y; 755 PetscBLASInt m, k, ldy, nrhs; 756 757 PetscFunctionBegin; 758 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 759 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 760 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 761 PetscFunctionReturn(PETSC_SUCCESS); 762 } 763 764 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 765 { 766 PetscScalar *y; 767 PetscBLASInt m, k, ldy, nrhs; 768 769 PetscFunctionBegin; 770 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 771 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 772 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 773 PetscFunctionReturn(PETSC_SUCCESS); 774 } 775 776 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 777 { 778 PetscScalar *y; 779 PetscBLASInt m, k, ldy, nrhs; 780 781 PetscFunctionBegin; 782 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 783 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 784 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscFunctionReturn(PETSC_SUCCESS); 786 } 787 788 /* COMMENT: I have chosen to hide row permutation in the pivots, 789 rather than put it in the Mat->row slot.*/ 790 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *minfo) 791 { 792 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 793 PetscBLASInt n, m, info; 794 795 PetscFunctionBegin; 796 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 797 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 798 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 799 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 800 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 801 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 802 PetscCall(PetscFPTrapPop()); 803 804 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info); 805 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info); 806 807 A->ops->solve = MatSolve_SeqDense_LU; 808 A->ops->matsolve = MatMatSolve_SeqDense_LU; 809 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 810 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 811 A->factortype = MAT_FACTOR_LU; 812 813 PetscCall(PetscFree(A->solvertype)); 814 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 815 816 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 817 PetscFunctionReturn(PETSC_SUCCESS); 818 } 819 820 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info) 821 { 822 PetscFunctionBegin; 823 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 824 PetscUseTypeMethod(fact, lufactor, NULL, NULL, info); 825 PetscFunctionReturn(PETSC_SUCCESS); 826 } 827 828 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *info) 829 { 830 PetscFunctionBegin; 831 fact->preallocated = PETSC_TRUE; 832 fact->assembled = PETSC_TRUE; 833 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 834 PetscFunctionReturn(PETSC_SUCCESS); 835 } 836 837 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 838 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, PETSC_UNUSED const MatFactorInfo *minfo) 839 { 840 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 841 PetscBLASInt info, n; 842 843 PetscFunctionBegin; 844 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 845 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 846 if (A->spd == PETSC_BOOL3_TRUE) { 847 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 848 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 849 PetscCall(PetscFPTrapPop()); 850 #if defined(PETSC_USE_COMPLEX) 851 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 852 if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 853 if (!mat->fwork) { 854 PetscScalar dummy; 855 856 mat->lfwork = -1; 857 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 858 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 859 PetscCall(PetscFPTrapPop()); 860 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 861 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 862 } 863 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 864 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 865 PetscCall(PetscFPTrapPop()); 866 #endif 867 } else { /* symmetric case */ 868 if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 869 if (!mat->fwork) { 870 PetscScalar dummy; 871 872 mat->lfwork = -1; 873 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 874 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 875 PetscCall(PetscFPTrapPop()); 876 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 877 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 878 } 879 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 880 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 881 PetscCall(PetscFPTrapPop()); 882 } 883 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1); 884 885 A->ops->solve = MatSolve_SeqDense_Cholesky; 886 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 887 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 888 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 889 A->factortype = MAT_FACTOR_CHOLESKY; 890 891 PetscCall(PetscFree(A->solvertype)); 892 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 893 894 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info) 899 { 900 PetscFunctionBegin; 901 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 902 PetscUseTypeMethod(fact, choleskyfactor, NULL, info); 903 PetscFunctionReturn(PETSC_SUCCESS); 904 } 905 906 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 907 { 908 PetscFunctionBegin; 909 fact->assembled = PETSC_TRUE; 910 fact->preallocated = PETSC_TRUE; 911 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 912 PetscFunctionReturn(PETSC_SUCCESS); 913 } 914 915 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, PETSC_UNUSED const MatFactorInfo *minfo) 916 { 917 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 918 PetscBLASInt n, m, info, min, max; 919 920 PetscFunctionBegin; 921 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 922 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 923 max = PetscMax(m, n); 924 min = PetscMin(m, n); 925 if (!mat->tau) PetscCall(PetscMalloc1(min, &mat->tau)); 926 if (!mat->pivots) PetscCall(PetscMalloc1(n, &mat->pivots)); 927 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); 928 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 929 if (!mat->fwork) { 930 PetscScalar dummy; 931 932 mat->lfwork = -1; 933 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 934 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 935 PetscCall(PetscFPTrapPop()); 936 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 937 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 938 } 939 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 940 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 941 PetscCall(PetscFPTrapPop()); 942 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info); 943 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 944 mat->rank = min; 945 946 A->ops->solve = MatSolve_SeqDense_QR; 947 A->ops->matsolve = MatMatSolve_SeqDense_QR; 948 A->factortype = MAT_FACTOR_QR; 949 if (m == n) { 950 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 951 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 952 } 953 954 PetscCall(PetscFree(A->solvertype)); 955 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 956 957 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 958 PetscFunctionReturn(PETSC_SUCCESS); 959 } 960 961 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info) 962 { 963 PetscFunctionBegin; 964 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 965 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, info)); 966 PetscFunctionReturn(PETSC_SUCCESS); 967 } 968 969 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 970 { 971 PetscFunctionBegin; 972 fact->assembled = PETSC_TRUE; 973 fact->preallocated = PETSC_TRUE; 974 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 975 PetscFunctionReturn(PETSC_SUCCESS); 976 } 977 978 /* uses LAPACK */ 979 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 980 { 981 PetscFunctionBegin; 982 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 983 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 984 PetscCall(MatSetType(*fact, MATDENSE)); 985 (*fact)->trivialsymbolic = PETSC_TRUE; 986 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 987 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 988 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 989 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 990 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 991 } else if (ftype == MAT_FACTOR_QR) { 992 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 993 } 994 (*fact)->factortype = ftype; 995 996 PetscCall(PetscFree((*fact)->solvertype)); 997 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 998 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 999 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1000 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1001 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1002 PetscFunctionReturn(PETSC_SUCCESS); 1003 } 1004 1005 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1006 { 1007 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1008 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1009 const PetscScalar *b; 1010 PetscInt m = A->rmap->n, i; 1011 PetscBLASInt o = 1, bm = 0; 1012 1013 PetscFunctionBegin; 1014 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1015 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1016 #endif 1017 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1018 PetscCall(PetscBLASIntCast(m, &bm)); 1019 if (flag & SOR_ZERO_INITIAL_GUESS) { 1020 /* this is a hack fix, should have another version without the second BLASdotu */ 1021 PetscCall(VecSet(xx, zero)); 1022 } 1023 PetscCall(VecGetArray(xx, &x)); 1024 PetscCall(VecGetArrayRead(bb, &b)); 1025 its = its * lits; 1026 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1027 while (its--) { 1028 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1029 for (i = 0; i < m; i++) { 1030 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1031 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); 1032 } 1033 } 1034 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1035 for (i = m - 1; i >= 0; i--) { 1036 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1037 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift); 1038 } 1039 } 1040 } 1041 PetscCall(VecRestoreArrayRead(bb, &b)); 1042 PetscCall(VecRestoreArray(xx, &x)); 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 1046 PETSC_INTERN PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1047 { 1048 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1049 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1050 PetscBLASInt m, n, _One = 1; 1051 const PetscScalar *v = mat->v, *x; 1052 1053 PetscFunctionBegin; 1054 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1055 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1056 PetscCall(VecGetArrayRead(xx, &x)); 1057 PetscCall(VecGetArrayWrite(yy, &y)); 1058 if (!m || !n) { 1059 PetscBLASInt i; 1060 if (trans) 1061 for (i = 0; i < n; i++) y[i] = 0.0; 1062 else 1063 for (i = 0; i < m; i++) y[i] = 0.0; 1064 } else { 1065 if (trans) { 1066 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1067 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1068 } else { 1069 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); 1070 } 1071 PetscCall(PetscLogFlops(2.0 * m * n - n)); 1072 } 1073 PetscCall(VecRestoreArrayRead(xx, &x)); 1074 PetscCall(VecRestoreArrayWrite(yy, &y)); 1075 PetscFunctionReturn(PETSC_SUCCESS); 1076 } 1077 1078 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1079 { 1080 PetscFunctionBegin; 1081 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1086 { 1087 PetscFunctionBegin; 1088 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1093 { 1094 PetscFunctionBegin; 1095 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1096 PetscFunctionReturn(PETSC_SUCCESS); 1097 } 1098 1099 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1100 { 1101 PetscFunctionBegin; 1102 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1103 PetscFunctionReturn(PETSC_SUCCESS); 1104 } 1105 1106 PETSC_INTERN PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1107 { 1108 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1109 const PetscScalar *v = mat->v, *x; 1110 PetscScalar *y, _DOne = 1.0; 1111 PetscBLASInt m, n, _One = 1; 1112 1113 PetscFunctionBegin; 1114 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1115 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1116 PetscCall(VecCopy(zz, yy)); 1117 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); 1118 PetscCall(VecGetArray(yy, &y)); 1119 PetscCall(VecGetArrayRead(xx, &x)); 1120 if (trans) { 1121 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1122 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1123 } else { 1124 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); 1125 } 1126 PetscCall(VecRestoreArrayRead(xx, &x)); 1127 PetscCall(VecRestoreArray(yy, &y)); 1128 PetscCall(PetscLogFlops(2.0 * m * n)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131 1132 PetscErrorCode MatMultColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1133 { 1134 PetscFunctionBegin; 1135 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1136 PetscFunctionReturn(PETSC_SUCCESS); 1137 } 1138 1139 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1140 { 1141 PetscFunctionBegin; 1142 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1143 PetscFunctionReturn(PETSC_SUCCESS); 1144 } 1145 1146 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1147 { 1148 PetscFunctionBegin; 1149 PetscMPIInt rank; 1150 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 1151 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1156 { 1157 PetscFunctionBegin; 1158 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1159 PetscFunctionReturn(PETSC_SUCCESS); 1160 } 1161 1162 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1163 { 1164 PetscFunctionBegin; 1165 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1166 PetscFunctionReturn(PETSC_SUCCESS); 1167 } 1168 1169 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1170 { 1171 PetscFunctionBegin; 1172 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1173 PetscFunctionReturn(PETSC_SUCCESS); 1174 } 1175 1176 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1177 { 1178 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1179 PetscInt i; 1180 1181 PetscFunctionBegin; 1182 if (ncols) *ncols = A->cmap->n; 1183 if (cols) { 1184 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1185 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1186 } 1187 if (vals) { 1188 const PetscScalar *v; 1189 1190 PetscCall(MatDenseGetArrayRead(A, &v)); 1191 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1192 v += row; 1193 for (i = 0; i < A->cmap->n; i++) { 1194 (*vals)[i] = *v; 1195 v += mat->lda; 1196 } 1197 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1198 } 1199 PetscFunctionReturn(PETSC_SUCCESS); 1200 } 1201 1202 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1203 { 1204 PetscFunctionBegin; 1205 if (cols) PetscCall(PetscFree(*cols)); 1206 if (vals) PetscCall(PetscFree(*vals)); 1207 PetscFunctionReturn(PETSC_SUCCESS); 1208 } 1209 1210 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1211 { 1212 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1213 PetscScalar *av; 1214 PetscInt i, j, idx = 0; 1215 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1216 PetscOffloadMask oldf; 1217 #endif 1218 1219 PetscFunctionBegin; 1220 PetscCall(MatDenseGetArray(A, &av)); 1221 if (!mat->roworiented) { 1222 if (addv == INSERT_VALUES) { 1223 for (j = 0; j < n; j++) { 1224 if (indexn[j] < 0) { 1225 idx += m; 1226 continue; 1227 } 1228 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1229 for (i = 0; i < m; i++) { 1230 if (indexm[i] < 0) { 1231 idx++; 1232 continue; 1233 } 1234 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1235 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1236 } 1237 } 1238 } else { 1239 for (j = 0; j < n; j++) { 1240 if (indexn[j] < 0) { 1241 idx += m; 1242 continue; 1243 } 1244 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1245 for (i = 0; i < m; i++) { 1246 if (indexm[i] < 0) { 1247 idx++; 1248 continue; 1249 } 1250 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1251 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1252 } 1253 } 1254 } 1255 } else { 1256 if (addv == INSERT_VALUES) { 1257 for (i = 0; i < m; i++) { 1258 if (indexm[i] < 0) { 1259 idx += n; 1260 continue; 1261 } 1262 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1263 for (j = 0; j < n; j++) { 1264 if (indexn[j] < 0) { 1265 idx++; 1266 continue; 1267 } 1268 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1269 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1270 } 1271 } 1272 } else { 1273 for (i = 0; i < m; i++) { 1274 if (indexm[i] < 0) { 1275 idx += n; 1276 continue; 1277 } 1278 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1279 for (j = 0; j < n; j++) { 1280 if (indexn[j] < 0) { 1281 idx++; 1282 continue; 1283 } 1284 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1285 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1286 } 1287 } 1288 } 1289 } 1290 /* hack to prevent unneeded copy to the GPU while returning the array */ 1291 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1292 oldf = A->offloadmask; 1293 A->offloadmask = PETSC_OFFLOAD_GPU; 1294 #endif 1295 PetscCall(MatDenseRestoreArray(A, &av)); 1296 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1297 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1298 #endif 1299 PetscFunctionReturn(PETSC_SUCCESS); 1300 } 1301 1302 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1303 { 1304 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1305 const PetscScalar *vv; 1306 PetscInt i, j; 1307 1308 PetscFunctionBegin; 1309 PetscCall(MatDenseGetArrayRead(A, &vv)); 1310 /* row-oriented output */ 1311 for (i = 0; i < m; i++) { 1312 if (indexm[i] < 0) { 1313 v += n; 1314 continue; 1315 } 1316 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1317 for (j = 0; j < n; j++) { 1318 if (indexn[j] < 0) { 1319 v++; 1320 continue; 1321 } 1322 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1323 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1324 } 1325 } 1326 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1327 PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1331 { 1332 PetscBool skipHeader; 1333 PetscViewerFormat format; 1334 PetscInt header[4], M, N, m, lda, i, j; 1335 PetscCount k; 1336 const PetscScalar *v; 1337 PetscScalar *vwork; 1338 1339 PetscFunctionBegin; 1340 PetscCall(PetscViewerSetUp(viewer)); 1341 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1342 PetscCall(PetscViewerGetFormat(viewer, &format)); 1343 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1344 1345 PetscCall(MatGetSize(mat, &M, &N)); 1346 1347 /* write matrix header */ 1348 header[0] = MAT_FILE_CLASSID; 1349 header[1] = M; 1350 header[2] = N; 1351 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1352 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1353 1354 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1355 if (format != PETSC_VIEWER_NATIVE) { 1356 PetscInt nnz = m * N, *iwork; 1357 /* store row lengths for each row */ 1358 PetscCall(PetscMalloc1(nnz, &iwork)); 1359 for (i = 0; i < m; i++) iwork[i] = N; 1360 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1361 /* store column indices (zero start index) */ 1362 for (k = 0, i = 0; i < m; i++) 1363 for (j = 0; j < N; j++, k++) iwork[k] = j; 1364 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1365 PetscCall(PetscFree(iwork)); 1366 } 1367 /* store matrix values as a dense matrix in row major order */ 1368 PetscCall(PetscMalloc1(m * N, &vwork)); 1369 PetscCall(MatDenseGetArrayRead(mat, &v)); 1370 PetscCall(MatDenseGetLDA(mat, &lda)); 1371 for (k = 0, i = 0; i < m; i++) 1372 for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j]; 1373 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1374 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1375 PetscCall(PetscFree(vwork)); 1376 PetscFunctionReturn(PETSC_SUCCESS); 1377 } 1378 1379 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1380 { 1381 PetscBool skipHeader; 1382 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1383 PetscInt rows, cols; 1384 PetscScalar *v, *vwork; 1385 1386 PetscFunctionBegin; 1387 PetscCall(PetscViewerSetUp(viewer)); 1388 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1389 1390 if (!skipHeader) { 1391 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1392 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1393 M = header[1]; 1394 N = header[2]; 1395 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1396 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1397 nz = header[3]; 1398 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1399 } else { 1400 PetscCall(MatGetSize(mat, &M, &N)); 1401 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1402 nz = MATRIX_BINARY_FORMAT_DENSE; 1403 } 1404 1405 /* setup global sizes if not set */ 1406 if (mat->rmap->N < 0) mat->rmap->N = M; 1407 if (mat->cmap->N < 0) mat->cmap->N = N; 1408 PetscCall(MatSetUp(mat)); 1409 /* check if global sizes are correct */ 1410 PetscCall(MatGetSize(mat, &rows, &cols)); 1411 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1412 1413 PetscCall(MatGetSize(mat, NULL, &N)); 1414 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1415 PetscCall(MatDenseGetArray(mat, &v)); 1416 PetscCall(MatDenseGetLDA(mat, &lda)); 1417 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1418 PetscCount nnz = (size_t)m * N; 1419 /* read in matrix values */ 1420 PetscCall(PetscMalloc1(nnz, &vwork)); 1421 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1422 /* store values in column major order */ 1423 for (j = 0; j < N; j++) 1424 for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j]; 1425 PetscCall(PetscFree(vwork)); 1426 } else { /* matrix in file is sparse format */ 1427 PetscInt nnz = 0, *rlens, *icols; 1428 /* read in row lengths */ 1429 PetscCall(PetscMalloc1(m, &rlens)); 1430 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1431 for (i = 0; i < m; i++) nnz += rlens[i]; 1432 /* read in column indices and values */ 1433 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1434 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1435 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1436 /* store values in column major order */ 1437 for (k = 0, i = 0; i < m; i++) 1438 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1439 PetscCall(PetscFree(rlens)); 1440 PetscCall(PetscFree2(icols, vwork)); 1441 } 1442 PetscCall(MatDenseRestoreArray(mat, &v)); 1443 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1444 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1445 PetscFunctionReturn(PETSC_SUCCESS); 1446 } 1447 1448 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1449 { 1450 PetscBool isbinary, ishdf5; 1451 1452 PetscFunctionBegin; 1453 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1454 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1455 /* force binary viewer to load .info file if it has not yet done so */ 1456 PetscCall(PetscViewerSetUp(viewer)); 1457 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1458 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1459 if (isbinary) { 1460 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1461 } else if (ishdf5) { 1462 #if defined(PETSC_HAVE_HDF5) 1463 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1464 #else 1465 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1466 #endif 1467 } else { 1468 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1469 } 1470 PetscFunctionReturn(PETSC_SUCCESS); 1471 } 1472 1473 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1474 { 1475 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1476 PetscInt i, j; 1477 const char *name; 1478 PetscScalar *v, *av; 1479 PetscViewerFormat format; 1480 #if defined(PETSC_USE_COMPLEX) 1481 PetscBool allreal = PETSC_TRUE; 1482 #endif 1483 1484 PetscFunctionBegin; 1485 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1486 PetscCall(PetscViewerGetFormat(viewer, &format)); 1487 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1488 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ 1489 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1490 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1491 for (i = 0; i < A->rmap->n; i++) { 1492 v = av + i; 1493 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1494 for (j = 0; j < A->cmap->n; j++) { 1495 #if defined(PETSC_USE_COMPLEX) 1496 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1497 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1498 } else if (PetscRealPart(*v)) { 1499 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1500 } 1501 #else 1502 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1503 #endif 1504 v += a->lda; 1505 } 1506 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1507 } 1508 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1509 } else { 1510 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1511 #if defined(PETSC_USE_COMPLEX) 1512 /* determine if matrix has all real values */ 1513 for (j = 0; j < A->cmap->n; j++) { 1514 v = av + j * a->lda; 1515 for (i = 0; i < A->rmap->n; i++) { 1516 if (PetscImaginaryPart(v[i])) { 1517 allreal = PETSC_FALSE; 1518 break; 1519 } 1520 } 1521 } 1522 #endif 1523 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1524 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1525 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1526 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1527 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1528 } 1529 1530 for (i = 0; i < A->rmap->n; i++) { 1531 v = av + i; 1532 for (j = 0; j < A->cmap->n; j++) { 1533 #if defined(PETSC_USE_COMPLEX) 1534 if (allreal) { 1535 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1536 } else { 1537 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1538 } 1539 #else 1540 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1541 #endif 1542 v += a->lda; 1543 } 1544 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1545 } 1546 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1547 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1548 } 1549 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1550 PetscCall(PetscViewerFlush(viewer)); 1551 PetscFunctionReturn(PETSC_SUCCESS); 1552 } 1553 1554 #include <petscdraw.h> 1555 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1556 { 1557 Mat A = (Mat)Aa; 1558 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1559 int color = PETSC_DRAW_WHITE; 1560 const PetscScalar *v; 1561 PetscViewer viewer; 1562 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1563 PetscViewerFormat format; 1564 1565 PetscFunctionBegin; 1566 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1567 PetscCall(PetscViewerGetFormat(viewer, &format)); 1568 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1569 1570 /* Loop over matrix elements drawing boxes */ 1571 PetscCall(MatDenseGetArrayRead(A, &v)); 1572 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1573 PetscDrawCollectiveBegin(draw); 1574 /* Blue for negative and Red for positive */ 1575 for (j = 0; j < n; j++) { 1576 x_l = j; 1577 x_r = x_l + 1.0; 1578 for (i = 0; i < m; i++) { 1579 y_l = m - i - 1.0; 1580 y_r = y_l + 1.0; 1581 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1582 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1583 else continue; 1584 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1585 } 1586 } 1587 PetscDrawCollectiveEnd(draw); 1588 } else { 1589 /* use contour shading to indicate magnitude of values */ 1590 /* first determine max of all nonzero values */ 1591 PetscReal minv = 0.0, maxv = 0.0; 1592 PetscDraw popup; 1593 1594 for (i = 0; i < m * n; i++) { 1595 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1596 } 1597 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1598 PetscCall(PetscDrawGetPopup(draw, &popup)); 1599 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1600 1601 PetscDrawCollectiveBegin(draw); 1602 for (j = 0; j < n; j++) { 1603 x_l = j; 1604 x_r = x_l + 1.0; 1605 for (i = 0; i < m; i++) { 1606 y_l = m - i - 1.0; 1607 y_r = y_l + 1.0; 1608 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1609 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1610 } 1611 } 1612 PetscDrawCollectiveEnd(draw); 1613 } 1614 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1615 PetscFunctionReturn(PETSC_SUCCESS); 1616 } 1617 1618 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1619 { 1620 PetscDraw draw; 1621 PetscBool isnull; 1622 PetscReal xr, yr, xl, yl, h, w; 1623 1624 PetscFunctionBegin; 1625 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1626 PetscCall(PetscDrawIsNull(draw, &isnull)); 1627 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1628 1629 xr = A->cmap->n; 1630 yr = A->rmap->n; 1631 h = yr / 10.0; 1632 w = xr / 10.0; 1633 xr += w; 1634 yr += h; 1635 xl = -w; 1636 yl = -h; 1637 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1638 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1639 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1640 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1641 PetscCall(PetscDrawSave(draw)); 1642 PetscFunctionReturn(PETSC_SUCCESS); 1643 } 1644 1645 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1646 { 1647 PetscBool isascii, isbinary, isdraw; 1648 1649 PetscFunctionBegin; 1650 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1651 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1652 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1653 if (isascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1654 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1655 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1656 PetscFunctionReturn(PETSC_SUCCESS); 1657 } 1658 1659 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1660 { 1661 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1662 1663 PetscFunctionBegin; 1664 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1665 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1666 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); 1667 a->unplacedarray = a->v; 1668 a->unplaced_user_alloc = a->user_alloc; 1669 a->v = (PetscScalar *)array; 1670 a->user_alloc = PETSC_TRUE; 1671 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1672 A->offloadmask = PETSC_OFFLOAD_CPU; 1673 #endif 1674 PetscFunctionReturn(PETSC_SUCCESS); 1675 } 1676 1677 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1678 { 1679 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1680 1681 PetscFunctionBegin; 1682 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1683 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1684 a->v = a->unplacedarray; 1685 a->user_alloc = a->unplaced_user_alloc; 1686 a->unplacedarray = NULL; 1687 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1688 A->offloadmask = PETSC_OFFLOAD_CPU; 1689 #endif 1690 PetscFunctionReturn(PETSC_SUCCESS); 1691 } 1692 1693 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1694 { 1695 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1696 1697 PetscFunctionBegin; 1698 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1699 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1700 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1701 a->v = (PetscScalar *)array; 1702 a->user_alloc = PETSC_FALSE; 1703 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1704 A->offloadmask = PETSC_OFFLOAD_CPU; 1705 #endif 1706 PetscFunctionReturn(PETSC_SUCCESS); 1707 } 1708 1709 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1710 { 1711 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1712 1713 PetscFunctionBegin; 1714 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); 1715 PetscCall(VecDestroy(&l->qrrhs)); 1716 PetscCall(PetscFree(l->tau)); 1717 PetscCall(PetscFree(l->pivots)); 1718 PetscCall(PetscFree(l->fwork)); 1719 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1720 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1721 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1722 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1723 PetscCall(VecDestroy(&l->cvec)); 1724 PetscCall(MatDestroy(&l->cmat)); 1725 PetscCall(PetscFree(mat->data)); 1726 1727 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1743 #if defined(PETSC_HAVE_ELEMENTAL) 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1745 #endif 1746 #if defined(PETSC_HAVE_SCALAPACK) 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1748 #endif 1749 #if defined(PETSC_HAVE_CUDA) 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1754 #endif 1755 #if defined(PETSC_HAVE_HIP) 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1760 #endif 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1766 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 1781 PetscFunctionReturn(PETSC_SUCCESS); 1782 } 1783 1784 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1785 { 1786 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1787 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1788 PetscScalar *v, tmp; 1789 1790 PetscFunctionBegin; 1791 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1792 if (reuse == MAT_INPLACE_MATRIX) { 1793 if (m == n) { /* in place transpose */ 1794 PetscCall(MatDenseGetArray(A, &v)); 1795 for (j = 0; j < m; j++) { 1796 for (k = 0; k < j; k++) { 1797 tmp = v[j + k * M]; 1798 v[j + k * M] = v[k + j * M]; 1799 v[k + j * M] = tmp; 1800 } 1801 } 1802 PetscCall(MatDenseRestoreArray(A, &v)); 1803 } else { /* reuse memory, temporary allocates new memory */ 1804 PetscScalar *v2; 1805 PetscLayout tmplayout; 1806 1807 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1808 PetscCall(MatDenseGetArray(A, &v)); 1809 for (j = 0; j < n; j++) { 1810 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1811 } 1812 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1813 PetscCall(PetscFree(v2)); 1814 PetscCall(MatDenseRestoreArray(A, &v)); 1815 /* cleanup size dependent quantities */ 1816 PetscCall(VecDestroy(&mat->cvec)); 1817 PetscCall(MatDestroy(&mat->cmat)); 1818 PetscCall(PetscFree(mat->pivots)); 1819 PetscCall(PetscFree(mat->fwork)); 1820 /* swap row/col layouts */ 1821 PetscCall(PetscBLASIntCast(n, &mat->lda)); 1822 tmplayout = A->rmap; 1823 A->rmap = A->cmap; 1824 A->cmap = tmplayout; 1825 } 1826 } else { /* out-of-place transpose */ 1827 Mat tmat; 1828 Mat_SeqDense *tmatd; 1829 PetscScalar *v2; 1830 PetscInt M2; 1831 1832 if (reuse == MAT_INITIAL_MATRIX) { 1833 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1834 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1835 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1836 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1837 } else tmat = *matout; 1838 1839 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1840 PetscCall(MatDenseGetArray(tmat, &v2)); 1841 tmatd = (Mat_SeqDense *)tmat->data; 1842 M2 = tmatd->lda; 1843 for (j = 0; j < n; j++) { 1844 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1845 } 1846 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1847 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1848 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1849 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1850 *matout = tmat; 1851 } 1852 PetscFunctionReturn(PETSC_SUCCESS); 1853 } 1854 1855 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1856 { 1857 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1858 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1859 PetscInt i; 1860 const PetscScalar *v1, *v2; 1861 1862 PetscFunctionBegin; 1863 if (A1->rmap->n != A2->rmap->n) { 1864 *flg = PETSC_FALSE; 1865 PetscFunctionReturn(PETSC_SUCCESS); 1866 } 1867 if (A1->cmap->n != A2->cmap->n) { 1868 *flg = PETSC_FALSE; 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1872 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1873 for (i = 0; i < A1->cmap->n; i++) { 1874 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1875 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); 1876 v1 += mat1->lda; 1877 v2 += mat2->lda; 1878 } 1879 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1880 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1881 *flg = PETSC_TRUE; 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1886 { 1887 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1888 PetscInt i, n, len; 1889 PetscScalar *x; 1890 const PetscScalar *vv; 1891 1892 PetscFunctionBegin; 1893 PetscCall(VecGetSize(v, &n)); 1894 PetscCall(VecGetArray(v, &x)); 1895 len = PetscMin(A->rmap->n, A->cmap->n); 1896 PetscCall(MatDenseGetArrayRead(A, &vv)); 1897 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1898 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1899 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1900 PetscCall(VecRestoreArray(v, &x)); 1901 PetscFunctionReturn(PETSC_SUCCESS); 1902 } 1903 1904 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1905 { 1906 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1907 const PetscScalar *l, *r; 1908 PetscScalar x, *v, *vv; 1909 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1910 1911 PetscFunctionBegin; 1912 PetscCall(MatDenseGetArray(A, &vv)); 1913 if (ll) { 1914 PetscCall(VecGetSize(ll, &m)); 1915 PetscCall(VecGetArrayRead(ll, &l)); 1916 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1917 for (i = 0; i < m; i++) { 1918 x = l[i]; 1919 v = vv + i; 1920 for (j = 0; j < n; j++) { 1921 (*v) *= x; 1922 v += mat->lda; 1923 } 1924 } 1925 PetscCall(VecRestoreArrayRead(ll, &l)); 1926 PetscCall(PetscLogFlops(1.0 * n * m)); 1927 } 1928 if (rr) { 1929 PetscCall(VecGetSize(rr, &n)); 1930 PetscCall(VecGetArrayRead(rr, &r)); 1931 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1932 for (i = 0; i < n; i++) { 1933 x = r[i]; 1934 v = vv + i * mat->lda; 1935 for (j = 0; j < m; j++) (*v++) *= x; 1936 } 1937 PetscCall(VecRestoreArrayRead(rr, &r)); 1938 PetscCall(PetscLogFlops(1.0 * n * m)); 1939 } 1940 PetscCall(MatDenseRestoreArray(A, &vv)); 1941 PetscFunctionReturn(PETSC_SUCCESS); 1942 } 1943 1944 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1945 { 1946 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1947 PetscScalar *v, *vv; 1948 PetscReal sum = 0.0; 1949 PetscInt lda, m = A->rmap->n, i, j; 1950 1951 PetscFunctionBegin; 1952 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1953 PetscCall(MatDenseGetLDA(A, &lda)); 1954 v = vv; 1955 if (type == NORM_FROBENIUS) { 1956 if (lda > m) { 1957 for (j = 0; j < A->cmap->n; j++) { 1958 v = vv + j * lda; 1959 for (i = 0; i < m; i++) { 1960 sum += PetscRealPart(PetscConj(*v) * (*v)); 1961 v++; 1962 } 1963 } 1964 } else { 1965 #if defined(PETSC_USE_REAL___FP16) 1966 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1967 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1968 } 1969 #else 1970 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1971 sum += PetscRealPart(PetscConj(*v) * (*v)); 1972 v++; 1973 } 1974 } 1975 *nrm = PetscSqrtReal(sum); 1976 #endif 1977 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1978 } else if (type == NORM_1) { 1979 *nrm = 0.0; 1980 for (j = 0; j < A->cmap->n; j++) { 1981 v = vv + j * mat->lda; 1982 sum = 0.0; 1983 for (i = 0; i < A->rmap->n; i++) { 1984 sum += PetscAbsScalar(*v); 1985 v++; 1986 } 1987 if (sum > *nrm) *nrm = sum; 1988 } 1989 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1990 } else if (type == NORM_INFINITY) { 1991 *nrm = 0.0; 1992 for (j = 0; j < A->rmap->n; j++) { 1993 v = vv + j; 1994 sum = 0.0; 1995 for (i = 0; i < A->cmap->n; i++) { 1996 sum += PetscAbsScalar(*v); 1997 v += mat->lda; 1998 } 1999 if (sum > *nrm) *nrm = sum; 2000 } 2001 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2002 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2003 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2008 { 2009 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2010 2011 PetscFunctionBegin; 2012 switch (op) { 2013 case MAT_ROW_ORIENTED: 2014 aij->roworiented = flg; 2015 break; 2016 default: 2017 break; 2018 } 2019 PetscFunctionReturn(PETSC_SUCCESS); 2020 } 2021 2022 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2023 { 2024 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2025 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2026 PetscScalar *v; 2027 2028 PetscFunctionBegin; 2029 PetscCall(MatDenseGetArrayWrite(A, &v)); 2030 if (lda > m) { 2031 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2032 } else { 2033 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2034 } 2035 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2036 PetscFunctionReturn(PETSC_SUCCESS); 2037 } 2038 2039 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2040 { 2041 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2042 PetscInt m = l->lda, n = A->cmap->n, i, j; 2043 PetscScalar *slot, *bb, *v; 2044 const PetscScalar *xx; 2045 2046 PetscFunctionBegin; 2047 if (PetscDefined(USE_DEBUG)) { 2048 for (i = 0; i < N; i++) { 2049 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2050 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2051 } 2052 } 2053 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 2054 2055 /* fix right-hand side if needed */ 2056 if (x && b) { 2057 PetscCall(VecGetArrayRead(x, &xx)); 2058 PetscCall(VecGetArray(b, &bb)); 2059 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2060 PetscCall(VecRestoreArrayRead(x, &xx)); 2061 PetscCall(VecRestoreArray(b, &bb)); 2062 } 2063 2064 PetscCall(MatDenseGetArray(A, &v)); 2065 for (i = 0; i < N; i++) { 2066 slot = v + rows[i]; 2067 for (j = 0; j < n; j++) { 2068 *slot = 0.0; 2069 slot += m; 2070 } 2071 } 2072 if (diag != 0.0) { 2073 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2074 for (i = 0; i < N; i++) { 2075 slot = v + (m + 1) * rows[i]; 2076 *slot = diag; 2077 } 2078 } 2079 PetscCall(MatDenseRestoreArray(A, &v)); 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2084 { 2085 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2086 2087 PetscFunctionBegin; 2088 *lda = mat->lda; 2089 PetscFunctionReturn(PETSC_SUCCESS); 2090 } 2091 2092 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2093 { 2094 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2095 2096 PetscFunctionBegin; 2097 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2098 *array = mat->v; 2099 PetscFunctionReturn(PETSC_SUCCESS); 2100 } 2101 2102 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2103 { 2104 PetscFunctionBegin; 2105 if (array) *array = NULL; 2106 PetscFunctionReturn(PETSC_SUCCESS); 2107 } 2108 2109 /*@ 2110 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2111 2112 Not Collective 2113 2114 Input Parameter: 2115 . A - a `MATDENSE` or `MATDENSECUDA` matrix 2116 2117 Output Parameter: 2118 . lda - the leading dimension 2119 2120 Level: intermediate 2121 2122 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2123 @*/ 2124 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2125 { 2126 PetscFunctionBegin; 2127 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2128 PetscAssertPointer(lda, 2); 2129 MatCheckPreallocated(A, 1); 2130 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2131 PetscFunctionReturn(PETSC_SUCCESS); 2132 } 2133 2134 /*@ 2135 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2136 2137 Collective if the matrix layouts have not yet been setup 2138 2139 Input Parameters: 2140 + A - a `MATDENSE` or `MATDENSECUDA` matrix 2141 - lda - the leading dimension 2142 2143 Level: intermediate 2144 2145 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2146 @*/ 2147 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2148 { 2149 PetscFunctionBegin; 2150 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2151 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2152 PetscFunctionReturn(PETSC_SUCCESS); 2153 } 2154 2155 /*@C 2156 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2157 2158 Logically Collective 2159 2160 Input Parameter: 2161 . A - a dense matrix 2162 2163 Output Parameter: 2164 . array - pointer to the data 2165 2166 Level: intermediate 2167 2168 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2169 @*/ 2170 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) PeNS 2171 { 2172 PetscFunctionBegin; 2173 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2174 PetscAssertPointer(array, 2); 2175 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2176 PetscFunctionReturn(PETSC_SUCCESS); 2177 } 2178 2179 /*@C 2180 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2181 2182 Logically Collective 2183 2184 Input Parameters: 2185 + A - a dense matrix 2186 - array - pointer to the data (may be `NULL`) 2187 2188 Level: intermediate 2189 2190 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2191 @*/ 2192 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) PeNS 2193 { 2194 PetscFunctionBegin; 2195 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2196 if (array) PetscAssertPointer(array, 2); 2197 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2198 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2199 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2200 A->offloadmask = PETSC_OFFLOAD_CPU; 2201 #endif 2202 PetscFunctionReturn(PETSC_SUCCESS); 2203 } 2204 2205 /*@C 2206 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2207 2208 Not Collective 2209 2210 Input Parameter: 2211 . A - a dense matrix 2212 2213 Output Parameter: 2214 . array - pointer to the data 2215 2216 Level: intermediate 2217 2218 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2219 @*/ 2220 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) PeNS 2221 { 2222 PetscFunctionBegin; 2223 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2224 PetscAssertPointer(array, 2); 2225 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2226 PetscFunctionReturn(PETSC_SUCCESS); 2227 } 2228 2229 /*@C 2230 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2231 2232 Not Collective 2233 2234 Input Parameters: 2235 + A - a dense matrix 2236 - array - pointer to the data (may be `NULL`) 2237 2238 Level: intermediate 2239 2240 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2241 @*/ 2242 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) PeNS 2243 { 2244 PetscFunctionBegin; 2245 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2246 if (array) PetscAssertPointer(array, 2); 2247 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2248 PetscFunctionReturn(PETSC_SUCCESS); 2249 } 2250 2251 /*@C 2252 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2253 2254 Not Collective 2255 2256 Input Parameter: 2257 . A - a dense matrix 2258 2259 Output Parameter: 2260 . array - pointer to the data 2261 2262 Level: intermediate 2263 2264 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2265 @*/ 2266 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) PeNS 2267 { 2268 PetscFunctionBegin; 2269 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2270 PetscAssertPointer(array, 2); 2271 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2272 PetscFunctionReturn(PETSC_SUCCESS); 2273 } 2274 2275 /*@C 2276 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2277 2278 Not Collective 2279 2280 Input Parameters: 2281 + A - a dense matrix 2282 - array - pointer to the data (may be `NULL`) 2283 2284 Level: intermediate 2285 2286 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2287 @*/ 2288 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) PeNS 2289 { 2290 PetscFunctionBegin; 2291 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2292 if (array) PetscAssertPointer(array, 2); 2293 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2294 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2295 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2296 A->offloadmask = PETSC_OFFLOAD_CPU; 2297 #endif 2298 PetscFunctionReturn(PETSC_SUCCESS); 2299 } 2300 2301 /*@C 2302 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2303 2304 Logically Collective 2305 2306 Input Parameter: 2307 . A - a dense matrix 2308 2309 Output Parameters: 2310 + array - pointer to the data 2311 - mtype - memory type of the returned pointer 2312 2313 Level: intermediate 2314 2315 Note: 2316 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2317 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2318 2319 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, 2320 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2321 @*/ 2322 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2323 { 2324 PetscBool isMPI; 2325 2326 PetscFunctionBegin; 2327 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2328 PetscAssertPointer(array, 2); 2329 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2330 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2331 if (isMPI) { 2332 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2333 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2334 } else { 2335 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2336 2337 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); 2338 if (fptr) { 2339 PetscCall((*fptr)(A, array, mtype)); 2340 } else { 2341 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2342 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2343 } 2344 } 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } 2347 2348 /*@C 2349 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` 2350 2351 Logically Collective 2352 2353 Input Parameters: 2354 + A - a dense matrix 2355 - array - pointer to the data 2356 2357 Level: intermediate 2358 2359 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2360 @*/ 2361 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[]) 2362 { 2363 PetscBool isMPI; 2364 2365 PetscFunctionBegin; 2366 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2367 if (array) PetscAssertPointer(array, 2); 2368 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2369 if (isMPI) { 2370 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2371 } else { 2372 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2373 2374 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); 2375 if (fptr) { 2376 PetscCall((*fptr)(A, array)); 2377 } else { 2378 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2379 } 2380 if (array) *array = NULL; 2381 } 2382 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2383 PetscFunctionReturn(PETSC_SUCCESS); 2384 } 2385 2386 /*@C 2387 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2388 2389 Logically Collective 2390 2391 Input Parameter: 2392 . A - a dense matrix 2393 2394 Output Parameters: 2395 + array - pointer to the data 2396 - mtype - memory type of the returned pointer 2397 2398 Level: intermediate 2399 2400 Note: 2401 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2402 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2403 2404 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, 2405 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2406 @*/ 2407 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype) 2408 { 2409 PetscBool isMPI; 2410 2411 PetscFunctionBegin; 2412 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2413 PetscAssertPointer(array, 2); 2414 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2415 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2416 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2417 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2418 } else { 2419 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); 2420 2421 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); 2422 if (fptr) { 2423 PetscCall((*fptr)(A, array, mtype)); 2424 } else { 2425 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2426 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2427 } 2428 } 2429 PetscFunctionReturn(PETSC_SUCCESS); 2430 } 2431 2432 /*@C 2433 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2434 2435 Logically Collective 2436 2437 Input Parameters: 2438 + A - a dense matrix 2439 - array - pointer to the data 2440 2441 Level: intermediate 2442 2443 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2444 @*/ 2445 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[]) 2446 { 2447 PetscBool isMPI; 2448 2449 PetscFunctionBegin; 2450 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2451 if (array) PetscAssertPointer(array, 2); 2452 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2453 if (isMPI) { 2454 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2455 } else { 2456 PetscErrorCode (*fptr)(Mat, const PetscScalar **); 2457 2458 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); 2459 if (fptr) { 2460 PetscCall((*fptr)(A, array)); 2461 } else { 2462 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2463 } 2464 if (array) *array = NULL; 2465 } 2466 PetscFunctionReturn(PETSC_SUCCESS); 2467 } 2468 2469 /*@C 2470 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2471 2472 Logically Collective 2473 2474 Input Parameter: 2475 . A - a dense matrix 2476 2477 Output Parameters: 2478 + array - pointer to the data 2479 - mtype - memory type of the returned pointer 2480 2481 Level: intermediate 2482 2483 Note: 2484 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2485 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2486 2487 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, 2488 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2489 @*/ 2490 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2491 { 2492 PetscBool isMPI; 2493 2494 PetscFunctionBegin; 2495 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2496 PetscAssertPointer(array, 2); 2497 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2498 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2499 if (isMPI) { 2500 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2501 } else { 2502 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2503 2504 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); 2505 if (fptr) { 2506 PetscCall((*fptr)(A, array, mtype)); 2507 } else { 2508 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2509 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2510 } 2511 } 2512 PetscFunctionReturn(PETSC_SUCCESS); 2513 } 2514 2515 /*@C 2516 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2517 2518 Logically Collective 2519 2520 Input Parameters: 2521 + A - a dense matrix 2522 - array - pointer to the data 2523 2524 Level: intermediate 2525 2526 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2527 @*/ 2528 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[]) 2529 { 2530 PetscBool isMPI; 2531 2532 PetscFunctionBegin; 2533 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2534 if (array) PetscAssertPointer(array, 2); 2535 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2536 if (isMPI) { 2537 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2538 } else { 2539 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2540 2541 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); 2542 if (fptr) { 2543 PetscCall((*fptr)(A, array)); 2544 } else { 2545 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2546 } 2547 if (array) *array = NULL; 2548 } 2549 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2550 PetscFunctionReturn(PETSC_SUCCESS); 2551 } 2552 2553 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2554 { 2555 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2556 PetscInt i, j, nrows, ncols, ldb; 2557 const PetscInt *irow, *icol; 2558 PetscScalar *av, *bv, *v = mat->v; 2559 Mat newmat; 2560 2561 PetscFunctionBegin; 2562 PetscCall(ISGetIndices(isrow, &irow)); 2563 PetscCall(ISGetIndices(iscol, &icol)); 2564 PetscCall(ISGetLocalSize(isrow, &nrows)); 2565 PetscCall(ISGetLocalSize(iscol, &ncols)); 2566 2567 /* Check submatrixcall */ 2568 if (scall == MAT_REUSE_MATRIX) { 2569 PetscInt n_cols, n_rows; 2570 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2571 if (n_rows != nrows || n_cols != ncols) { 2572 /* resize the result matrix to match number of requested rows/columns */ 2573 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2574 } 2575 newmat = *B; 2576 } else { 2577 /* Create and fill new matrix */ 2578 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2579 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2580 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2581 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2582 } 2583 2584 /* Now extract the data pointers and do the copy,column at a time */ 2585 PetscCall(MatDenseGetArray(newmat, &bv)); 2586 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2587 for (i = 0; i < ncols; i++) { 2588 av = v + mat->lda * icol[i]; 2589 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2590 bv += ldb; 2591 } 2592 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2593 2594 /* Assemble the matrices so that the correct flags are set */ 2595 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2596 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2597 2598 /* Free work space */ 2599 PetscCall(ISRestoreIndices(isrow, &irow)); 2600 PetscCall(ISRestoreIndices(iscol, &icol)); 2601 *B = newmat; 2602 PetscFunctionReturn(PETSC_SUCCESS); 2603 } 2604 2605 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2606 { 2607 PetscInt i; 2608 2609 PetscFunctionBegin; 2610 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2611 2612 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2613 PetscFunctionReturn(PETSC_SUCCESS); 2614 } 2615 2616 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2617 { 2618 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2619 const PetscScalar *va; 2620 PetscScalar *vb; 2621 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2622 2623 PetscFunctionBegin; 2624 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2625 if (A->ops->copy != B->ops->copy) { 2626 PetscCall(MatCopy_Basic(A, B, str)); 2627 PetscFunctionReturn(PETSC_SUCCESS); 2628 } 2629 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2630 PetscCall(MatDenseGetArrayRead(A, &va)); 2631 PetscCall(MatDenseGetArray(B, &vb)); 2632 if (lda1 > m || lda2 > m) { 2633 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2634 } else { 2635 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2636 } 2637 PetscCall(MatDenseRestoreArray(B, &vb)); 2638 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2639 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2640 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2641 PetscFunctionReturn(PETSC_SUCCESS); 2642 } 2643 2644 PetscErrorCode MatSetUp_SeqDense(Mat A) 2645 { 2646 PetscFunctionBegin; 2647 PetscCall(PetscLayoutSetUp(A->rmap)); 2648 PetscCall(PetscLayoutSetUp(A->cmap)); 2649 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 PetscErrorCode MatConjugate_SeqDense(Mat A) 2654 { 2655 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2656 PetscInt i, j; 2657 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2658 PetscScalar *aa; 2659 2660 PetscFunctionBegin; 2661 PetscCall(MatDenseGetArray(A, &aa)); 2662 for (j = 0; j < A->cmap->n; j++) { 2663 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2664 } 2665 PetscCall(MatDenseRestoreArray(A, &aa)); 2666 if (mat->tau) 2667 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2668 PetscFunctionReturn(PETSC_SUCCESS); 2669 } 2670 2671 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2672 { 2673 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2674 PetscInt i, j; 2675 PetscScalar *aa; 2676 2677 PetscFunctionBegin; 2678 PetscCall(MatDenseGetArray(A, &aa)); 2679 for (j = 0; j < A->cmap->n; j++) { 2680 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2681 } 2682 PetscCall(MatDenseRestoreArray(A, &aa)); 2683 PetscFunctionReturn(PETSC_SUCCESS); 2684 } 2685 2686 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2687 { 2688 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2689 PetscInt i, j; 2690 PetscScalar *aa; 2691 2692 PetscFunctionBegin; 2693 PetscCall(MatDenseGetArray(A, &aa)); 2694 for (j = 0; j < A->cmap->n; j++) { 2695 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2696 } 2697 PetscCall(MatDenseRestoreArray(A, &aa)); 2698 PetscFunctionReturn(PETSC_SUCCESS); 2699 } 2700 2701 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2702 { 2703 PetscInt m = A->rmap->n, n = B->cmap->n; 2704 PetscBool cisdense = PETSC_FALSE; 2705 2706 PetscFunctionBegin; 2707 PetscCall(MatSetSizes(C, m, n, m, n)); 2708 #if defined(PETSC_HAVE_CUDA) 2709 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2710 #endif 2711 #if defined(PETSC_HAVE_HIP) 2712 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2713 #endif 2714 if (!cisdense) { 2715 PetscBool flg; 2716 2717 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2718 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2719 } 2720 PetscCall(MatSetUp(C)); 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2725 { 2726 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2727 PetscBLASInt m, n, k; 2728 const PetscScalar *av, *bv; 2729 PetscScalar *cv; 2730 PetscScalar _DOne = 1.0, _DZero = 0.0; 2731 2732 PetscFunctionBegin; 2733 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2734 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2735 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2736 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2737 PetscCall(MatDenseGetArrayRead(A, &av)); 2738 PetscCall(MatDenseGetArrayRead(B, &bv)); 2739 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2740 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2741 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2742 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2743 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2744 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2745 PetscFunctionReturn(PETSC_SUCCESS); 2746 } 2747 2748 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2749 { 2750 PetscInt m = A->rmap->n, n = B->rmap->n; 2751 PetscBool cisdense = PETSC_FALSE; 2752 2753 PetscFunctionBegin; 2754 PetscCall(MatSetSizes(C, m, n, m, n)); 2755 #if defined(PETSC_HAVE_CUDA) 2756 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2757 #endif 2758 #if defined(PETSC_HAVE_HIP) 2759 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2760 #endif 2761 if (!cisdense) { 2762 PetscBool flg; 2763 2764 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2765 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2766 } 2767 PetscCall(MatSetUp(C)); 2768 PetscFunctionReturn(PETSC_SUCCESS); 2769 } 2770 2771 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2772 { 2773 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2774 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2775 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2776 const PetscScalar *av, *bv; 2777 PetscScalar *cv; 2778 PetscBLASInt m, n, k; 2779 PetscScalar _DOne = 1.0, _DZero = 0.0; 2780 2781 PetscFunctionBegin; 2782 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2783 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2784 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2785 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2786 PetscCall(MatDenseGetArrayRead(A, &av)); 2787 PetscCall(MatDenseGetArrayRead(B, &bv)); 2788 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2789 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2790 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2791 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2792 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2793 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2794 PetscFunctionReturn(PETSC_SUCCESS); 2795 } 2796 2797 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2798 { 2799 PetscInt m = A->cmap->n, n = B->cmap->n; 2800 PetscBool cisdense = PETSC_FALSE; 2801 2802 PetscFunctionBegin; 2803 PetscCall(MatSetSizes(C, m, n, m, n)); 2804 #if defined(PETSC_HAVE_CUDA) 2805 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2806 #endif 2807 #if defined(PETSC_HAVE_HIP) 2808 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2809 #endif 2810 if (!cisdense) { 2811 PetscBool flg; 2812 2813 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2814 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2815 } 2816 PetscCall(MatSetUp(C)); 2817 PetscFunctionReturn(PETSC_SUCCESS); 2818 } 2819 2820 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2821 { 2822 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2823 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2824 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2825 const PetscScalar *av, *bv; 2826 PetscScalar *cv; 2827 PetscBLASInt m, n, k; 2828 PetscScalar _DOne = 1.0, _DZero = 0.0; 2829 2830 PetscFunctionBegin; 2831 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2832 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2833 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2834 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2835 PetscCall(MatDenseGetArrayRead(A, &av)); 2836 PetscCall(MatDenseGetArrayRead(B, &bv)); 2837 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2838 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2839 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2840 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2841 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2842 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2843 PetscFunctionReturn(PETSC_SUCCESS); 2844 } 2845 2846 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2847 { 2848 PetscFunctionBegin; 2849 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2850 C->ops->productsymbolic = MatProductSymbolic_AB; 2851 PetscFunctionReturn(PETSC_SUCCESS); 2852 } 2853 2854 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2855 { 2856 PetscFunctionBegin; 2857 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2858 C->ops->productsymbolic = MatProductSymbolic_AtB; 2859 PetscFunctionReturn(PETSC_SUCCESS); 2860 } 2861 2862 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2863 { 2864 PetscFunctionBegin; 2865 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2866 C->ops->productsymbolic = MatProductSymbolic_ABt; 2867 PetscFunctionReturn(PETSC_SUCCESS); 2868 } 2869 2870 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2871 { 2872 Mat_Product *product = C->product; 2873 2874 PetscFunctionBegin; 2875 switch (product->type) { 2876 case MATPRODUCT_AB: 2877 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2878 break; 2879 case MATPRODUCT_AtB: 2880 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2881 break; 2882 case MATPRODUCT_ABt: 2883 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2884 break; 2885 default: 2886 break; 2887 } 2888 PetscFunctionReturn(PETSC_SUCCESS); 2889 } 2890 2891 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2892 { 2893 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2894 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2895 PetscScalar *x; 2896 const PetscScalar *aa; 2897 2898 PetscFunctionBegin; 2899 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2900 PetscCall(VecGetArray(v, &x)); 2901 PetscCall(VecGetLocalSize(v, &p)); 2902 PetscCall(MatDenseGetArrayRead(A, &aa)); 2903 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2904 for (i = 0; i < m; i++) { 2905 x[i] = aa[i]; 2906 if (idx) idx[i] = 0; 2907 for (j = 1; j < n; j++) { 2908 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2909 x[i] = aa[i + a->lda * j]; 2910 if (idx) idx[i] = j; 2911 } 2912 } 2913 } 2914 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2915 PetscCall(VecRestoreArray(v, &x)); 2916 PetscFunctionReturn(PETSC_SUCCESS); 2917 } 2918 2919 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2920 { 2921 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2922 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2923 PetscScalar *x; 2924 PetscReal atmp; 2925 const PetscScalar *aa; 2926 2927 PetscFunctionBegin; 2928 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2929 PetscCall(VecGetArray(v, &x)); 2930 PetscCall(VecGetLocalSize(v, &p)); 2931 PetscCall(MatDenseGetArrayRead(A, &aa)); 2932 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2933 for (i = 0; i < m; i++) { 2934 x[i] = PetscAbsScalar(aa[i]); 2935 for (j = 1; j < n; j++) { 2936 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2937 if (PetscAbsScalar(x[i]) < atmp) { 2938 x[i] = atmp; 2939 if (idx) idx[i] = j; 2940 } 2941 } 2942 } 2943 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2944 PetscCall(VecRestoreArray(v, &x)); 2945 PetscFunctionReturn(PETSC_SUCCESS); 2946 } 2947 2948 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2949 { 2950 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2951 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2952 PetscScalar *x; 2953 const PetscScalar *aa; 2954 2955 PetscFunctionBegin; 2956 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2957 PetscCall(MatDenseGetArrayRead(A, &aa)); 2958 PetscCall(VecGetArray(v, &x)); 2959 PetscCall(VecGetLocalSize(v, &p)); 2960 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2961 for (i = 0; i < m; i++) { 2962 x[i] = aa[i]; 2963 if (idx) idx[i] = 0; 2964 for (j = 1; j < n; j++) { 2965 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2966 x[i] = aa[i + a->lda * j]; 2967 if (idx) idx[i] = j; 2968 } 2969 } 2970 } 2971 PetscCall(VecRestoreArray(v, &x)); 2972 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2973 PetscFunctionReturn(PETSC_SUCCESS); 2974 } 2975 2976 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 2977 { 2978 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2979 PetscScalar *x; 2980 const PetscScalar *aa; 2981 2982 PetscFunctionBegin; 2983 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2984 PetscCall(MatDenseGetArrayRead(A, &aa)); 2985 PetscCall(VecGetArray(v, &x)); 2986 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2987 PetscCall(VecRestoreArray(v, &x)); 2988 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2989 PetscFunctionReturn(PETSC_SUCCESS); 2990 } 2991 2992 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 2993 { 2994 PetscInt i, j, m, n; 2995 const PetscScalar *a; 2996 2997 PetscFunctionBegin; 2998 PetscCall(MatGetSize(A, &m, &n)); 2999 PetscCall(PetscArrayzero(reductions, n)); 3000 PetscCall(MatDenseGetArrayRead(A, &a)); 3001 if (type == NORM_2) { 3002 for (i = 0; i < n; i++) { 3003 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 3004 a = PetscSafePointerPlusOffset(a, m); 3005 } 3006 } else if (type == NORM_1) { 3007 for (i = 0; i < n; i++) { 3008 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 3009 a = PetscSafePointerPlusOffset(a, m); 3010 } 3011 } else if (type == NORM_INFINITY) { 3012 for (i = 0; i < n; i++) { 3013 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 3014 a = PetscSafePointerPlusOffset(a, m); 3015 } 3016 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 3017 for (i = 0; i < n; i++) { 3018 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 3019 a = PetscSafePointerPlusOffset(a, m); 3020 } 3021 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3022 for (i = 0; i < n; i++) { 3023 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 3024 a = PetscSafePointerPlusOffset(a, m); 3025 } 3026 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3027 PetscCall(MatDenseRestoreArrayRead(A, &a)); 3028 if (type == NORM_2) { 3029 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 3030 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3031 for (i = 0; i < n; i++) reductions[i] /= m; 3032 } 3033 PetscFunctionReturn(PETSC_SUCCESS); 3034 } 3035 3036 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 3037 { 3038 PetscScalar *a; 3039 PetscInt lda, m, n, i, j; 3040 3041 PetscFunctionBegin; 3042 PetscCall(MatGetSize(x, &m, &n)); 3043 PetscCall(MatDenseGetLDA(x, &lda)); 3044 PetscCall(MatDenseGetArrayWrite(x, &a)); 3045 for (j = 0; j < n; j++) { 3046 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 3047 } 3048 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 3049 PetscFunctionReturn(PETSC_SUCCESS); 3050 } 3051 3052 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 3053 { 3054 PetscFunctionBegin; 3055 *missing = PETSC_FALSE; 3056 PetscFunctionReturn(PETSC_SUCCESS); 3057 } 3058 3059 /* vals is not const */ 3060 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 3061 { 3062 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3063 PetscScalar *v; 3064 3065 PetscFunctionBegin; 3066 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3067 PetscCall(MatDenseGetArray(A, &v)); 3068 *vals = v + col * a->lda; 3069 PetscCall(MatDenseRestoreArray(A, &v)); 3070 PetscFunctionReturn(PETSC_SUCCESS); 3071 } 3072 3073 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 3074 { 3075 PetscFunctionBegin; 3076 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 3077 PetscFunctionReturn(PETSC_SUCCESS); 3078 } 3079 3080 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 3081 MatGetRow_SeqDense, 3082 MatRestoreRow_SeqDense, 3083 MatMult_SeqDense, 3084 /* 4*/ MatMultAdd_SeqDense, 3085 MatMultTranspose_SeqDense, 3086 MatMultTransposeAdd_SeqDense, 3087 NULL, 3088 NULL, 3089 NULL, 3090 /* 10*/ NULL, 3091 MatLUFactor_SeqDense, 3092 MatCholeskyFactor_SeqDense, 3093 MatSOR_SeqDense, 3094 MatTranspose_SeqDense, 3095 /* 15*/ MatGetInfo_SeqDense, 3096 MatEqual_SeqDense, 3097 MatGetDiagonal_SeqDense, 3098 MatDiagonalScale_SeqDense, 3099 MatNorm_SeqDense, 3100 /* 20*/ NULL, 3101 NULL, 3102 MatSetOption_SeqDense, 3103 MatZeroEntries_SeqDense, 3104 /* 24*/ MatZeroRows_SeqDense, 3105 NULL, 3106 NULL, 3107 NULL, 3108 NULL, 3109 /* 29*/ MatSetUp_SeqDense, 3110 NULL, 3111 NULL, 3112 NULL, 3113 NULL, 3114 /* 34*/ MatDuplicate_SeqDense, 3115 NULL, 3116 NULL, 3117 NULL, 3118 NULL, 3119 /* 39*/ MatAXPY_SeqDense, 3120 MatCreateSubMatrices_SeqDense, 3121 NULL, 3122 MatGetValues_SeqDense, 3123 MatCopy_SeqDense, 3124 /* 44*/ MatGetRowMax_SeqDense, 3125 MatScale_SeqDense, 3126 MatShift_SeqDense, 3127 NULL, 3128 MatZeroRowsColumns_SeqDense, 3129 /* 49*/ MatSetRandom_SeqDense, 3130 NULL, 3131 NULL, 3132 NULL, 3133 NULL, 3134 /* 54*/ NULL, 3135 NULL, 3136 NULL, 3137 NULL, 3138 NULL, 3139 /* 59*/ MatCreateSubMatrix_SeqDense, 3140 MatDestroy_SeqDense, 3141 MatView_SeqDense, 3142 NULL, 3143 NULL, 3144 /* 64*/ NULL, 3145 NULL, 3146 NULL, 3147 NULL, 3148 MatGetRowMaxAbs_SeqDense, 3149 /* 69*/ NULL, 3150 NULL, 3151 NULL, 3152 NULL, 3153 NULL, 3154 /* 74*/ NULL, 3155 NULL, 3156 NULL, 3157 NULL, 3158 MatLoad_SeqDense, 3159 /* 79*/ MatIsSymmetric_SeqDense, 3160 MatIsHermitian_SeqDense, 3161 NULL, 3162 NULL, 3163 NULL, 3164 /* 84*/ NULL, 3165 MatMatMultNumeric_SeqDense_SeqDense, 3166 NULL, 3167 NULL, 3168 MatMatTransposeMultNumeric_SeqDense_SeqDense, 3169 /* 89*/ NULL, 3170 MatProductSetFromOptions_SeqDense, 3171 NULL, 3172 NULL, 3173 MatConjugate_SeqDense, 3174 /* 94*/ NULL, 3175 NULL, 3176 MatRealPart_SeqDense, 3177 MatImaginaryPart_SeqDense, 3178 NULL, 3179 /* 99*/ NULL, 3180 NULL, 3181 NULL, 3182 MatGetRowMin_SeqDense, 3183 MatGetColumnVector_SeqDense, 3184 /*104*/ MatMissingDiagonal_SeqDense, 3185 NULL, 3186 NULL, 3187 NULL, 3188 NULL, 3189 /*109*/ NULL, 3190 NULL, 3191 NULL, 3192 MatMultHermitianTranspose_SeqDense, 3193 MatMultHermitianTransposeAdd_SeqDense, 3194 /*114*/ NULL, 3195 NULL, 3196 MatGetColumnReductions_SeqDense, 3197 NULL, 3198 NULL, 3199 /*119*/ NULL, 3200 NULL, 3201 NULL, 3202 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3203 NULL, 3204 /*124*/ NULL, 3205 NULL, 3206 NULL, 3207 NULL, 3208 NULL, 3209 /*129*/ NULL, 3210 NULL, 3211 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3212 NULL, 3213 NULL, 3214 /*134*/ NULL, 3215 NULL, 3216 NULL, 3217 NULL, 3218 NULL, 3219 /*139*/ NULL, 3220 NULL, 3221 NULL, 3222 NULL, 3223 NULL}; 3224 3225 /*@ 3226 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3227 is stored in column major order (the usual Fortran format). 3228 3229 Collective 3230 3231 Input Parameters: 3232 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3233 . m - number of rows 3234 . n - number of columns 3235 - data - optional location of matrix data in column major order. Use `NULL` for PETSc 3236 to control all matrix memory allocation. 3237 3238 Output Parameter: 3239 . A - the matrix 3240 3241 Level: intermediate 3242 3243 Note: 3244 The data input variable is intended primarily for Fortran programmers 3245 who wish to allocate their own matrix memory space. Most users should 3246 set `data` = `NULL`. 3247 3248 Developer Note: 3249 Many of the matrix operations for this variant use the BLAS and LAPACK routines. 3250 3251 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3252 @*/ 3253 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A) 3254 { 3255 PetscFunctionBegin; 3256 PetscCall(MatCreate(comm, A)); 3257 PetscCall(MatSetSizes(*A, m, n, m, n)); 3258 PetscCall(MatSetType(*A, MATSEQDENSE)); 3259 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3260 PetscFunctionReturn(PETSC_SUCCESS); 3261 } 3262 3263 /*@ 3264 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3265 3266 Collective 3267 3268 Input Parameters: 3269 + B - the matrix 3270 - data - the array (or `NULL`) 3271 3272 Level: intermediate 3273 3274 Note: 3275 The data input variable is intended primarily for Fortran programmers 3276 who wish to allocate their own matrix memory space. Most users should 3277 need not call this routine. 3278 3279 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3280 @*/ 3281 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3282 { 3283 PetscFunctionBegin; 3284 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3285 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3286 PetscFunctionReturn(PETSC_SUCCESS); 3287 } 3288 3289 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3290 { 3291 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3292 3293 PetscFunctionBegin; 3294 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3295 B->preallocated = PETSC_TRUE; 3296 3297 PetscCall(PetscLayoutSetUp(B->rmap)); 3298 PetscCall(PetscLayoutSetUp(B->cmap)); 3299 3300 if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda)); 3301 3302 if (!data) { /* petsc-allocated storage */ 3303 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3304 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3305 3306 b->user_alloc = PETSC_FALSE; 3307 } else { /* user-allocated storage */ 3308 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3309 b->v = data; 3310 b->user_alloc = PETSC_TRUE; 3311 } 3312 B->assembled = PETSC_TRUE; 3313 PetscFunctionReturn(PETSC_SUCCESS); 3314 } 3315 3316 #if defined(PETSC_HAVE_ELEMENTAL) 3317 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3318 { 3319 Mat mat_elemental; 3320 const PetscScalar *array; 3321 PetscScalar *v_colwise; 3322 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3323 3324 PetscFunctionBegin; 3325 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3326 PetscCall(MatDenseGetArrayRead(A, &array)); 3327 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3328 k = 0; 3329 for (j = 0; j < N; j++) { 3330 cols[j] = j; 3331 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3332 } 3333 for (i = 0; i < M; i++) rows[i] = i; 3334 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3335 3336 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3337 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3338 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3339 PetscCall(MatSetUp(mat_elemental)); 3340 3341 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3342 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3343 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3344 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3345 PetscCall(PetscFree3(v_colwise, rows, cols)); 3346 3347 if (reuse == MAT_INPLACE_MATRIX) { 3348 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3349 } else { 3350 *newmat = mat_elemental; 3351 } 3352 PetscFunctionReturn(PETSC_SUCCESS); 3353 } 3354 #endif 3355 3356 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3357 { 3358 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3359 PetscBool data; 3360 3361 PetscFunctionBegin; 3362 data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 3363 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3364 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3365 PetscCall(PetscBLASIntCast(lda, &b->lda)); 3366 PetscFunctionReturn(PETSC_SUCCESS); 3367 } 3368 3369 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3370 { 3371 PetscFunctionBegin; 3372 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3373 PetscFunctionReturn(PETSC_SUCCESS); 3374 } 3375 3376 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) 3377 { 3378 PetscBool isstd, iskok, iscuda, iship; 3379 PetscMPIInt size; 3380 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) 3381 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ 3382 const PetscScalar *a; 3383 #endif 3384 3385 PetscFunctionBegin; 3386 *v = NULL; 3387 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); 3388 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); 3389 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); 3390 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); 3391 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3392 if (isstd) { 3393 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3394 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3395 } else if (iskok) { 3396 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); 3397 #if PetscDefined(HAVE_KOKKOS_KERNELS) 3398 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3399 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3400 #endif 3401 } else if (iscuda) { 3402 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); 3403 #if PetscDefined(HAVE_CUDA) 3404 PetscCall(MatDenseCUDAGetArrayRead(A, &a)); 3405 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3406 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3407 #endif 3408 } else if (iship) { 3409 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); 3410 #if PetscDefined(HAVE_HIP) 3411 PetscCall(MatDenseHIPGetArrayRead(A, &a)); 3412 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3413 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3414 #endif 3415 } 3416 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); 3417 PetscFunctionReturn(PETSC_SUCCESS); 3418 } 3419 3420 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3421 { 3422 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3423 3424 PetscFunctionBegin; 3425 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3426 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3427 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3428 a->vecinuse = col + 1; 3429 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3430 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3431 *v = a->cvec; 3432 PetscFunctionReturn(PETSC_SUCCESS); 3433 } 3434 3435 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3436 { 3437 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3438 3439 PetscFunctionBegin; 3440 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3441 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3442 VecCheckAssembled(a->cvec); 3443 a->vecinuse = 0; 3444 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3445 PetscCall(VecResetArray(a->cvec)); 3446 if (v) *v = NULL; 3447 PetscFunctionReturn(PETSC_SUCCESS); 3448 } 3449 3450 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3451 { 3452 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3453 3454 PetscFunctionBegin; 3455 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3456 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3457 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3458 a->vecinuse = col + 1; 3459 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3460 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3461 PetscCall(VecLockReadPush(a->cvec)); 3462 *v = a->cvec; 3463 PetscFunctionReturn(PETSC_SUCCESS); 3464 } 3465 3466 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3467 { 3468 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3469 3470 PetscFunctionBegin; 3471 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3472 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3473 VecCheckAssembled(a->cvec); 3474 a->vecinuse = 0; 3475 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3476 PetscCall(VecLockReadPop(a->cvec)); 3477 PetscCall(VecResetArray(a->cvec)); 3478 if (v) *v = NULL; 3479 PetscFunctionReturn(PETSC_SUCCESS); 3480 } 3481 3482 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3483 { 3484 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3485 3486 PetscFunctionBegin; 3487 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3488 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3489 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3490 a->vecinuse = col + 1; 3491 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3492 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3493 *v = a->cvec; 3494 PetscFunctionReturn(PETSC_SUCCESS); 3495 } 3496 3497 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3498 { 3499 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3500 3501 PetscFunctionBegin; 3502 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3503 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3504 VecCheckAssembled(a->cvec); 3505 a->vecinuse = 0; 3506 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3507 PetscCall(VecResetArray(a->cvec)); 3508 if (v) *v = NULL; 3509 PetscFunctionReturn(PETSC_SUCCESS); 3510 } 3511 3512 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3513 { 3514 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3515 3516 PetscFunctionBegin; 3517 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3518 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3519 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3520 if (!a->cmat) { 3521 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); 3522 } else { 3523 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); 3524 } 3525 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3526 a->matinuse = cbegin + 1; 3527 *v = a->cmat; 3528 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3529 A->offloadmask = PETSC_OFFLOAD_CPU; 3530 #endif 3531 PetscFunctionReturn(PETSC_SUCCESS); 3532 } 3533 3534 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3535 { 3536 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3537 3538 PetscFunctionBegin; 3539 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3540 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3541 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3542 a->matinuse = 0; 3543 PetscCall(MatDenseResetArray(a->cmat)); 3544 if (v) *v = NULL; 3545 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3546 A->offloadmask = PETSC_OFFLOAD_CPU; 3547 #endif 3548 PetscFunctionReturn(PETSC_SUCCESS); 3549 } 3550 3551 /*MC 3552 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3553 3554 Options Database Key: 3555 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3556 3557 Level: beginner 3558 3559 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` 3560 M*/ 3561 PetscErrorCode MatCreate_SeqDense(Mat B) 3562 { 3563 Mat_SeqDense *b; 3564 PetscMPIInt size; 3565 3566 PetscFunctionBegin; 3567 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3568 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3569 3570 PetscCall(PetscNew(&b)); 3571 B->data = (void *)b; 3572 B->ops[0] = MatOps_Values; 3573 3574 b->roworiented = PETSC_TRUE; 3575 3576 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3577 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3578 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3579 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3580 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3581 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3582 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3583 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3584 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3585 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3586 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3587 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3588 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3589 #if defined(PETSC_HAVE_ELEMENTAL) 3590 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3591 #endif 3592 #if defined(PETSC_HAVE_SCALAPACK) 3593 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3594 #endif 3595 #if defined(PETSC_HAVE_CUDA) 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3600 #endif 3601 #if defined(PETSC_HAVE_HIP) 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3606 #endif 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3609 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3610 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3611 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3612 3613 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3614 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3615 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3616 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3617 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3618 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3619 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3620 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3621 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3622 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3623 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultColumnRange_C", MatMultColumnRange_SeqDense)); 3624 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); 3626 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); 3627 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3628 PetscFunctionReturn(PETSC_SUCCESS); 3629 } 3630 3631 /*@C 3632 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3633 3634 Not Collective 3635 3636 Input Parameters: 3637 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3638 - col - column index 3639 3640 Output Parameter: 3641 . vals - pointer to the data 3642 3643 Level: intermediate 3644 3645 Note: 3646 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3647 3648 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3649 @*/ 3650 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[]) 3651 { 3652 PetscFunctionBegin; 3653 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3654 PetscValidLogicalCollectiveInt(A, col, 2); 3655 PetscAssertPointer(vals, 3); 3656 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3657 PetscFunctionReturn(PETSC_SUCCESS); 3658 } 3659 3660 /*@C 3661 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3662 3663 Not Collective 3664 3665 Input Parameters: 3666 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3667 - vals - pointer to the data (may be `NULL`) 3668 3669 Level: intermediate 3670 3671 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` 3672 @*/ 3673 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[]) 3674 { 3675 PetscFunctionBegin; 3676 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3677 PetscAssertPointer(vals, 2); 3678 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3679 PetscFunctionReturn(PETSC_SUCCESS); 3680 } 3681 3682 /*@ 3683 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3684 3685 Collective 3686 3687 Input Parameters: 3688 + A - the `Mat` object 3689 - col - the column index 3690 3691 Output Parameter: 3692 . v - the vector 3693 3694 Level: intermediate 3695 3696 Notes: 3697 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3698 3699 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3700 3701 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3702 @*/ 3703 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3704 { 3705 PetscFunctionBegin; 3706 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3707 PetscValidType(A, 1); 3708 PetscValidLogicalCollectiveInt(A, col, 2); 3709 PetscAssertPointer(v, 3); 3710 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3711 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3712 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3713 PetscFunctionReturn(PETSC_SUCCESS); 3714 } 3715 3716 /*@ 3717 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. 3718 3719 Collective 3720 3721 Input Parameters: 3722 + A - the `Mat` object 3723 . col - the column index 3724 - v - the `Vec` object (may be `NULL`) 3725 3726 Level: intermediate 3727 3728 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3729 @*/ 3730 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3731 { 3732 PetscFunctionBegin; 3733 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3734 PetscValidType(A, 1); 3735 PetscValidLogicalCollectiveInt(A, col, 2); 3736 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3737 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3738 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3739 PetscFunctionReturn(PETSC_SUCCESS); 3740 } 3741 3742 /*@ 3743 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. 3744 3745 Collective 3746 3747 Input Parameters: 3748 + A - the `Mat` object 3749 - col - the column index 3750 3751 Output Parameter: 3752 . v - the vector 3753 3754 Level: intermediate 3755 3756 Notes: 3757 The vector is owned by PETSc and users cannot modify it. 3758 3759 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. 3760 3761 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. 3762 3763 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3764 @*/ 3765 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3766 { 3767 PetscFunctionBegin; 3768 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3769 PetscValidType(A, 1); 3770 PetscValidLogicalCollectiveInt(A, col, 2); 3771 PetscAssertPointer(v, 3); 3772 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3773 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3774 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3775 PetscFunctionReturn(PETSC_SUCCESS); 3776 } 3777 3778 /*@ 3779 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. 3780 3781 Collective 3782 3783 Input Parameters: 3784 + A - the `Mat` object 3785 . col - the column index 3786 - v - the `Vec` object (may be `NULL`) 3787 3788 Level: intermediate 3789 3790 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3791 @*/ 3792 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3793 { 3794 PetscFunctionBegin; 3795 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3796 PetscValidType(A, 1); 3797 PetscValidLogicalCollectiveInt(A, col, 2); 3798 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3799 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3800 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3801 PetscFunctionReturn(PETSC_SUCCESS); 3802 } 3803 3804 /*@ 3805 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. 3806 3807 Collective 3808 3809 Input Parameters: 3810 + A - the `Mat` object 3811 - col - the column index 3812 3813 Output Parameter: 3814 . v - the vector 3815 3816 Level: intermediate 3817 3818 Notes: 3819 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. 3820 3821 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. 3822 3823 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3824 @*/ 3825 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3826 { 3827 PetscFunctionBegin; 3828 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3829 PetscValidType(A, 1); 3830 PetscValidLogicalCollectiveInt(A, col, 2); 3831 PetscAssertPointer(v, 3); 3832 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3833 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3834 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3835 PetscFunctionReturn(PETSC_SUCCESS); 3836 } 3837 3838 /*@ 3839 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. 3840 3841 Collective 3842 3843 Input Parameters: 3844 + A - the `Mat` object 3845 . col - the column index 3846 - v - the `Vec` object (may be `NULL`) 3847 3848 Level: intermediate 3849 3850 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3851 @*/ 3852 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3853 { 3854 PetscFunctionBegin; 3855 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3856 PetscValidType(A, 1); 3857 PetscValidLogicalCollectiveInt(A, col, 2); 3858 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3859 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3860 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3861 PetscFunctionReturn(PETSC_SUCCESS); 3862 } 3863 3864 /*@ 3865 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. 3866 3867 Collective 3868 3869 Input Parameters: 3870 + A - the `Mat` object 3871 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) 3872 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) 3873 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) 3874 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) 3875 3876 Output Parameter: 3877 . v - the matrix 3878 3879 Level: intermediate 3880 3881 Notes: 3882 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. 3883 3884 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. 3885 3886 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3887 @*/ 3888 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3889 { 3890 PetscFunctionBegin; 3891 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3892 PetscValidType(A, 1); 3893 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3894 PetscValidLogicalCollectiveInt(A, rend, 3); 3895 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3896 PetscValidLogicalCollectiveInt(A, cend, 5); 3897 PetscAssertPointer(v, 6); 3898 if (rbegin == PETSC_DECIDE) rbegin = 0; 3899 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3900 if (cbegin == PETSC_DECIDE) cbegin = 0; 3901 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3902 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3903 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3904 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3905 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3906 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3907 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3908 PetscFunctionReturn(PETSC_SUCCESS); 3909 } 3910 3911 /*@ 3912 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. 3913 3914 Collective 3915 3916 Input Parameters: 3917 + A - the `Mat` object 3918 - v - the `Mat` object (may be `NULL`) 3919 3920 Level: intermediate 3921 3922 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3923 @*/ 3924 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3925 { 3926 PetscFunctionBegin; 3927 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3928 PetscValidType(A, 1); 3929 PetscAssertPointer(v, 2); 3930 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3931 PetscFunctionReturn(PETSC_SUCCESS); 3932 } 3933 3934 #include <petscblaslapack.h> 3935 #include <petsc/private/kernels/blockinvert.h> 3936 3937 PetscErrorCode MatSeqDenseInvert(Mat A) 3938 { 3939 PetscInt m; 3940 const PetscReal shift = 0.0; 3941 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 3942 PetscScalar *values; 3943 3944 PetscFunctionBegin; 3945 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3946 PetscCall(MatDenseGetArray(A, &values)); 3947 PetscCall(MatGetLocalSize(A, &m, NULL)); 3948 allowzeropivot = PetscNot(A->erroriffailure); 3949 /* factor and invert each block */ 3950 switch (m) { 3951 case 1: 3952 values[0] = (PetscScalar)1.0 / (values[0] + shift); 3953 break; 3954 case 2: 3955 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3956 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3957 break; 3958 case 3: 3959 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3960 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3961 break; 3962 case 4: 3963 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3964 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3965 break; 3966 case 5: { 3967 PetscScalar work[25]; 3968 PetscInt ipvt[5]; 3969 3970 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3971 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3972 } break; 3973 case 6: 3974 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3975 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3976 break; 3977 case 7: 3978 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3979 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3980 break; 3981 default: { 3982 PetscInt *v_pivots, *IJ, j; 3983 PetscScalar *v_work; 3984 3985 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); 3986 for (j = 0; j < m; j++) IJ[j] = j; 3987 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 3988 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3989 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 3990 } 3991 } 3992 PetscCall(MatDenseRestoreArray(A, &values)); 3993 PetscFunctionReturn(PETSC_SUCCESS); 3994 } 3995