1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/dense/mpi/mpidense.h> 9 #include <petscblaslapack.h> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <petsc/private/vecimpl.h> 12 13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 PetscInt j, k, n = A->rmap->n; 17 PetscScalar *v; 18 19 PetscFunctionBegin; 20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 21 PetscCall(MatDenseGetArray(A, &v)); 22 if (!hermitian) { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 25 } 26 } else { 27 for (k = 0; k < n; k++) { 28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 29 } 30 } 31 PetscCall(MatDenseRestoreArray(A, &v)); 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 36 { 37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 38 PetscBLASInt info, n; 39 40 PetscFunctionBegin; 41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 42 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 43 if (A->factortype == MAT_FACTOR_LU) { 44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 45 if (!mat->fwork) { 46 mat->lfwork = n; 47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 48 } 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 54 if (A->spd == PETSC_BOOL3_TRUE) { 55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 57 PetscCall(PetscFPTrapPop()); 58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 59 #if defined(PETSC_USE_COMPLEX) 60 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 65 PetscCall(PetscFPTrapPop()); 66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 67 #endif 68 } else { /* symmetric case */ 69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 73 PetscCall(PetscFPTrapPop()); 74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 75 } 76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 79 80 A->ops->solve = NULL; 81 A->ops->matsolve = NULL; 82 A->ops->solvetranspose = NULL; 83 A->ops->matsolvetranspose = NULL; 84 A->ops->solveadd = NULL; 85 A->ops->solvetransposeadd = NULL; 86 A->factortype = MAT_FACTOR_NONE; 87 PetscCall(PetscFree(A->solvertype)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 92 { 93 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 95 PetscScalar *slot, *bb, *v; 96 const PetscScalar *xx; 97 98 PetscFunctionBegin; 99 if (PetscDefined(USE_DEBUG)) { 100 for (i = 0; i < N; i++) { 101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 104 } 105 } 106 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 107 108 /* fix right-hand side if needed */ 109 if (x && b) { 110 Vec xt; 111 112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 113 PetscCall(VecDuplicate(x, &xt)); 114 PetscCall(VecCopy(x, xt)); 115 PetscCall(VecScale(xt, -1.0)); 116 PetscCall(MatMultAdd(A, xt, b, b)); 117 PetscCall(VecDestroy(&xt)); 118 PetscCall(VecGetArrayRead(x, &xx)); 119 PetscCall(VecGetArray(b, &bb)); 120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 121 PetscCall(VecRestoreArrayRead(x, &xx)); 122 PetscCall(VecRestoreArray(b, &bb)); 123 } 124 125 PetscCall(MatDenseGetArray(A, &v)); 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i] * m; 128 PetscCall(PetscArrayzero(slot, r)); 129 } 130 for (i = 0; i < N; i++) { 131 slot = v + rows[i]; 132 for (j = 0; j < n; j++) { 133 *slot = 0.0; 134 slot += m; 135 } 136 } 137 if (diag != 0.0) { 138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 139 for (i = 0; i < N; i++) { 140 slot = v + (m + 1) * rows[i]; 141 *slot = diag; 142 } 143 } 144 PetscCall(MatDenseRestoreArray(A, &v)); 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 149 { 150 Mat B = NULL; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 152 Mat_SeqDense *b; 153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 154 const MatScalar *av; 155 PetscBool isseqdense; 156 157 PetscFunctionBegin; 158 if (reuse == MAT_REUSE_MATRIX) { 159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); 161 } 162 if (reuse != MAT_REUSE_MATRIX) { 163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 164 PetscCall(MatSetSizes(B, m, n, m, n)); 165 PetscCall(MatSetType(B, MATSEQDENSE)); 166 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 167 b = (Mat_SeqDense *)B->data; 168 } else { 169 b = (Mat_SeqDense *)((*newmat)->data); 170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); 171 } 172 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 173 for (i = 0; i < m; i++) { 174 PetscInt j; 175 for (j = 0; j < ai[1] - ai[0]; j++) { 176 b->v[*aj * b->lda + i] = *av; 177 aj++; 178 av++; 179 } 180 ai++; 181 } 182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 183 184 if (reuse == MAT_INPLACE_MATRIX) { 185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 187 PetscCall(MatHeaderReplace(A, &B)); 188 } else { 189 if (B) *newmat = B; 190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 197 { 198 Mat B = NULL; 199 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 200 PetscInt i, j; 201 PetscInt *rows, *nnz; 202 MatScalar *aa = a->v, *vals; 203 204 PetscFunctionBegin; 205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 206 if (reuse != MAT_REUSE_MATRIX) { 207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 209 PetscCall(MatSetType(B, MATSEQAIJ)); 210 for (j = 0; j < A->cmap->n; j++) { 211 for (i = 0; i < A->rmap->n; i++) 212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 213 aa += a->lda; 214 } 215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 216 } else B = *newmat; 217 aa = a->v; 218 for (j = 0; j < A->cmap->n; j++) { 219 PetscInt numRows = 0; 220 for (i = 0; i < A->rmap->n; i++) 221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 222 rows[numRows] = i; 223 vals[numRows++] = aa[i]; 224 } 225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 226 aa += a->lda; 227 } 228 PetscCall(PetscFree3(rows, nnz, vals)); 229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 231 232 if (reuse == MAT_INPLACE_MATRIX) { 233 PetscCall(MatHeaderReplace(A, &B)); 234 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 239 { 240 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 241 const PetscScalar *xv; 242 PetscScalar *yv; 243 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 244 245 PetscFunctionBegin; 246 PetscCall(MatDenseGetArrayRead(X, &xv)); 247 PetscCall(MatDenseGetArray(Y, &yv)); 248 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 249 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 250 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 251 PetscCall(PetscBLASIntCast(y->lda, &lday)); 252 if (ldax > m || lday > m) { 253 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); 254 } else { 255 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 256 } 257 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 258 PetscCall(MatDenseRestoreArray(Y, &yv)); 259 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 264 { 265 PetscLogDouble N = A->rmap->n * A->cmap->n; 266 267 PetscFunctionBegin; 268 info->block_size = 1.0; 269 info->nz_allocated = N; 270 info->nz_used = N; 271 info->nz_unneeded = 0; 272 info->assemblies = A->num_ass; 273 info->mallocs = 0; 274 info->memory = 0; /* REVIEW ME */ 275 info->fill_ratio_given = 0; 276 info->fill_ratio_needed = 0; 277 info->factor_mallocs = 0; 278 PetscFunctionReturn(PETSC_SUCCESS); 279 } 280 281 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 282 { 283 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 284 PetscScalar *v; 285 PetscBLASInt one = 1, j, nz, lda = 0; 286 287 PetscFunctionBegin; 288 PetscCall(MatDenseGetArray(A, &v)); 289 PetscCall(PetscBLASIntCast(a->lda, &lda)); 290 if (lda > A->rmap->n) { 291 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 292 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 293 } else { 294 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 295 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 296 } 297 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 298 PetscCall(MatDenseRestoreArray(A, &v)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 303 { 304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 305 PetscScalar *v; 306 PetscInt j, k; 307 308 PetscFunctionBegin; 309 PetscCall(MatDenseGetArray(A, &v)); 310 k = PetscMin(A->rmap->n, A->cmap->n); 311 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 312 PetscCall(PetscLogFlops(k)); 313 PetscCall(MatDenseRestoreArray(A, &v)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 318 { 319 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 320 PetscInt i, j, m = A->rmap->n, N = a->lda; 321 const PetscScalar *v; 322 323 PetscFunctionBegin; 324 *fl = PETSC_FALSE; 325 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 326 PetscCall(MatDenseGetArrayRead(A, &v)); 327 for (i = 0; i < m; i++) { 328 for (j = i; j < m; j++) { 329 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 330 } 331 } 332 *fl = PETSC_TRUE; 333 restore: 334 PetscCall(MatDenseRestoreArrayRead(A, &v)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 339 { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 360 { 361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 362 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 363 PetscBool isdensecpu; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 367 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 368 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 369 PetscCall(MatDenseSetLDA(newi, lda)); 370 } 371 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 372 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 373 if (cpvalues == MAT_COPY_VALUES) { 374 const PetscScalar *av; 375 PetscScalar *v; 376 377 PetscCall(MatDenseGetArrayRead(A, &av)); 378 PetscCall(MatDenseGetArrayWrite(newi, &v)); 379 PetscCall(MatDenseGetLDA(newi, &nlda)); 380 m = A->rmap->n; 381 if (lda > m || nlda > m) { 382 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); 383 } else { 384 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 385 } 386 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 387 PetscCall(MatDenseRestoreArrayRead(A, &av)); 388 PetscCall(MatPropagateSymmetryOptions(A, newi)); 389 } 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 394 { 395 PetscFunctionBegin; 396 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 397 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 398 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 399 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 404 { 405 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 406 PetscBLASInt info; 407 408 PetscFunctionBegin; 409 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 410 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 411 PetscCall(PetscFPTrapPop()); 412 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 413 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatConjugate_SeqDense(Mat); 418 419 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 420 { 421 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 422 PetscBLASInt info; 423 424 PetscFunctionBegin; 425 if (A->spd == PETSC_BOOL3_TRUE) { 426 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 427 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 428 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 429 PetscCall(PetscFPTrapPop()); 430 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 431 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 432 #if defined(PETSC_USE_COMPLEX) 433 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 434 if (T) PetscCall(MatConjugate_SeqDense(A)); 435 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 436 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 437 PetscCall(PetscFPTrapPop()); 438 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 439 if (T) PetscCall(MatConjugate_SeqDense(A)); 440 #endif 441 } else { /* symmetric case */ 442 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 443 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 444 PetscCall(PetscFPTrapPop()); 445 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 446 } 447 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 448 PetscFunctionReturn(PETSC_SUCCESS); 449 } 450 451 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 452 { 453 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 454 PetscBLASInt info; 455 char trans; 456 457 PetscFunctionBegin; 458 if (PetscDefined(USE_COMPLEX)) { 459 trans = 'C'; 460 } else { 461 trans = 'T'; 462 } 463 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 464 { /* lwork depends on the number of right-hand sides */ 465 PetscBLASInt nlfwork, lfwork = -1; 466 PetscScalar fwork; 467 468 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 469 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 470 if (nlfwork > mat->lfwork) { 471 mat->lfwork = nlfwork; 472 PetscCall(PetscFree(mat->fwork)); 473 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 474 } 475 } 476 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 477 PetscCall(PetscFPTrapPop()); 478 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 479 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 480 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 481 PetscCall(PetscFPTrapPop()); 482 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 483 for (PetscInt j = 0; j < nrhs; j++) { 484 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 485 } 486 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 487 PetscFunctionReturn(PETSC_SUCCESS); 488 } 489 490 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 491 { 492 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 493 PetscBLASInt info; 494 495 PetscFunctionBegin; 496 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 497 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 498 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 499 PetscCall(PetscFPTrapPop()); 500 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 501 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 502 { /* lwork depends on the number of right-hand sides */ 503 PetscBLASInt nlfwork, lfwork = -1; 504 PetscScalar fwork; 505 506 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 507 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 508 if (nlfwork > mat->lfwork) { 509 mat->lfwork = nlfwork; 510 PetscCall(PetscFree(mat->fwork)); 511 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 512 } 513 } 514 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 515 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 516 PetscCall(PetscFPTrapPop()); 517 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 518 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 519 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 520 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 521 PetscFunctionReturn(PETSC_SUCCESS); 522 } 523 524 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 525 { 526 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 527 PetscScalar *y; 528 PetscBLASInt m = 0, k = 0; 529 530 PetscFunctionBegin; 531 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 532 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 533 if (k < m) { 534 PetscCall(VecCopy(xx, mat->qrrhs)); 535 PetscCall(VecGetArray(mat->qrrhs, &y)); 536 } else { 537 PetscCall(VecCopy(xx, yy)); 538 PetscCall(VecGetArray(yy, &y)); 539 } 540 *_y = y; 541 *_k = k; 542 *_m = m; 543 PetscFunctionReturn(PETSC_SUCCESS); 544 } 545 546 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 547 { 548 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 549 PetscScalar *y = NULL; 550 PetscBLASInt m, k; 551 552 PetscFunctionBegin; 553 y = *_y; 554 *_y = NULL; 555 k = *_k; 556 m = *_m; 557 if (k < m) { 558 PetscScalar *yv; 559 PetscCall(VecGetArray(yy, &yv)); 560 PetscCall(PetscArraycpy(yv, y, k)); 561 PetscCall(VecRestoreArray(yy, &yv)); 562 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 563 } else { 564 PetscCall(VecRestoreArray(yy, &y)); 565 } 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 570 { 571 PetscScalar *y = NULL; 572 PetscBLASInt m = 0, k = 0; 573 574 PetscFunctionBegin; 575 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 576 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 577 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 578 PetscFunctionReturn(PETSC_SUCCESS); 579 } 580 581 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 582 { 583 PetscScalar *y = NULL; 584 PetscBLASInt m = 0, k = 0; 585 586 PetscFunctionBegin; 587 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 588 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 589 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 590 PetscFunctionReturn(PETSC_SUCCESS); 591 } 592 593 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 594 { 595 PetscScalar *y = NULL; 596 PetscBLASInt m = 0, k = 0; 597 598 PetscFunctionBegin; 599 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 600 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 601 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 602 PetscFunctionReturn(PETSC_SUCCESS); 603 } 604 605 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 606 { 607 PetscScalar *y = NULL; 608 PetscBLASInt m = 0, k = 0; 609 610 PetscFunctionBegin; 611 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 612 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 613 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 614 PetscFunctionReturn(PETSC_SUCCESS); 615 } 616 617 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 618 { 619 PetscScalar *y = NULL; 620 PetscBLASInt m = 0, k = 0; 621 622 PetscFunctionBegin; 623 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 624 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 625 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 629 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 630 { 631 PetscScalar *y = NULL; 632 PetscBLASInt m = 0, k = 0; 633 634 PetscFunctionBegin; 635 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 636 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 637 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 642 { 643 const PetscScalar *b; 644 PetscScalar *y; 645 PetscInt n, _ldb, _ldx; 646 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 647 648 PetscFunctionBegin; 649 *_ldy = 0; 650 *_m = 0; 651 *_nrhs = 0; 652 *_k = 0; 653 *_y = NULL; 654 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 655 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 656 PetscCall(MatGetSize(B, NULL, &n)); 657 PetscCall(PetscBLASIntCast(n, &nrhs)); 658 PetscCall(MatDenseGetLDA(B, &_ldb)); 659 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 660 PetscCall(MatDenseGetLDA(X, &_ldx)); 661 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 662 if (ldx < m) { 663 PetscCall(MatDenseGetArrayRead(B, &b)); 664 PetscCall(PetscMalloc1(nrhs * m, &y)); 665 if (ldb == m) { 666 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 667 } else { 668 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 669 } 670 ldy = m; 671 PetscCall(MatDenseRestoreArrayRead(B, &b)); 672 } else { 673 if (ldb == ldx) { 674 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 675 PetscCall(MatDenseGetArray(X, &y)); 676 } else { 677 PetscCall(MatDenseGetArray(X, &y)); 678 PetscCall(MatDenseGetArrayRead(B, &b)); 679 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 680 PetscCall(MatDenseRestoreArrayRead(B, &b)); 681 } 682 ldy = ldx; 683 } 684 *_y = y; 685 *_ldy = ldy; 686 *_k = k; 687 *_m = m; 688 *_nrhs = nrhs; 689 PetscFunctionReturn(PETSC_SUCCESS); 690 } 691 692 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 693 { 694 PetscScalar *y; 695 PetscInt _ldx; 696 PetscBLASInt k, ldy, nrhs, ldx = 0; 697 698 PetscFunctionBegin; 699 y = *_y; 700 *_y = NULL; 701 k = *_k; 702 ldy = *_ldy; 703 nrhs = *_nrhs; 704 PetscCall(MatDenseGetLDA(X, &_ldx)); 705 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 706 if (ldx != ldy) { 707 PetscScalar *xv; 708 PetscCall(MatDenseGetArray(X, &xv)); 709 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 710 PetscCall(MatDenseRestoreArray(X, &xv)); 711 PetscCall(PetscFree(y)); 712 } else { 713 PetscCall(MatDenseRestoreArray(X, &y)); 714 } 715 PetscFunctionReturn(PETSC_SUCCESS); 716 } 717 718 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 719 { 720 PetscScalar *y; 721 PetscBLASInt m, k, ldy, nrhs; 722 723 PetscFunctionBegin; 724 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 725 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 726 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 727 PetscFunctionReturn(PETSC_SUCCESS); 728 } 729 730 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 731 { 732 PetscScalar *y; 733 PetscBLASInt m, k, ldy, nrhs; 734 735 PetscFunctionBegin; 736 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 737 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 738 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 739 PetscFunctionReturn(PETSC_SUCCESS); 740 } 741 742 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 743 { 744 PetscScalar *y; 745 PetscBLASInt m, k, ldy, nrhs; 746 747 PetscFunctionBegin; 748 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 749 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 750 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 755 { 756 PetscScalar *y; 757 PetscBLASInt m, k, ldy, nrhs; 758 759 PetscFunctionBegin; 760 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 761 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 762 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 767 { 768 PetscScalar *y; 769 PetscBLASInt m, k, ldy, nrhs; 770 771 PetscFunctionBegin; 772 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 773 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 774 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 775 PetscFunctionReturn(PETSC_SUCCESS); 776 } 777 778 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 779 { 780 PetscScalar *y; 781 PetscBLASInt m, k, ldy, nrhs; 782 783 PetscFunctionBegin; 784 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 786 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 /* COMMENT: I have chosen to hide row permutation in the pivots, 791 rather than put it in the Mat->row slot.*/ 792 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 793 { 794 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 795 PetscBLASInt n, m, info; 796 797 PetscFunctionBegin; 798 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 799 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 800 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 801 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 802 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 803 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 804 PetscCall(PetscFPTrapPop()); 805 806 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 807 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 808 809 A->ops->solve = MatSolve_SeqDense_LU; 810 A->ops->matsolve = MatMatSolve_SeqDense_LU; 811 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 812 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 813 A->factortype = MAT_FACTOR_LU; 814 815 PetscCall(PetscFree(A->solvertype)); 816 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 817 818 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 823 { 824 MatFactorInfo info; 825 826 PetscFunctionBegin; 827 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 828 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 829 PetscFunctionReturn(PETSC_SUCCESS); 830 } 831 832 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 833 { 834 PetscFunctionBegin; 835 fact->preallocated = PETSC_TRUE; 836 fact->assembled = PETSC_TRUE; 837 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 841 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 842 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 843 { 844 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 845 PetscBLASInt info, n; 846 847 PetscFunctionBegin; 848 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 849 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 850 if (A->spd == PETSC_BOOL3_TRUE) { 851 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 852 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 853 PetscCall(PetscFPTrapPop()); 854 #if defined(PETSC_USE_COMPLEX) 855 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 856 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 857 if (!mat->fwork) { 858 PetscScalar dummy; 859 860 mat->lfwork = -1; 861 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 862 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 863 PetscCall(PetscFPTrapPop()); 864 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 865 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 866 } 867 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 868 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 869 PetscCall(PetscFPTrapPop()); 870 #endif 871 } else { /* symmetric case */ 872 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 873 if (!mat->fwork) { 874 PetscScalar dummy; 875 876 mat->lfwork = -1; 877 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 878 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 879 PetscCall(PetscFPTrapPop()); 880 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 881 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 882 } 883 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 884 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 885 PetscCall(PetscFPTrapPop()); 886 } 887 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 888 889 A->ops->solve = MatSolve_SeqDense_Cholesky; 890 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 891 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 892 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 893 A->factortype = MAT_FACTOR_CHOLESKY; 894 895 PetscCall(PetscFree(A->solvertype)); 896 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 897 898 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 899 PetscFunctionReturn(PETSC_SUCCESS); 900 } 901 902 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 903 { 904 MatFactorInfo info; 905 906 PetscFunctionBegin; 907 info.fill = 1.0; 908 909 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 910 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 915 { 916 PetscFunctionBegin; 917 fact->assembled = PETSC_TRUE; 918 fact->preallocated = PETSC_TRUE; 919 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 920 PetscFunctionReturn(PETSC_SUCCESS); 921 } 922 923 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 924 { 925 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 926 PetscBLASInt n, m, info, min, max; 927 928 PetscFunctionBegin; 929 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 930 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 931 max = PetscMax(m, n); 932 min = PetscMin(m, n); 933 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 934 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 935 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); 936 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 937 if (!mat->fwork) { 938 PetscScalar dummy; 939 940 mat->lfwork = -1; 941 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 942 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 943 PetscCall(PetscFPTrapPop()); 944 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); 945 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 946 } 947 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 948 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 949 PetscCall(PetscFPTrapPop()); 950 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 951 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 952 mat->rank = min; 953 954 A->ops->solve = MatSolve_SeqDense_QR; 955 A->ops->matsolve = MatMatSolve_SeqDense_QR; 956 A->factortype = MAT_FACTOR_QR; 957 if (m == n) { 958 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 959 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 960 } 961 962 PetscCall(PetscFree(A->solvertype)); 963 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 964 965 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 966 PetscFunctionReturn(PETSC_SUCCESS); 967 } 968 969 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 970 { 971 MatFactorInfo info; 972 973 PetscFunctionBegin; 974 info.fill = 1.0; 975 976 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 977 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 978 PetscFunctionReturn(PETSC_SUCCESS); 979 } 980 981 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 982 { 983 PetscFunctionBegin; 984 fact->assembled = PETSC_TRUE; 985 fact->preallocated = PETSC_TRUE; 986 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 987 PetscFunctionReturn(PETSC_SUCCESS); 988 } 989 990 /* uses LAPACK */ 991 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 992 { 993 PetscFunctionBegin; 994 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 995 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 996 PetscCall(MatSetType(*fact, MATDENSE)); 997 (*fact)->trivialsymbolic = PETSC_TRUE; 998 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 999 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1000 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1001 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1002 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1003 } else if (ftype == MAT_FACTOR_QR) { 1004 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1005 } 1006 (*fact)->factortype = ftype; 1007 1008 PetscCall(PetscFree((*fact)->solvertype)); 1009 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1010 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1011 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1012 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1013 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1014 PetscFunctionReturn(PETSC_SUCCESS); 1015 } 1016 1017 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1018 { 1019 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1020 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1021 const PetscScalar *b; 1022 PetscInt m = A->rmap->n, i; 1023 PetscBLASInt o = 1, bm = 0; 1024 1025 PetscFunctionBegin; 1026 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1027 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1028 #endif 1029 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1030 PetscCall(PetscBLASIntCast(m, &bm)); 1031 if (flag & SOR_ZERO_INITIAL_GUESS) { 1032 /* this is a hack fix, should have another version without the second BLASdotu */ 1033 PetscCall(VecSet(xx, zero)); 1034 } 1035 PetscCall(VecGetArray(xx, &x)); 1036 PetscCall(VecGetArrayRead(bb, &b)); 1037 its = its * lits; 1038 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1039 while (its--) { 1040 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1041 for (i = 0; i < m; i++) { 1042 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1043 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1044 } 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1047 for (i = m - 1; i >= 0; i--) { 1048 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1049 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1050 } 1051 } 1052 } 1053 PetscCall(VecRestoreArrayRead(bb, &b)); 1054 PetscCall(VecRestoreArray(xx, &x)); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 static PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1059 { 1060 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1061 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1062 PetscBLASInt m, n, _One = 1; 1063 const PetscScalar *v = mat->v, *x; 1064 1065 PetscFunctionBegin; 1066 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1067 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1068 PetscCall(VecGetArrayRead(xx, &x)); 1069 PetscCall(VecGetArrayWrite(yy, &y)); 1070 if (!m || !n) { 1071 PetscBLASInt i; 1072 if (trans) 1073 for (i = 0; i < n; i++) y[i] = 0.0; 1074 else 1075 for (i = 0; i < m; i++) y[i] = 0.0; 1076 } else { 1077 if (trans) { 1078 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1079 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1080 } else { 1081 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); 1082 } 1083 PetscCall(PetscLogFlops(2.0 * m * n - n)); 1084 } 1085 PetscCall(VecRestoreArrayRead(xx, &x)); 1086 PetscCall(VecRestoreArrayWrite(yy, &y)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1091 { 1092 PetscFunctionBegin; 1093 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1094 PetscFunctionReturn(PETSC_SUCCESS); 1095 } 1096 1097 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1098 { 1099 PetscFunctionBegin; 1100 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1101 PetscFunctionReturn(PETSC_SUCCESS); 1102 } 1103 1104 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1105 { 1106 PetscFunctionBegin; 1107 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1108 PetscFunctionReturn(PETSC_SUCCESS); 1109 } 1110 1111 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1112 { 1113 PetscFunctionBegin; 1114 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1115 PetscFunctionReturn(PETSC_SUCCESS); 1116 } 1117 1118 static PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1119 { 1120 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1121 const PetscScalar *v = mat->v, *x; 1122 PetscScalar *y, _DOne = 1.0; 1123 PetscBLASInt m, n, _One = 1; 1124 1125 PetscFunctionBegin; 1126 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1127 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1128 PetscCall(VecCopy(zz, yy)); 1129 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); 1130 PetscCall(VecGetArray(yy, &y)); 1131 PetscCall(VecGetArrayRead(xx, &x)); 1132 if (trans) { 1133 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1134 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1135 } else { 1136 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); 1137 } 1138 PetscCall(VecRestoreArrayRead(xx, &x)); 1139 PetscCall(VecRestoreArray(yy, &y)); 1140 PetscCall(PetscLogFlops(2.0 * m * n)); 1141 PetscFunctionReturn(PETSC_SUCCESS); 1142 } 1143 1144 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1145 { 1146 PetscFunctionBegin; 1147 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1152 { 1153 PetscFunctionBegin; 1154 PetscMPIInt rank; 1155 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 1156 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1161 { 1162 PetscFunctionBegin; 1163 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1164 PetscFunctionReturn(PETSC_SUCCESS); 1165 } 1166 1167 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1168 { 1169 PetscFunctionBegin; 1170 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1171 PetscFunctionReturn(PETSC_SUCCESS); 1172 } 1173 1174 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1175 { 1176 PetscFunctionBegin; 1177 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1182 { 1183 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1184 PetscInt i; 1185 1186 PetscFunctionBegin; 1187 if (ncols) *ncols = A->cmap->n; 1188 if (cols) { 1189 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1190 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1191 } 1192 if (vals) { 1193 const PetscScalar *v; 1194 1195 PetscCall(MatDenseGetArrayRead(A, &v)); 1196 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1197 v += row; 1198 for (i = 0; i < A->cmap->n; i++) { 1199 (*vals)[i] = *v; 1200 v += mat->lda; 1201 } 1202 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1203 } 1204 PetscFunctionReturn(PETSC_SUCCESS); 1205 } 1206 1207 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1208 { 1209 PetscFunctionBegin; 1210 if (cols) PetscCall(PetscFree(*cols)); 1211 if (vals) PetscCall(PetscFree(*vals)); 1212 PetscFunctionReturn(PETSC_SUCCESS); 1213 } 1214 1215 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1216 { 1217 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1218 PetscScalar *av; 1219 PetscInt i, j, idx = 0; 1220 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1221 PetscOffloadMask oldf; 1222 #endif 1223 1224 PetscFunctionBegin; 1225 PetscCall(MatDenseGetArray(A, &av)); 1226 if (!mat->roworiented) { 1227 if (addv == INSERT_VALUES) { 1228 for (j = 0; j < n; j++) { 1229 if (indexn[j] < 0) { 1230 idx += m; 1231 continue; 1232 } 1233 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1234 for (i = 0; i < m; i++) { 1235 if (indexm[i] < 0) { 1236 idx++; 1237 continue; 1238 } 1239 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1240 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1241 } 1242 } 1243 } else { 1244 for (j = 0; j < n; j++) { 1245 if (indexn[j] < 0) { 1246 idx += m; 1247 continue; 1248 } 1249 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1250 for (i = 0; i < m; i++) { 1251 if (indexm[i] < 0) { 1252 idx++; 1253 continue; 1254 } 1255 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1256 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1257 } 1258 } 1259 } 1260 } else { 1261 if (addv == INSERT_VALUES) { 1262 for (i = 0; i < m; i++) { 1263 if (indexm[i] < 0) { 1264 idx += n; 1265 continue; 1266 } 1267 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1268 for (j = 0; j < n; j++) { 1269 if (indexn[j] < 0) { 1270 idx++; 1271 continue; 1272 } 1273 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1274 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0); 1275 } 1276 } 1277 } else { 1278 for (i = 0; i < m; i++) { 1279 if (indexm[i] < 0) { 1280 idx += n; 1281 continue; 1282 } 1283 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1284 for (j = 0; j < n; j++) { 1285 if (indexn[j] < 0) { 1286 idx++; 1287 continue; 1288 } 1289 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1290 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0); 1291 } 1292 } 1293 } 1294 } 1295 /* hack to prevent unneeded copy to the GPU while returning the array */ 1296 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1297 oldf = A->offloadmask; 1298 A->offloadmask = PETSC_OFFLOAD_GPU; 1299 #endif 1300 PetscCall(MatDenseRestoreArray(A, &av)); 1301 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1302 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1303 #endif 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } 1306 1307 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1308 { 1309 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1310 const PetscScalar *vv; 1311 PetscInt i, j; 1312 1313 PetscFunctionBegin; 1314 PetscCall(MatDenseGetArrayRead(A, &vv)); 1315 /* row-oriented output */ 1316 for (i = 0; i < m; i++) { 1317 if (indexm[i] < 0) { 1318 v += n; 1319 continue; 1320 } 1321 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1322 for (j = 0; j < n; j++) { 1323 if (indexn[j] < 0) { 1324 v++; 1325 continue; 1326 } 1327 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1328 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1329 } 1330 } 1331 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } 1334 1335 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1336 { 1337 PetscBool skipHeader; 1338 PetscViewerFormat format; 1339 PetscInt header[4], M, N, m, lda, i, j, k; 1340 const PetscScalar *v; 1341 PetscScalar *vwork; 1342 1343 PetscFunctionBegin; 1344 PetscCall(PetscViewerSetUp(viewer)); 1345 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1346 PetscCall(PetscViewerGetFormat(viewer, &format)); 1347 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1348 1349 PetscCall(MatGetSize(mat, &M, &N)); 1350 1351 /* write matrix header */ 1352 header[0] = MAT_FILE_CLASSID; 1353 header[1] = M; 1354 header[2] = N; 1355 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1356 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1357 1358 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1359 if (format != PETSC_VIEWER_NATIVE) { 1360 PetscInt nnz = m * N, *iwork; 1361 /* store row lengths for each row */ 1362 PetscCall(PetscMalloc1(nnz, &iwork)); 1363 for (i = 0; i < m; i++) iwork[i] = N; 1364 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1365 /* store column indices (zero start index) */ 1366 for (k = 0, i = 0; i < m; i++) 1367 for (j = 0; j < N; j++, k++) iwork[k] = j; 1368 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1369 PetscCall(PetscFree(iwork)); 1370 } 1371 /* store matrix values as a dense matrix in row major order */ 1372 PetscCall(PetscMalloc1(m * N, &vwork)); 1373 PetscCall(MatDenseGetArrayRead(mat, &v)); 1374 PetscCall(MatDenseGetLDA(mat, &lda)); 1375 for (k = 0, i = 0; i < m; i++) 1376 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1377 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1378 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1379 PetscCall(PetscFree(vwork)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1384 { 1385 PetscBool skipHeader; 1386 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1387 PetscInt rows, cols; 1388 PetscScalar *v, *vwork; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscViewerSetUp(viewer)); 1392 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1393 1394 if (!skipHeader) { 1395 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1396 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1397 M = header[1]; 1398 N = header[2]; 1399 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1400 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1401 nz = header[3]; 1402 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1403 } else { 1404 PetscCall(MatGetSize(mat, &M, &N)); 1405 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1406 nz = MATRIX_BINARY_FORMAT_DENSE; 1407 } 1408 1409 /* setup global sizes if not set */ 1410 if (mat->rmap->N < 0) mat->rmap->N = M; 1411 if (mat->cmap->N < 0) mat->cmap->N = N; 1412 PetscCall(MatSetUp(mat)); 1413 /* check if global sizes are correct */ 1414 PetscCall(MatGetSize(mat, &rows, &cols)); 1415 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1416 1417 PetscCall(MatGetSize(mat, NULL, &N)); 1418 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1419 PetscCall(MatDenseGetArray(mat, &v)); 1420 PetscCall(MatDenseGetLDA(mat, &lda)); 1421 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1422 PetscInt nnz = m * N; 1423 /* read in matrix values */ 1424 PetscCall(PetscMalloc1(nnz, &vwork)); 1425 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1426 /* store values in column major order */ 1427 for (j = 0; j < N; j++) 1428 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1429 PetscCall(PetscFree(vwork)); 1430 } else { /* matrix in file is sparse format */ 1431 PetscInt nnz = 0, *rlens, *icols; 1432 /* read in row lengths */ 1433 PetscCall(PetscMalloc1(m, &rlens)); 1434 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1435 for (i = 0; i < m; i++) nnz += rlens[i]; 1436 /* read in column indices and values */ 1437 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1438 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1439 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1440 /* store values in column major order */ 1441 for (k = 0, i = 0; i < m; i++) 1442 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1443 PetscCall(PetscFree(rlens)); 1444 PetscCall(PetscFree2(icols, vwork)); 1445 } 1446 PetscCall(MatDenseRestoreArray(mat, &v)); 1447 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1448 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1449 PetscFunctionReturn(PETSC_SUCCESS); 1450 } 1451 1452 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1453 { 1454 PetscBool isbinary, ishdf5; 1455 1456 PetscFunctionBegin; 1457 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1458 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1459 /* force binary viewer to load .info file if it has not yet done so */ 1460 PetscCall(PetscViewerSetUp(viewer)); 1461 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1462 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1463 if (isbinary) { 1464 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1465 } else if (ishdf5) { 1466 #if defined(PETSC_HAVE_HDF5) 1467 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1468 #else 1469 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1470 #endif 1471 } else { 1472 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1473 } 1474 PetscFunctionReturn(PETSC_SUCCESS); 1475 } 1476 1477 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1478 { 1479 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1480 PetscInt i, j; 1481 const char *name; 1482 PetscScalar *v, *av; 1483 PetscViewerFormat format; 1484 #if defined(PETSC_USE_COMPLEX) 1485 PetscBool allreal = PETSC_TRUE; 1486 #endif 1487 1488 PetscFunctionBegin; 1489 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1490 PetscCall(PetscViewerGetFormat(viewer, &format)); 1491 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1492 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ 1493 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1494 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1495 for (i = 0; i < A->rmap->n; i++) { 1496 v = av + i; 1497 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1498 for (j = 0; j < A->cmap->n; j++) { 1499 #if defined(PETSC_USE_COMPLEX) 1500 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1501 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1502 } else if (PetscRealPart(*v)) { 1503 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1504 } 1505 #else 1506 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1507 #endif 1508 v += a->lda; 1509 } 1510 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1511 } 1512 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1513 } else { 1514 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1515 #if defined(PETSC_USE_COMPLEX) 1516 /* determine if matrix has all real values */ 1517 for (j = 0; j < A->cmap->n; j++) { 1518 v = av + j * a->lda; 1519 for (i = 0; i < A->rmap->n; i++) { 1520 if (PetscImaginaryPart(v[i])) { 1521 allreal = PETSC_FALSE; 1522 break; 1523 } 1524 } 1525 } 1526 #endif 1527 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1528 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1529 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1530 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1531 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1532 } 1533 1534 for (i = 0; i < A->rmap->n; i++) { 1535 v = av + i; 1536 for (j = 0; j < A->cmap->n; j++) { 1537 #if defined(PETSC_USE_COMPLEX) 1538 if (allreal) { 1539 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1540 } else { 1541 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1542 } 1543 #else 1544 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1545 #endif 1546 v += a->lda; 1547 } 1548 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1549 } 1550 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1551 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1552 } 1553 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1554 PetscCall(PetscViewerFlush(viewer)); 1555 PetscFunctionReturn(PETSC_SUCCESS); 1556 } 1557 1558 #include <petscdraw.h> 1559 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1560 { 1561 Mat A = (Mat)Aa; 1562 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1563 int color = PETSC_DRAW_WHITE; 1564 const PetscScalar *v; 1565 PetscViewer viewer; 1566 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1567 PetscViewerFormat format; 1568 1569 PetscFunctionBegin; 1570 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1571 PetscCall(PetscViewerGetFormat(viewer, &format)); 1572 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1573 1574 /* Loop over matrix elements drawing boxes */ 1575 PetscCall(MatDenseGetArrayRead(A, &v)); 1576 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1577 PetscDrawCollectiveBegin(draw); 1578 /* Blue for negative and Red for positive */ 1579 for (j = 0; j < n; j++) { 1580 x_l = j; 1581 x_r = x_l + 1.0; 1582 for (i = 0; i < m; i++) { 1583 y_l = m - i - 1.0; 1584 y_r = y_l + 1.0; 1585 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1586 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1587 else continue; 1588 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1589 } 1590 } 1591 PetscDrawCollectiveEnd(draw); 1592 } else { 1593 /* use contour shading to indicate magnitude of values */ 1594 /* first determine max of all nonzero values */ 1595 PetscReal minv = 0.0, maxv = 0.0; 1596 PetscDraw popup; 1597 1598 for (i = 0; i < m * n; i++) { 1599 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1600 } 1601 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1602 PetscCall(PetscDrawGetPopup(draw, &popup)); 1603 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1604 1605 PetscDrawCollectiveBegin(draw); 1606 for (j = 0; j < n; j++) { 1607 x_l = j; 1608 x_r = x_l + 1.0; 1609 for (i = 0; i < m; i++) { 1610 y_l = m - i - 1.0; 1611 y_r = y_l + 1.0; 1612 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1613 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1614 } 1615 } 1616 PetscDrawCollectiveEnd(draw); 1617 } 1618 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1619 PetscFunctionReturn(PETSC_SUCCESS); 1620 } 1621 1622 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1623 { 1624 PetscDraw draw; 1625 PetscBool isnull; 1626 PetscReal xr, yr, xl, yl, h, w; 1627 1628 PetscFunctionBegin; 1629 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1630 PetscCall(PetscDrawIsNull(draw, &isnull)); 1631 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1632 1633 xr = A->cmap->n; 1634 yr = A->rmap->n; 1635 h = yr / 10.0; 1636 w = xr / 10.0; 1637 xr += w; 1638 yr += h; 1639 xl = -w; 1640 yl = -h; 1641 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1642 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1643 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1644 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1645 PetscCall(PetscDrawSave(draw)); 1646 PetscFunctionReturn(PETSC_SUCCESS); 1647 } 1648 1649 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1650 { 1651 PetscBool iascii, isbinary, isdraw; 1652 1653 PetscFunctionBegin; 1654 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1655 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1656 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1657 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1658 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1659 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1664 { 1665 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1666 1667 PetscFunctionBegin; 1668 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1669 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1670 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); 1671 a->unplacedarray = a->v; 1672 a->unplaced_user_alloc = a->user_alloc; 1673 a->v = (PetscScalar *)array; 1674 a->user_alloc = PETSC_TRUE; 1675 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1676 A->offloadmask = PETSC_OFFLOAD_CPU; 1677 #endif 1678 PetscFunctionReturn(PETSC_SUCCESS); 1679 } 1680 1681 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1682 { 1683 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1684 1685 PetscFunctionBegin; 1686 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1687 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1688 a->v = a->unplacedarray; 1689 a->user_alloc = a->unplaced_user_alloc; 1690 a->unplacedarray = NULL; 1691 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1692 A->offloadmask = PETSC_OFFLOAD_CPU; 1693 #endif 1694 PetscFunctionReturn(PETSC_SUCCESS); 1695 } 1696 1697 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1698 { 1699 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1700 1701 PetscFunctionBegin; 1702 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1703 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1704 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1705 a->v = (PetscScalar *)array; 1706 a->user_alloc = PETSC_FALSE; 1707 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1708 A->offloadmask = PETSC_OFFLOAD_CPU; 1709 #endif 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1714 { 1715 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1716 1717 PetscFunctionBegin; 1718 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); 1719 PetscCall(VecDestroy(&l->qrrhs)); 1720 PetscCall(PetscFree(l->tau)); 1721 PetscCall(PetscFree(l->pivots)); 1722 PetscCall(PetscFree(l->fwork)); 1723 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1724 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1725 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1726 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1727 PetscCall(VecDestroy(&l->cvec)); 1728 PetscCall(MatDestroy(&l->cmat)); 1729 PetscCall(PetscFree(mat->data)); 1730 1731 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1747 #if defined(PETSC_HAVE_ELEMENTAL) 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1749 #endif 1750 #if defined(PETSC_HAVE_SCALAPACK) 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1752 #endif 1753 #if defined(PETSC_HAVE_CUDA) 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1758 #endif 1759 #if defined(PETSC_HAVE_HIP) 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1764 #endif 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1770 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 1784 PetscFunctionReturn(PETSC_SUCCESS); 1785 } 1786 1787 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1788 { 1789 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1790 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1791 PetscScalar *v, tmp; 1792 1793 PetscFunctionBegin; 1794 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1795 if (reuse == MAT_INPLACE_MATRIX) { 1796 if (m == n) { /* in place transpose */ 1797 PetscCall(MatDenseGetArray(A, &v)); 1798 for (j = 0; j < m; j++) { 1799 for (k = 0; k < j; k++) { 1800 tmp = v[j + k * M]; 1801 v[j + k * M] = v[k + j * M]; 1802 v[k + j * M] = tmp; 1803 } 1804 } 1805 PetscCall(MatDenseRestoreArray(A, &v)); 1806 } else { /* reuse memory, temporary allocates new memory */ 1807 PetscScalar *v2; 1808 PetscLayout tmplayout; 1809 1810 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1811 PetscCall(MatDenseGetArray(A, &v)); 1812 for (j = 0; j < n; j++) { 1813 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1814 } 1815 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1816 PetscCall(PetscFree(v2)); 1817 PetscCall(MatDenseRestoreArray(A, &v)); 1818 /* cleanup size dependent quantities */ 1819 PetscCall(VecDestroy(&mat->cvec)); 1820 PetscCall(MatDestroy(&mat->cmat)); 1821 PetscCall(PetscFree(mat->pivots)); 1822 PetscCall(PetscFree(mat->fwork)); 1823 /* swap row/col layouts */ 1824 PetscCall(PetscBLASIntCast(n, &mat->lda)); 1825 tmplayout = A->rmap; 1826 A->rmap = A->cmap; 1827 A->cmap = tmplayout; 1828 } 1829 } else { /* out-of-place transpose */ 1830 Mat tmat; 1831 Mat_SeqDense *tmatd; 1832 PetscScalar *v2; 1833 PetscInt M2; 1834 1835 if (reuse == MAT_INITIAL_MATRIX) { 1836 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1837 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1838 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1839 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1840 } else tmat = *matout; 1841 1842 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1843 PetscCall(MatDenseGetArray(tmat, &v2)); 1844 tmatd = (Mat_SeqDense *)tmat->data; 1845 M2 = tmatd->lda; 1846 for (j = 0; j < n; j++) { 1847 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1848 } 1849 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1850 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1851 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1852 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1853 *matout = tmat; 1854 } 1855 PetscFunctionReturn(PETSC_SUCCESS); 1856 } 1857 1858 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1859 { 1860 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1861 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1862 PetscInt i; 1863 const PetscScalar *v1, *v2; 1864 1865 PetscFunctionBegin; 1866 if (A1->rmap->n != A2->rmap->n) { 1867 *flg = PETSC_FALSE; 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 if (A1->cmap->n != A2->cmap->n) { 1871 *flg = PETSC_FALSE; 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1875 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1876 for (i = 0; i < A1->cmap->n; i++) { 1877 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1878 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); 1879 v1 += mat1->lda; 1880 v2 += mat2->lda; 1881 } 1882 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1883 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1884 *flg = PETSC_TRUE; 1885 PetscFunctionReturn(PETSC_SUCCESS); 1886 } 1887 1888 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1889 { 1890 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1891 PetscInt i, n, len; 1892 PetscScalar *x; 1893 const PetscScalar *vv; 1894 1895 PetscFunctionBegin; 1896 PetscCall(VecGetSize(v, &n)); 1897 PetscCall(VecGetArray(v, &x)); 1898 len = PetscMin(A->rmap->n, A->cmap->n); 1899 PetscCall(MatDenseGetArrayRead(A, &vv)); 1900 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1901 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1902 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1903 PetscCall(VecRestoreArray(v, &x)); 1904 PetscFunctionReturn(PETSC_SUCCESS); 1905 } 1906 1907 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1908 { 1909 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1910 const PetscScalar *l, *r; 1911 PetscScalar x, *v, *vv; 1912 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1913 1914 PetscFunctionBegin; 1915 PetscCall(MatDenseGetArray(A, &vv)); 1916 if (ll) { 1917 PetscCall(VecGetSize(ll, &m)); 1918 PetscCall(VecGetArrayRead(ll, &l)); 1919 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1920 for (i = 0; i < m; i++) { 1921 x = l[i]; 1922 v = vv + i; 1923 for (j = 0; j < n; j++) { 1924 (*v) *= x; 1925 v += mat->lda; 1926 } 1927 } 1928 PetscCall(VecRestoreArrayRead(ll, &l)); 1929 PetscCall(PetscLogFlops(1.0 * n * m)); 1930 } 1931 if (rr) { 1932 PetscCall(VecGetSize(rr, &n)); 1933 PetscCall(VecGetArrayRead(rr, &r)); 1934 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1935 for (i = 0; i < n; i++) { 1936 x = r[i]; 1937 v = vv + i * mat->lda; 1938 for (j = 0; j < m; j++) (*v++) *= x; 1939 } 1940 PetscCall(VecRestoreArrayRead(rr, &r)); 1941 PetscCall(PetscLogFlops(1.0 * n * m)); 1942 } 1943 PetscCall(MatDenseRestoreArray(A, &vv)); 1944 PetscFunctionReturn(PETSC_SUCCESS); 1945 } 1946 1947 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1948 { 1949 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1950 PetscScalar *v, *vv; 1951 PetscReal sum = 0.0; 1952 PetscInt lda, m = A->rmap->n, i, j; 1953 1954 PetscFunctionBegin; 1955 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1956 PetscCall(MatDenseGetLDA(A, &lda)); 1957 v = vv; 1958 if (type == NORM_FROBENIUS) { 1959 if (lda > m) { 1960 for (j = 0; j < A->cmap->n; j++) { 1961 v = vv + j * lda; 1962 for (i = 0; i < m; i++) { 1963 sum += PetscRealPart(PetscConj(*v) * (*v)); 1964 v++; 1965 } 1966 } 1967 } else { 1968 #if defined(PETSC_USE_REAL___FP16) 1969 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1970 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1971 } 1972 #else 1973 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1974 sum += PetscRealPart(PetscConj(*v) * (*v)); 1975 v++; 1976 } 1977 } 1978 *nrm = PetscSqrtReal(sum); 1979 #endif 1980 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1981 } else if (type == NORM_1) { 1982 *nrm = 0.0; 1983 for (j = 0; j < A->cmap->n; j++) { 1984 v = vv + j * mat->lda; 1985 sum = 0.0; 1986 for (i = 0; i < A->rmap->n; i++) { 1987 sum += PetscAbsScalar(*v); 1988 v++; 1989 } 1990 if (sum > *nrm) *nrm = sum; 1991 } 1992 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1993 } else if (type == NORM_INFINITY) { 1994 *nrm = 0.0; 1995 for (j = 0; j < A->rmap->n; j++) { 1996 v = vv + j; 1997 sum = 0.0; 1998 for (i = 0; i < A->cmap->n; i++) { 1999 sum += PetscAbsScalar(*v); 2000 v += mat->lda; 2001 } 2002 if (sum > *nrm) *nrm = sum; 2003 } 2004 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2005 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2006 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2011 { 2012 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2013 2014 PetscFunctionBegin; 2015 switch (op) { 2016 case MAT_ROW_ORIENTED: 2017 aij->roworiented = flg; 2018 break; 2019 case MAT_NEW_NONZERO_LOCATIONS: 2020 case MAT_NEW_NONZERO_LOCATION_ERR: 2021 case MAT_NEW_NONZERO_ALLOCATION_ERR: 2022 case MAT_FORCE_DIAGONAL_ENTRIES: 2023 case MAT_KEEP_NONZERO_PATTERN: 2024 case MAT_IGNORE_OFF_PROC_ENTRIES: 2025 case MAT_USE_HASH_TABLE: 2026 case MAT_IGNORE_ZERO_ENTRIES: 2027 case MAT_IGNORE_LOWER_TRIANGULAR: 2028 case MAT_SORTED_FULL: 2029 case MAT_SUBMAT_SINGLEIS: 2030 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 2031 break; 2032 case MAT_SPD: 2033 case MAT_SYMMETRIC: 2034 case MAT_STRUCTURALLY_SYMMETRIC: 2035 case MAT_HERMITIAN: 2036 case MAT_SYMMETRY_ETERNAL: 2037 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 2038 case MAT_SPD_ETERNAL: 2039 break; 2040 default: 2041 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 2042 } 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2047 { 2048 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2049 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2050 PetscScalar *v; 2051 2052 PetscFunctionBegin; 2053 PetscCall(MatDenseGetArrayWrite(A, &v)); 2054 if (lda > m) { 2055 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2056 } else { 2057 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2058 } 2059 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2060 PetscFunctionReturn(PETSC_SUCCESS); 2061 } 2062 2063 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2064 { 2065 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2066 PetscInt m = l->lda, n = A->cmap->n, i, j; 2067 PetscScalar *slot, *bb, *v; 2068 const PetscScalar *xx; 2069 2070 PetscFunctionBegin; 2071 if (PetscDefined(USE_DEBUG)) { 2072 for (i = 0; i < N; i++) { 2073 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2074 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2075 } 2076 } 2077 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 2078 2079 /* fix right-hand side if needed */ 2080 if (x && b) { 2081 PetscCall(VecGetArrayRead(x, &xx)); 2082 PetscCall(VecGetArray(b, &bb)); 2083 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2084 PetscCall(VecRestoreArrayRead(x, &xx)); 2085 PetscCall(VecRestoreArray(b, &bb)); 2086 } 2087 2088 PetscCall(MatDenseGetArray(A, &v)); 2089 for (i = 0; i < N; i++) { 2090 slot = v + rows[i]; 2091 for (j = 0; j < n; j++) { 2092 *slot = 0.0; 2093 slot += m; 2094 } 2095 } 2096 if (diag != 0.0) { 2097 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2098 for (i = 0; i < N; i++) { 2099 slot = v + (m + 1) * rows[i]; 2100 *slot = diag; 2101 } 2102 } 2103 PetscCall(MatDenseRestoreArray(A, &v)); 2104 PetscFunctionReturn(PETSC_SUCCESS); 2105 } 2106 2107 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2108 { 2109 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2110 2111 PetscFunctionBegin; 2112 *lda = mat->lda; 2113 PetscFunctionReturn(PETSC_SUCCESS); 2114 } 2115 2116 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2117 { 2118 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2119 2120 PetscFunctionBegin; 2121 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2122 *array = mat->v; 2123 PetscFunctionReturn(PETSC_SUCCESS); 2124 } 2125 2126 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2127 { 2128 PetscFunctionBegin; 2129 if (array) *array = NULL; 2130 PetscFunctionReturn(PETSC_SUCCESS); 2131 } 2132 2133 /*@ 2134 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2135 2136 Not Collective 2137 2138 Input Parameter: 2139 . A - a `MATDENSE` or `MATDENSECUDA` matrix 2140 2141 Output Parameter: 2142 . lda - the leading dimension 2143 2144 Level: intermediate 2145 2146 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2147 @*/ 2148 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2149 { 2150 PetscFunctionBegin; 2151 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2152 PetscAssertPointer(lda, 2); 2153 MatCheckPreallocated(A, 1); 2154 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 /*@ 2159 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2160 2161 Collective if the matrix layouts have not yet been setup 2162 2163 Input Parameters: 2164 + A - a `MATDENSE` or `MATDENSECUDA` matrix 2165 - lda - the leading dimension 2166 2167 Level: intermediate 2168 2169 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2170 @*/ 2171 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2172 { 2173 PetscFunctionBegin; 2174 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2175 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2176 PetscFunctionReturn(PETSC_SUCCESS); 2177 } 2178 2179 /*@C 2180 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2181 2182 Logically Collective 2183 2184 Input Parameter: 2185 . A - a dense matrix 2186 2187 Output Parameter: 2188 . array - pointer to the data 2189 2190 Level: intermediate 2191 2192 Fortran Notes: 2193 `MatDenseGetArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseGetArrayF90()` 2194 2195 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2196 @*/ 2197 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) 2198 { 2199 PetscFunctionBegin; 2200 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2201 PetscAssertPointer(array, 2); 2202 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2203 PetscFunctionReturn(PETSC_SUCCESS); 2204 } 2205 2206 /*@C 2207 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2208 2209 Logically Collective 2210 2211 Input Parameters: 2212 + A - a dense matrix 2213 - array - pointer to the data (may be `NULL`) 2214 2215 Level: intermediate 2216 2217 Fortran Notes: 2218 `MatDenseRestoreArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseRestoreArrayF90()` 2219 2220 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2221 @*/ 2222 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) 2223 { 2224 PetscFunctionBegin; 2225 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2226 if (array) PetscAssertPointer(array, 2); 2227 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2228 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2229 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2230 A->offloadmask = PETSC_OFFLOAD_CPU; 2231 #endif 2232 PetscFunctionReturn(PETSC_SUCCESS); 2233 } 2234 2235 /*@C 2236 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2237 2238 Not Collective 2239 2240 Input Parameter: 2241 . A - a dense matrix 2242 2243 Output Parameter: 2244 . array - pointer to the data 2245 2246 Level: intermediate 2247 2248 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2249 @*/ 2250 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) 2251 { 2252 PetscFunctionBegin; 2253 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2254 PetscAssertPointer(array, 2); 2255 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2256 PetscFunctionReturn(PETSC_SUCCESS); 2257 } 2258 2259 /*@C 2260 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2261 2262 Not Collective 2263 2264 Input Parameters: 2265 + A - a dense matrix 2266 - array - pointer to the data (may be `NULL`) 2267 2268 Level: intermediate 2269 2270 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2271 @*/ 2272 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) 2273 { 2274 PetscFunctionBegin; 2275 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2276 if (array) PetscAssertPointer(array, 2); 2277 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2278 PetscFunctionReturn(PETSC_SUCCESS); 2279 } 2280 2281 /*@C 2282 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2283 2284 Not Collective 2285 2286 Input Parameter: 2287 . A - a dense matrix 2288 2289 Output Parameter: 2290 . array - pointer to the data 2291 2292 Level: intermediate 2293 2294 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2295 @*/ 2296 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) 2297 { 2298 PetscFunctionBegin; 2299 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2300 PetscAssertPointer(array, 2); 2301 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2302 PetscFunctionReturn(PETSC_SUCCESS); 2303 } 2304 2305 /*@C 2306 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2307 2308 Not Collective 2309 2310 Input Parameters: 2311 + A - a dense matrix 2312 - array - pointer to the data (may be `NULL`) 2313 2314 Level: intermediate 2315 2316 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2317 @*/ 2318 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) 2319 { 2320 PetscFunctionBegin; 2321 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2322 if (array) PetscAssertPointer(array, 2); 2323 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2324 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2325 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2326 A->offloadmask = PETSC_OFFLOAD_CPU; 2327 #endif 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 /*@C 2332 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2333 2334 Logically Collective 2335 2336 Input Parameter: 2337 . A - a dense matrix 2338 2339 Output Parameters: 2340 + array - pointer to the data 2341 - mtype - memory type of the returned pointer 2342 2343 Level: intermediate 2344 2345 Note: 2346 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2347 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2348 2349 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, 2350 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2351 @*/ 2352 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2353 { 2354 PetscBool isMPI; 2355 2356 PetscFunctionBegin; 2357 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2358 PetscAssertPointer(array, 2); 2359 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2360 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2361 if (isMPI) { 2362 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2363 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2364 } else { 2365 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2366 2367 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); 2368 if (fptr) { 2369 PetscCall((*fptr)(A, array, mtype)); 2370 } else { 2371 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2372 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2373 } 2374 } 2375 PetscFunctionReturn(PETSC_SUCCESS); 2376 } 2377 2378 /*@C 2379 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` 2380 2381 Logically Collective 2382 2383 Input Parameters: 2384 + A - a dense matrix 2385 - array - pointer to the data 2386 2387 Level: intermediate 2388 2389 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2390 @*/ 2391 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[]) 2392 { 2393 PetscBool isMPI; 2394 2395 PetscFunctionBegin; 2396 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2397 PetscAssertPointer(array, 2); 2398 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2399 if (isMPI) { 2400 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2401 } else { 2402 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2403 2404 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); 2405 if (fptr) { 2406 PetscCall((*fptr)(A, array)); 2407 } else { 2408 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2409 } 2410 *array = NULL; 2411 } 2412 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2413 PetscFunctionReturn(PETSC_SUCCESS); 2414 } 2415 2416 /*@C 2417 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2418 2419 Logically Collective 2420 2421 Input Parameter: 2422 . A - a dense matrix 2423 2424 Output Parameters: 2425 + array - pointer to the data 2426 - mtype - memory type of the returned pointer 2427 2428 Level: intermediate 2429 2430 Note: 2431 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2432 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2433 2434 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, 2435 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2436 @*/ 2437 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype) 2438 { 2439 PetscBool isMPI; 2440 2441 PetscFunctionBegin; 2442 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2443 PetscAssertPointer(array, 2); 2444 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2445 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2446 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2447 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2448 } else { 2449 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); 2450 2451 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); 2452 if (fptr) { 2453 PetscCall((*fptr)(A, array, mtype)); 2454 } else { 2455 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2456 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2457 } 2458 } 2459 PetscFunctionReturn(PETSC_SUCCESS); 2460 } 2461 2462 /*@C 2463 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2464 2465 Logically Collective 2466 2467 Input Parameters: 2468 + A - a dense matrix 2469 - array - pointer to the data 2470 2471 Level: intermediate 2472 2473 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2474 @*/ 2475 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[]) 2476 { 2477 PetscBool isMPI; 2478 2479 PetscFunctionBegin; 2480 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2481 PetscAssertPointer(array, 2); 2482 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2483 if (isMPI) { 2484 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2485 } else { 2486 PetscErrorCode (*fptr)(Mat, const PetscScalar **); 2487 2488 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); 2489 if (fptr) { 2490 PetscCall((*fptr)(A, array)); 2491 } else { 2492 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2493 } 2494 *array = NULL; 2495 } 2496 PetscFunctionReturn(PETSC_SUCCESS); 2497 } 2498 2499 /*@C 2500 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2501 2502 Logically Collective 2503 2504 Input Parameter: 2505 . A - a dense matrix 2506 2507 Output Parameters: 2508 + array - pointer to the data 2509 - mtype - memory type of the returned pointer 2510 2511 Level: intermediate 2512 2513 Note: 2514 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2515 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2516 2517 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, 2518 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2519 @*/ 2520 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype) 2521 { 2522 PetscBool isMPI; 2523 2524 PetscFunctionBegin; 2525 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2526 PetscAssertPointer(array, 2); 2527 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2528 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2529 if (isMPI) { 2530 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2531 } else { 2532 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2533 2534 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); 2535 if (fptr) { 2536 PetscCall((*fptr)(A, array, mtype)); 2537 } else { 2538 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2539 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2540 } 2541 } 2542 PetscFunctionReturn(PETSC_SUCCESS); 2543 } 2544 2545 /*@C 2546 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2547 2548 Logically Collective 2549 2550 Input Parameters: 2551 + A - a dense matrix 2552 - array - pointer to the data 2553 2554 Level: intermediate 2555 2556 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2557 @*/ 2558 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[]) 2559 { 2560 PetscBool isMPI; 2561 2562 PetscFunctionBegin; 2563 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2564 PetscAssertPointer(array, 2); 2565 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2566 if (isMPI) { 2567 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2568 } else { 2569 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2570 2571 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); 2572 if (fptr) { 2573 PetscCall((*fptr)(A, array)); 2574 } else { 2575 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2576 } 2577 *array = NULL; 2578 } 2579 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2580 PetscFunctionReturn(PETSC_SUCCESS); 2581 } 2582 2583 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2584 { 2585 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2586 PetscInt i, j, nrows, ncols, ldb; 2587 const PetscInt *irow, *icol; 2588 PetscScalar *av, *bv, *v = mat->v; 2589 Mat newmat; 2590 2591 PetscFunctionBegin; 2592 PetscCall(ISGetIndices(isrow, &irow)); 2593 PetscCall(ISGetIndices(iscol, &icol)); 2594 PetscCall(ISGetLocalSize(isrow, &nrows)); 2595 PetscCall(ISGetLocalSize(iscol, &ncols)); 2596 2597 /* Check submatrixcall */ 2598 if (scall == MAT_REUSE_MATRIX) { 2599 PetscInt n_cols, n_rows; 2600 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2601 if (n_rows != nrows || n_cols != ncols) { 2602 /* resize the result matrix to match number of requested rows/columns */ 2603 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2604 } 2605 newmat = *B; 2606 } else { 2607 /* Create and fill new matrix */ 2608 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2609 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2610 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2611 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2612 } 2613 2614 /* Now extract the data pointers and do the copy,column at a time */ 2615 PetscCall(MatDenseGetArray(newmat, &bv)); 2616 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2617 for (i = 0; i < ncols; i++) { 2618 av = v + mat->lda * icol[i]; 2619 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2620 bv += ldb; 2621 } 2622 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2623 2624 /* Assemble the matrices so that the correct flags are set */ 2625 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2626 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2627 2628 /* Free work space */ 2629 PetscCall(ISRestoreIndices(isrow, &irow)); 2630 PetscCall(ISRestoreIndices(iscol, &icol)); 2631 *B = newmat; 2632 PetscFunctionReturn(PETSC_SUCCESS); 2633 } 2634 2635 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2636 { 2637 PetscInt i; 2638 2639 PetscFunctionBegin; 2640 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2641 2642 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2643 PetscFunctionReturn(PETSC_SUCCESS); 2644 } 2645 2646 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) 2647 { 2648 PetscFunctionBegin; 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) 2653 { 2654 PetscFunctionBegin; 2655 PetscFunctionReturn(PETSC_SUCCESS); 2656 } 2657 2658 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2659 { 2660 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2661 const PetscScalar *va; 2662 PetscScalar *vb; 2663 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2664 2665 PetscFunctionBegin; 2666 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2667 if (A->ops->copy != B->ops->copy) { 2668 PetscCall(MatCopy_Basic(A, B, str)); 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2672 PetscCall(MatDenseGetArrayRead(A, &va)); 2673 PetscCall(MatDenseGetArray(B, &vb)); 2674 if (lda1 > m || lda2 > m) { 2675 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2676 } else { 2677 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2678 } 2679 PetscCall(MatDenseRestoreArray(B, &vb)); 2680 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2681 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2682 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2683 PetscFunctionReturn(PETSC_SUCCESS); 2684 } 2685 2686 PetscErrorCode MatSetUp_SeqDense(Mat A) 2687 { 2688 PetscFunctionBegin; 2689 PetscCall(PetscLayoutSetUp(A->rmap)); 2690 PetscCall(PetscLayoutSetUp(A->cmap)); 2691 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2692 PetscFunctionReturn(PETSC_SUCCESS); 2693 } 2694 2695 static PetscErrorCode MatConjugate_SeqDense(Mat A) 2696 { 2697 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2698 PetscInt i, j; 2699 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2700 PetscScalar *aa; 2701 2702 PetscFunctionBegin; 2703 PetscCall(MatDenseGetArray(A, &aa)); 2704 for (j = 0; j < A->cmap->n; j++) { 2705 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2706 } 2707 PetscCall(MatDenseRestoreArray(A, &aa)); 2708 if (mat->tau) 2709 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2714 { 2715 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2716 PetscInt i, j; 2717 PetscScalar *aa; 2718 2719 PetscFunctionBegin; 2720 PetscCall(MatDenseGetArray(A, &aa)); 2721 for (j = 0; j < A->cmap->n; j++) { 2722 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2723 } 2724 PetscCall(MatDenseRestoreArray(A, &aa)); 2725 PetscFunctionReturn(PETSC_SUCCESS); 2726 } 2727 2728 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2729 { 2730 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2731 PetscInt i, j; 2732 PetscScalar *aa; 2733 2734 PetscFunctionBegin; 2735 PetscCall(MatDenseGetArray(A, &aa)); 2736 for (j = 0; j < A->cmap->n; j++) { 2737 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2738 } 2739 PetscCall(MatDenseRestoreArray(A, &aa)); 2740 PetscFunctionReturn(PETSC_SUCCESS); 2741 } 2742 2743 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2744 { 2745 PetscInt m = A->rmap->n, n = B->cmap->n; 2746 PetscBool cisdense = PETSC_FALSE; 2747 2748 PetscFunctionBegin; 2749 PetscCall(MatSetSizes(C, m, n, m, n)); 2750 #if defined(PETSC_HAVE_CUDA) 2751 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2752 #endif 2753 #if defined(PETSC_HAVE_HIP) 2754 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2755 #endif 2756 if (!cisdense) { 2757 PetscBool flg; 2758 2759 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2760 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2761 } 2762 PetscCall(MatSetUp(C)); 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2767 { 2768 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2769 PetscBLASInt m, n, k; 2770 const PetscScalar *av, *bv; 2771 PetscScalar *cv; 2772 PetscScalar _DOne = 1.0, _DZero = 0.0; 2773 2774 PetscFunctionBegin; 2775 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2776 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2777 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2778 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2779 PetscCall(MatDenseGetArrayRead(A, &av)); 2780 PetscCall(MatDenseGetArrayRead(B, &bv)); 2781 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2782 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2783 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2784 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2785 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2786 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2787 PetscFunctionReturn(PETSC_SUCCESS); 2788 } 2789 2790 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2791 { 2792 PetscInt m = A->rmap->n, n = B->rmap->n; 2793 PetscBool cisdense = PETSC_FALSE; 2794 2795 PetscFunctionBegin; 2796 PetscCall(MatSetSizes(C, m, n, m, n)); 2797 #if defined(PETSC_HAVE_CUDA) 2798 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2799 #endif 2800 #if defined(PETSC_HAVE_HIP) 2801 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2802 #endif 2803 if (!cisdense) { 2804 PetscBool flg; 2805 2806 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2807 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2808 } 2809 PetscCall(MatSetUp(C)); 2810 PetscFunctionReturn(PETSC_SUCCESS); 2811 } 2812 2813 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2814 { 2815 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2816 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2817 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2818 const PetscScalar *av, *bv; 2819 PetscScalar *cv; 2820 PetscBLASInt m, n, k; 2821 PetscScalar _DOne = 1.0, _DZero = 0.0; 2822 2823 PetscFunctionBegin; 2824 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2825 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2826 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2827 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2828 PetscCall(MatDenseGetArrayRead(A, &av)); 2829 PetscCall(MatDenseGetArrayRead(B, &bv)); 2830 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2831 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2832 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2833 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2834 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2835 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2836 PetscFunctionReturn(PETSC_SUCCESS); 2837 } 2838 2839 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2840 { 2841 PetscInt m = A->cmap->n, n = B->cmap->n; 2842 PetscBool cisdense = PETSC_FALSE; 2843 2844 PetscFunctionBegin; 2845 PetscCall(MatSetSizes(C, m, n, m, n)); 2846 #if defined(PETSC_HAVE_CUDA) 2847 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2848 #endif 2849 #if defined(PETSC_HAVE_HIP) 2850 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2851 #endif 2852 if (!cisdense) { 2853 PetscBool flg; 2854 2855 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2856 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2857 } 2858 PetscCall(MatSetUp(C)); 2859 PetscFunctionReturn(PETSC_SUCCESS); 2860 } 2861 2862 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2863 { 2864 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2865 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2866 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2867 const PetscScalar *av, *bv; 2868 PetscScalar *cv; 2869 PetscBLASInt m, n, k; 2870 PetscScalar _DOne = 1.0, _DZero = 0.0; 2871 2872 PetscFunctionBegin; 2873 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2874 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2875 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2876 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2877 PetscCall(MatDenseGetArrayRead(A, &av)); 2878 PetscCall(MatDenseGetArrayRead(B, &bv)); 2879 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2880 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2881 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2882 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2883 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2884 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2885 PetscFunctionReturn(PETSC_SUCCESS); 2886 } 2887 2888 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2889 { 2890 PetscFunctionBegin; 2891 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2892 C->ops->productsymbolic = MatProductSymbolic_AB; 2893 PetscFunctionReturn(PETSC_SUCCESS); 2894 } 2895 2896 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2897 { 2898 PetscFunctionBegin; 2899 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2900 C->ops->productsymbolic = MatProductSymbolic_AtB; 2901 PetscFunctionReturn(PETSC_SUCCESS); 2902 } 2903 2904 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2905 { 2906 PetscFunctionBegin; 2907 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2908 C->ops->productsymbolic = MatProductSymbolic_ABt; 2909 PetscFunctionReturn(PETSC_SUCCESS); 2910 } 2911 2912 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2913 { 2914 Mat_Product *product = C->product; 2915 2916 PetscFunctionBegin; 2917 switch (product->type) { 2918 case MATPRODUCT_AB: 2919 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2920 break; 2921 case MATPRODUCT_AtB: 2922 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2923 break; 2924 case MATPRODUCT_ABt: 2925 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2926 break; 2927 default: 2928 break; 2929 } 2930 PetscFunctionReturn(PETSC_SUCCESS); 2931 } 2932 2933 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2934 { 2935 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2936 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2937 PetscScalar *x; 2938 const PetscScalar *aa; 2939 2940 PetscFunctionBegin; 2941 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2942 PetscCall(VecGetArray(v, &x)); 2943 PetscCall(VecGetLocalSize(v, &p)); 2944 PetscCall(MatDenseGetArrayRead(A, &aa)); 2945 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2946 for (i = 0; i < m; i++) { 2947 x[i] = aa[i]; 2948 if (idx) idx[i] = 0; 2949 for (j = 1; j < n; j++) { 2950 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2951 x[i] = aa[i + a->lda * j]; 2952 if (idx) idx[i] = j; 2953 } 2954 } 2955 } 2956 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2957 PetscCall(VecRestoreArray(v, &x)); 2958 PetscFunctionReturn(PETSC_SUCCESS); 2959 } 2960 2961 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2962 { 2963 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2964 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2965 PetscScalar *x; 2966 PetscReal atmp; 2967 const PetscScalar *aa; 2968 2969 PetscFunctionBegin; 2970 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2971 PetscCall(VecGetArray(v, &x)); 2972 PetscCall(VecGetLocalSize(v, &p)); 2973 PetscCall(MatDenseGetArrayRead(A, &aa)); 2974 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2975 for (i = 0; i < m; i++) { 2976 x[i] = PetscAbsScalar(aa[i]); 2977 for (j = 1; j < n; j++) { 2978 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2979 if (PetscAbsScalar(x[i]) < atmp) { 2980 x[i] = atmp; 2981 if (idx) idx[i] = j; 2982 } 2983 } 2984 } 2985 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2986 PetscCall(VecRestoreArray(v, &x)); 2987 PetscFunctionReturn(PETSC_SUCCESS); 2988 } 2989 2990 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2991 { 2992 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2993 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2994 PetscScalar *x; 2995 const PetscScalar *aa; 2996 2997 PetscFunctionBegin; 2998 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2999 PetscCall(MatDenseGetArrayRead(A, &aa)); 3000 PetscCall(VecGetArray(v, &x)); 3001 PetscCall(VecGetLocalSize(v, &p)); 3002 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 3003 for (i = 0; i < m; i++) { 3004 x[i] = aa[i]; 3005 if (idx) idx[i] = 0; 3006 for (j = 1; j < n; j++) { 3007 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 3008 x[i] = aa[i + a->lda * j]; 3009 if (idx) idx[i] = j; 3010 } 3011 } 3012 } 3013 PetscCall(VecRestoreArray(v, &x)); 3014 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3015 PetscFunctionReturn(PETSC_SUCCESS); 3016 } 3017 3018 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 3019 { 3020 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3021 PetscScalar *x; 3022 const PetscScalar *aa; 3023 3024 PetscFunctionBegin; 3025 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3026 PetscCall(MatDenseGetArrayRead(A, &aa)); 3027 PetscCall(VecGetArray(v, &x)); 3028 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 3029 PetscCall(VecRestoreArray(v, &x)); 3030 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3031 PetscFunctionReturn(PETSC_SUCCESS); 3032 } 3033 3034 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 3035 { 3036 PetscInt i, j, m, n; 3037 const PetscScalar *a; 3038 3039 PetscFunctionBegin; 3040 PetscCall(MatGetSize(A, &m, &n)); 3041 PetscCall(PetscArrayzero(reductions, n)); 3042 PetscCall(MatDenseGetArrayRead(A, &a)); 3043 if (type == NORM_2) { 3044 for (i = 0; i < n; i++) { 3045 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 3046 a = PetscSafePointerPlusOffset(a, m); 3047 } 3048 } else if (type == NORM_1) { 3049 for (i = 0; i < n; i++) { 3050 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 3051 a = PetscSafePointerPlusOffset(a, m); 3052 } 3053 } else if (type == NORM_INFINITY) { 3054 for (i = 0; i < n; i++) { 3055 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 3056 a = PetscSafePointerPlusOffset(a, m); 3057 } 3058 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 3059 for (i = 0; i < n; i++) { 3060 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 3061 a = PetscSafePointerPlusOffset(a, m); 3062 } 3063 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3064 for (i = 0; i < n; i++) { 3065 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 3066 a = PetscSafePointerPlusOffset(a, m); 3067 } 3068 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3069 PetscCall(MatDenseRestoreArrayRead(A, &a)); 3070 if (type == NORM_2) { 3071 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 3072 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3073 for (i = 0; i < n; i++) reductions[i] /= m; 3074 } 3075 PetscFunctionReturn(PETSC_SUCCESS); 3076 } 3077 3078 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 3079 { 3080 PetscScalar *a; 3081 PetscInt lda, m, n, i, j; 3082 3083 PetscFunctionBegin; 3084 PetscCall(MatGetSize(x, &m, &n)); 3085 PetscCall(MatDenseGetLDA(x, &lda)); 3086 PetscCall(MatDenseGetArrayWrite(x, &a)); 3087 for (j = 0; j < n; j++) { 3088 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 3089 } 3090 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 3091 PetscFunctionReturn(PETSC_SUCCESS); 3092 } 3093 3094 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 3095 { 3096 PetscFunctionBegin; 3097 *missing = PETSC_FALSE; 3098 PetscFunctionReturn(PETSC_SUCCESS); 3099 } 3100 3101 /* vals is not const */ 3102 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 3103 { 3104 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3105 PetscScalar *v; 3106 3107 PetscFunctionBegin; 3108 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3109 PetscCall(MatDenseGetArray(A, &v)); 3110 *vals = v + col * a->lda; 3111 PetscCall(MatDenseRestoreArray(A, &v)); 3112 PetscFunctionReturn(PETSC_SUCCESS); 3113 } 3114 3115 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 3116 { 3117 PetscFunctionBegin; 3118 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 3119 PetscFunctionReturn(PETSC_SUCCESS); 3120 } 3121 3122 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 3123 MatGetRow_SeqDense, 3124 MatRestoreRow_SeqDense, 3125 MatMult_SeqDense, 3126 /* 4*/ MatMultAdd_SeqDense, 3127 MatMultTranspose_SeqDense, 3128 MatMultTransposeAdd_SeqDense, 3129 NULL, 3130 NULL, 3131 NULL, 3132 /* 10*/ NULL, 3133 MatLUFactor_SeqDense, 3134 MatCholeskyFactor_SeqDense, 3135 MatSOR_SeqDense, 3136 MatTranspose_SeqDense, 3137 /* 15*/ MatGetInfo_SeqDense, 3138 MatEqual_SeqDense, 3139 MatGetDiagonal_SeqDense, 3140 MatDiagonalScale_SeqDense, 3141 MatNorm_SeqDense, 3142 /* 20*/ MatAssemblyBegin_SeqDense, 3143 MatAssemblyEnd_SeqDense, 3144 MatSetOption_SeqDense, 3145 MatZeroEntries_SeqDense, 3146 /* 24*/ MatZeroRows_SeqDense, 3147 NULL, 3148 NULL, 3149 NULL, 3150 NULL, 3151 /* 29*/ MatSetUp_SeqDense, 3152 NULL, 3153 NULL, 3154 NULL, 3155 NULL, 3156 /* 34*/ MatDuplicate_SeqDense, 3157 NULL, 3158 NULL, 3159 NULL, 3160 NULL, 3161 /* 39*/ MatAXPY_SeqDense, 3162 MatCreateSubMatrices_SeqDense, 3163 NULL, 3164 MatGetValues_SeqDense, 3165 MatCopy_SeqDense, 3166 /* 44*/ MatGetRowMax_SeqDense, 3167 MatScale_SeqDense, 3168 MatShift_SeqDense, 3169 NULL, 3170 MatZeroRowsColumns_SeqDense, 3171 /* 49*/ MatSetRandom_SeqDense, 3172 NULL, 3173 NULL, 3174 NULL, 3175 NULL, 3176 /* 54*/ NULL, 3177 NULL, 3178 NULL, 3179 NULL, 3180 NULL, 3181 /* 59*/ MatCreateSubMatrix_SeqDense, 3182 MatDestroy_SeqDense, 3183 MatView_SeqDense, 3184 NULL, 3185 NULL, 3186 /* 64*/ NULL, 3187 NULL, 3188 NULL, 3189 NULL, 3190 NULL, 3191 /* 69*/ MatGetRowMaxAbs_SeqDense, 3192 NULL, 3193 NULL, 3194 NULL, 3195 NULL, 3196 /* 74*/ NULL, 3197 NULL, 3198 NULL, 3199 NULL, 3200 NULL, 3201 /* 79*/ NULL, 3202 NULL, 3203 NULL, 3204 NULL, 3205 /* 83*/ MatLoad_SeqDense, 3206 MatIsSymmetric_SeqDense, 3207 MatIsHermitian_SeqDense, 3208 NULL, 3209 NULL, 3210 NULL, 3211 /* 89*/ NULL, 3212 NULL, 3213 MatMatMultNumeric_SeqDense_SeqDense, 3214 NULL, 3215 NULL, 3216 /* 94*/ NULL, 3217 NULL, 3218 NULL, 3219 MatMatTransposeMultNumeric_SeqDense_SeqDense, 3220 NULL, 3221 /* 99*/ MatProductSetFromOptions_SeqDense, 3222 NULL, 3223 NULL, 3224 MatConjugate_SeqDense, 3225 NULL, 3226 /*104*/ NULL, 3227 MatRealPart_SeqDense, 3228 MatImaginaryPart_SeqDense, 3229 NULL, 3230 NULL, 3231 /*109*/ NULL, 3232 NULL, 3233 MatGetRowMin_SeqDense, 3234 MatGetColumnVector_SeqDense, 3235 MatMissingDiagonal_SeqDense, 3236 /*114*/ NULL, 3237 NULL, 3238 NULL, 3239 NULL, 3240 NULL, 3241 /*119*/ NULL, 3242 NULL, 3243 MatMultHermitianTranspose_SeqDense, 3244 MatMultHermitianTransposeAdd_SeqDense, 3245 NULL, 3246 /*124*/ NULL, 3247 MatGetColumnReductions_SeqDense, 3248 NULL, 3249 NULL, 3250 NULL, 3251 /*129*/ NULL, 3252 NULL, 3253 NULL, 3254 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3255 NULL, 3256 /*134*/ NULL, 3257 NULL, 3258 NULL, 3259 NULL, 3260 NULL, 3261 /*139*/ NULL, 3262 NULL, 3263 NULL, 3264 NULL, 3265 NULL, 3266 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3267 /*145*/ NULL, 3268 NULL, 3269 NULL, 3270 NULL, 3271 NULL, 3272 /*150*/ NULL, 3273 NULL, 3274 NULL, 3275 NULL, 3276 NULL, 3277 NULL}; 3278 3279 /*@ 3280 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3281 is stored in column major order (the usual Fortran format). 3282 3283 Collective 3284 3285 Input Parameters: 3286 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3287 . m - number of rows 3288 . n - number of columns 3289 - data - optional location of matrix data in column major order. Use `NULL` for PETSc 3290 to control all matrix memory allocation. 3291 3292 Output Parameter: 3293 . A - the matrix 3294 3295 Level: intermediate 3296 3297 Note: 3298 The data input variable is intended primarily for Fortran programmers 3299 who wish to allocate their own matrix memory space. Most users should 3300 set `data` = `NULL`. 3301 3302 Developer Note: 3303 Many of the matrix operations for this variant use the BLAS and LAPACK routines. 3304 3305 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3306 @*/ 3307 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A) 3308 { 3309 PetscFunctionBegin; 3310 PetscCall(MatCreate(comm, A)); 3311 PetscCall(MatSetSizes(*A, m, n, m, n)); 3312 PetscCall(MatSetType(*A, MATSEQDENSE)); 3313 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3314 PetscFunctionReturn(PETSC_SUCCESS); 3315 } 3316 3317 /*@ 3318 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3319 3320 Collective 3321 3322 Input Parameters: 3323 + B - the matrix 3324 - data - the array (or `NULL`) 3325 3326 Level: intermediate 3327 3328 Note: 3329 The data input variable is intended primarily for Fortran programmers 3330 who wish to allocate their own matrix memory space. Most users should 3331 need not call this routine. 3332 3333 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3334 @*/ 3335 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3336 { 3337 PetscFunctionBegin; 3338 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3339 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3340 PetscFunctionReturn(PETSC_SUCCESS); 3341 } 3342 3343 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3344 { 3345 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3346 3347 PetscFunctionBegin; 3348 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3349 B->preallocated = PETSC_TRUE; 3350 3351 PetscCall(PetscLayoutSetUp(B->rmap)); 3352 PetscCall(PetscLayoutSetUp(B->cmap)); 3353 3354 if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda)); 3355 3356 if (!data) { /* petsc-allocated storage */ 3357 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3358 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3359 3360 b->user_alloc = PETSC_FALSE; 3361 } else { /* user-allocated storage */ 3362 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3363 b->v = data; 3364 b->user_alloc = PETSC_TRUE; 3365 } 3366 B->assembled = PETSC_TRUE; 3367 PetscFunctionReturn(PETSC_SUCCESS); 3368 } 3369 3370 #if defined(PETSC_HAVE_ELEMENTAL) 3371 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3372 { 3373 Mat mat_elemental; 3374 const PetscScalar *array; 3375 PetscScalar *v_colwise; 3376 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3377 3378 PetscFunctionBegin; 3379 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3380 PetscCall(MatDenseGetArrayRead(A, &array)); 3381 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3382 k = 0; 3383 for (j = 0; j < N; j++) { 3384 cols[j] = j; 3385 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3386 } 3387 for (i = 0; i < M; i++) rows[i] = i; 3388 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3389 3390 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3391 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3392 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3393 PetscCall(MatSetUp(mat_elemental)); 3394 3395 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3396 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3397 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3398 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3399 PetscCall(PetscFree3(v_colwise, rows, cols)); 3400 3401 if (reuse == MAT_INPLACE_MATRIX) { 3402 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3403 } else { 3404 *newmat = mat_elemental; 3405 } 3406 PetscFunctionReturn(PETSC_SUCCESS); 3407 } 3408 #endif 3409 3410 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3411 { 3412 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3413 PetscBool data; 3414 3415 PetscFunctionBegin; 3416 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3417 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3418 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3419 PetscCall(PetscBLASIntCast(lda, &b->lda)); 3420 PetscFunctionReturn(PETSC_SUCCESS); 3421 } 3422 3423 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3424 { 3425 PetscFunctionBegin; 3426 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3427 PetscFunctionReturn(PETSC_SUCCESS); 3428 } 3429 3430 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) 3431 { 3432 PetscBool isstd, iskok, iscuda, iship; 3433 PetscMPIInt size; 3434 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) 3435 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ 3436 const PetscScalar *a; 3437 #endif 3438 3439 PetscFunctionBegin; 3440 *v = NULL; 3441 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); 3442 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); 3443 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); 3444 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); 3445 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3446 if (isstd) { 3447 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3448 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3449 } else if (iskok) { 3450 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); 3451 #if PetscDefined(HAVE_KOKKOS_KERNELS) 3452 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3453 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3454 #endif 3455 } else if (iscuda) { 3456 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); 3457 #if PetscDefined(HAVE_CUDA) 3458 PetscCall(MatDenseCUDAGetArrayRead(A, &a)); 3459 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3460 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3461 #endif 3462 } else if (iship) { 3463 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); 3464 #if PetscDefined(HAVE_HIP) 3465 PetscCall(MatDenseHIPGetArrayRead(A, &a)); 3466 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3467 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3468 #endif 3469 } 3470 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); 3471 PetscFunctionReturn(PETSC_SUCCESS); 3472 } 3473 3474 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3475 { 3476 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3477 3478 PetscFunctionBegin; 3479 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3480 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3481 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3482 a->vecinuse = col + 1; 3483 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3484 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3485 *v = a->cvec; 3486 PetscFunctionReturn(PETSC_SUCCESS); 3487 } 3488 3489 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3490 { 3491 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3492 3493 PetscFunctionBegin; 3494 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3495 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3496 VecCheckAssembled(a->cvec); 3497 a->vecinuse = 0; 3498 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3499 PetscCall(VecResetArray(a->cvec)); 3500 if (v) *v = NULL; 3501 PetscFunctionReturn(PETSC_SUCCESS); 3502 } 3503 3504 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3505 { 3506 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3507 3508 PetscFunctionBegin; 3509 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3510 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3511 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3512 a->vecinuse = col + 1; 3513 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3514 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3515 PetscCall(VecLockReadPush(a->cvec)); 3516 *v = a->cvec; 3517 PetscFunctionReturn(PETSC_SUCCESS); 3518 } 3519 3520 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3521 { 3522 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3523 3524 PetscFunctionBegin; 3525 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3526 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3527 VecCheckAssembled(a->cvec); 3528 a->vecinuse = 0; 3529 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3530 PetscCall(VecLockReadPop(a->cvec)); 3531 PetscCall(VecResetArray(a->cvec)); 3532 if (v) *v = NULL; 3533 PetscFunctionReturn(PETSC_SUCCESS); 3534 } 3535 3536 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3537 { 3538 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3539 3540 PetscFunctionBegin; 3541 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3542 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3543 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3544 a->vecinuse = col + 1; 3545 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3546 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3547 *v = a->cvec; 3548 PetscFunctionReturn(PETSC_SUCCESS); 3549 } 3550 3551 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3552 { 3553 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3554 3555 PetscFunctionBegin; 3556 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3557 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3558 VecCheckAssembled(a->cvec); 3559 a->vecinuse = 0; 3560 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3561 PetscCall(VecResetArray(a->cvec)); 3562 if (v) *v = NULL; 3563 PetscFunctionReturn(PETSC_SUCCESS); 3564 } 3565 3566 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3567 { 3568 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3569 3570 PetscFunctionBegin; 3571 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3572 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3573 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3574 if (!a->cmat) { 3575 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); 3576 } else { 3577 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); 3578 } 3579 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3580 a->matinuse = cbegin + 1; 3581 *v = a->cmat; 3582 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3583 A->offloadmask = PETSC_OFFLOAD_CPU; 3584 #endif 3585 PetscFunctionReturn(PETSC_SUCCESS); 3586 } 3587 3588 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3589 { 3590 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3591 3592 PetscFunctionBegin; 3593 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3594 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3595 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3596 a->matinuse = 0; 3597 PetscCall(MatDenseResetArray(a->cmat)); 3598 if (v) *v = NULL; 3599 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3600 A->offloadmask = PETSC_OFFLOAD_CPU; 3601 #endif 3602 PetscFunctionReturn(PETSC_SUCCESS); 3603 } 3604 3605 /*MC 3606 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3607 3608 Options Database Key: 3609 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3610 3611 Level: beginner 3612 3613 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` 3614 M*/ 3615 PetscErrorCode MatCreate_SeqDense(Mat B) 3616 { 3617 Mat_SeqDense *b; 3618 PetscMPIInt size; 3619 3620 PetscFunctionBegin; 3621 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3622 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3623 3624 PetscCall(PetscNew(&b)); 3625 B->data = (void *)b; 3626 B->ops[0] = MatOps_Values; 3627 3628 b->roworiented = PETSC_TRUE; 3629 3630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3633 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3635 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3636 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3638 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3639 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3641 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3642 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3643 #if defined(PETSC_HAVE_ELEMENTAL) 3644 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3645 #endif 3646 #if defined(PETSC_HAVE_SCALAPACK) 3647 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3648 #endif 3649 #if defined(PETSC_HAVE_CUDA) 3650 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3652 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3653 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3654 #endif 3655 #if defined(PETSC_HAVE_HIP) 3656 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3657 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3658 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3659 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3660 #endif 3661 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3662 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3663 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3664 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3665 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3666 3667 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3668 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3669 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3670 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3671 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3672 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3674 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3675 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3676 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3677 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); 3678 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); 3679 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); 3680 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3681 PetscFunctionReturn(PETSC_SUCCESS); 3682 } 3683 3684 /*@C 3685 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3686 3687 Not Collective 3688 3689 Input Parameters: 3690 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3691 - col - column index 3692 3693 Output Parameter: 3694 . vals - pointer to the data 3695 3696 Level: intermediate 3697 3698 Note: 3699 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3700 3701 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3702 @*/ 3703 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[]) 3704 { 3705 PetscFunctionBegin; 3706 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3707 PetscValidLogicalCollectiveInt(A, col, 2); 3708 PetscAssertPointer(vals, 3); 3709 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3710 PetscFunctionReturn(PETSC_SUCCESS); 3711 } 3712 3713 /*@C 3714 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3715 3716 Not Collective 3717 3718 Input Parameters: 3719 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3720 - vals - pointer to the data (may be `NULL`) 3721 3722 Level: intermediate 3723 3724 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` 3725 @*/ 3726 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[]) 3727 { 3728 PetscFunctionBegin; 3729 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3730 PetscAssertPointer(vals, 2); 3731 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3732 PetscFunctionReturn(PETSC_SUCCESS); 3733 } 3734 3735 /*@ 3736 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3737 3738 Collective 3739 3740 Input Parameters: 3741 + A - the `Mat` object 3742 - col - the column index 3743 3744 Output Parameter: 3745 . v - the vector 3746 3747 Level: intermediate 3748 3749 Notes: 3750 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3751 3752 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3753 3754 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3755 @*/ 3756 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3757 { 3758 PetscFunctionBegin; 3759 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3760 PetscValidType(A, 1); 3761 PetscValidLogicalCollectiveInt(A, col, 2); 3762 PetscAssertPointer(v, 3); 3763 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3764 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3765 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3766 PetscFunctionReturn(PETSC_SUCCESS); 3767 } 3768 3769 /*@ 3770 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. 3771 3772 Collective 3773 3774 Input Parameters: 3775 + A - the `Mat` object 3776 . col - the column index 3777 - v - the `Vec` object (may be `NULL`) 3778 3779 Level: intermediate 3780 3781 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3782 @*/ 3783 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3784 { 3785 PetscFunctionBegin; 3786 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3787 PetscValidType(A, 1); 3788 PetscValidLogicalCollectiveInt(A, col, 2); 3789 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3790 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3791 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3792 PetscFunctionReturn(PETSC_SUCCESS); 3793 } 3794 3795 /*@ 3796 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. 3797 3798 Collective 3799 3800 Input Parameters: 3801 + A - the `Mat` object 3802 - col - the column index 3803 3804 Output Parameter: 3805 . v - the vector 3806 3807 Level: intermediate 3808 3809 Notes: 3810 The vector is owned by PETSc and users cannot modify it. 3811 3812 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. 3813 3814 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. 3815 3816 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3817 @*/ 3818 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3819 { 3820 PetscFunctionBegin; 3821 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3822 PetscValidType(A, 1); 3823 PetscValidLogicalCollectiveInt(A, col, 2); 3824 PetscAssertPointer(v, 3); 3825 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3826 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3827 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3828 PetscFunctionReturn(PETSC_SUCCESS); 3829 } 3830 3831 /*@ 3832 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. 3833 3834 Collective 3835 3836 Input Parameters: 3837 + A - the `Mat` object 3838 . col - the column index 3839 - v - the `Vec` object (may be `NULL`) 3840 3841 Level: intermediate 3842 3843 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3844 @*/ 3845 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3846 { 3847 PetscFunctionBegin; 3848 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3849 PetscValidType(A, 1); 3850 PetscValidLogicalCollectiveInt(A, col, 2); 3851 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3852 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3853 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3854 PetscFunctionReturn(PETSC_SUCCESS); 3855 } 3856 3857 /*@ 3858 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. 3859 3860 Collective 3861 3862 Input Parameters: 3863 + A - the `Mat` object 3864 - col - the column index 3865 3866 Output Parameter: 3867 . v - the vector 3868 3869 Level: intermediate 3870 3871 Notes: 3872 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. 3873 3874 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. 3875 3876 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3877 @*/ 3878 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3879 { 3880 PetscFunctionBegin; 3881 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3882 PetscValidType(A, 1); 3883 PetscValidLogicalCollectiveInt(A, col, 2); 3884 PetscAssertPointer(v, 3); 3885 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3886 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3887 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3888 PetscFunctionReturn(PETSC_SUCCESS); 3889 } 3890 3891 /*@ 3892 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. 3893 3894 Collective 3895 3896 Input Parameters: 3897 + A - the `Mat` object 3898 . col - the column index 3899 - v - the `Vec` object (may be `NULL`) 3900 3901 Level: intermediate 3902 3903 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3904 @*/ 3905 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3906 { 3907 PetscFunctionBegin; 3908 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3909 PetscValidType(A, 1); 3910 PetscValidLogicalCollectiveInt(A, col, 2); 3911 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3912 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3913 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3914 PetscFunctionReturn(PETSC_SUCCESS); 3915 } 3916 3917 /*@ 3918 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. 3919 3920 Collective 3921 3922 Input Parameters: 3923 + A - the `Mat` object 3924 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) 3925 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) 3926 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) 3927 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) 3928 3929 Output Parameter: 3930 . v - the matrix 3931 3932 Level: intermediate 3933 3934 Notes: 3935 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. 3936 3937 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. 3938 3939 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3940 @*/ 3941 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3942 { 3943 PetscFunctionBegin; 3944 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3945 PetscValidType(A, 1); 3946 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3947 PetscValidLogicalCollectiveInt(A, rend, 3); 3948 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3949 PetscValidLogicalCollectiveInt(A, cend, 5); 3950 PetscAssertPointer(v, 6); 3951 if (rbegin == PETSC_DECIDE) rbegin = 0; 3952 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3953 if (cbegin == PETSC_DECIDE) cbegin = 0; 3954 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3955 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3956 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3957 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3958 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3959 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3960 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3961 PetscFunctionReturn(PETSC_SUCCESS); 3962 } 3963 3964 /*@ 3965 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. 3966 3967 Collective 3968 3969 Input Parameters: 3970 + A - the `Mat` object 3971 - v - the `Mat` object (may be `NULL`) 3972 3973 Level: intermediate 3974 3975 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3976 @*/ 3977 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3978 { 3979 PetscFunctionBegin; 3980 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3981 PetscValidType(A, 1); 3982 PetscAssertPointer(v, 2); 3983 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3984 PetscFunctionReturn(PETSC_SUCCESS); 3985 } 3986 3987 #include <petscblaslapack.h> 3988 #include <petsc/private/kernels/blockinvert.h> 3989 3990 PetscErrorCode MatSeqDenseInvert(Mat A) 3991 { 3992 PetscInt m; 3993 const PetscReal shift = 0.0; 3994 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 3995 PetscScalar *values; 3996 3997 PetscFunctionBegin; 3998 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3999 PetscCall(MatDenseGetArray(A, &values)); 4000 PetscCall(MatGetLocalSize(A, &m, NULL)); 4001 allowzeropivot = PetscNot(A->erroriffailure); 4002 /* factor and invert each block */ 4003 switch (m) { 4004 case 1: 4005 values[0] = (PetscScalar)1.0 / (values[0] + shift); 4006 break; 4007 case 2: 4008 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 4009 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4010 break; 4011 case 3: 4012 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 4013 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4014 break; 4015 case 4: 4016 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 4017 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4018 break; 4019 case 5: { 4020 PetscScalar work[25]; 4021 PetscInt ipvt[5]; 4022 4023 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4024 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4025 } break; 4026 case 6: 4027 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 4028 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4029 break; 4030 case 7: 4031 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 4032 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4033 break; 4034 default: { 4035 PetscInt *v_pivots, *IJ, j; 4036 PetscScalar *v_work; 4037 4038 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); 4039 for (j = 0; j < m; j++) IJ[j] = j; 4040 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4041 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4042 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 4043 } 4044 } 4045 PetscCall(MatDenseRestoreArray(A, &values)); 4046 PetscFunctionReturn(PETSC_SUCCESS); 4047 } 4048