1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/dense/mpi/mpidense.h> 9 #include <petscblaslapack.h> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <petsc/private/vecimpl.h> 12 13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 PetscInt j, k, n = A->rmap->n; 17 PetscScalar *v; 18 19 PetscFunctionBegin; 20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 21 PetscCall(MatDenseGetArray(A, &v)); 22 if (!hermitian) { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 25 } 26 } else { 27 for (k = 0; k < n; k++) { 28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 29 } 30 } 31 PetscCall(MatDenseRestoreArray(A, &v)); 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 36 { 37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 38 PetscBLASInt info, n; 39 40 PetscFunctionBegin; 41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 42 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 43 if (A->factortype == MAT_FACTOR_LU) { 44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 45 if (!mat->fwork) { 46 mat->lfwork = n; 47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 48 } 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 54 if (A->spd == PETSC_BOOL3_TRUE) { 55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 57 PetscCall(PetscFPTrapPop()); 58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 59 #if defined(PETSC_USE_COMPLEX) 60 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 65 PetscCall(PetscFPTrapPop()); 66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 67 #endif 68 } else { /* symmetric case */ 69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 73 PetscCall(PetscFPTrapPop()); 74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 75 } 76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 79 80 A->ops->solve = NULL; 81 A->ops->matsolve = NULL; 82 A->ops->solvetranspose = NULL; 83 A->ops->matsolvetranspose = NULL; 84 A->ops->solveadd = NULL; 85 A->ops->solvetransposeadd = NULL; 86 A->factortype = MAT_FACTOR_NONE; 87 PetscCall(PetscFree(A->solvertype)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 92 { 93 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 95 PetscScalar *slot, *bb, *v; 96 const PetscScalar *xx; 97 98 PetscFunctionBegin; 99 if (PetscDefined(USE_DEBUG)) { 100 for (i = 0; i < N; i++) { 101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 104 } 105 } 106 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 107 108 /* fix right-hand side if needed */ 109 if (x && b) { 110 Vec xt; 111 112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 113 PetscCall(VecDuplicate(x, &xt)); 114 PetscCall(VecCopy(x, xt)); 115 PetscCall(VecScale(xt, -1.0)); 116 PetscCall(MatMultAdd(A, xt, b, b)); 117 PetscCall(VecDestroy(&xt)); 118 PetscCall(VecGetArrayRead(x, &xx)); 119 PetscCall(VecGetArray(b, &bb)); 120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 121 PetscCall(VecRestoreArrayRead(x, &xx)); 122 PetscCall(VecRestoreArray(b, &bb)); 123 } 124 125 PetscCall(MatDenseGetArray(A, &v)); 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i] * m; 128 PetscCall(PetscArrayzero(slot, r)); 129 } 130 for (i = 0; i < N; i++) { 131 slot = v + rows[i]; 132 for (j = 0; j < n; j++) { 133 *slot = 0.0; 134 slot += m; 135 } 136 } 137 if (diag != 0.0) { 138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 139 for (i = 0; i < N; i++) { 140 slot = v + (m + 1) * rows[i]; 141 *slot = diag; 142 } 143 } 144 PetscCall(MatDenseRestoreArray(A, &v)); 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 149 { 150 Mat B = NULL; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 152 Mat_SeqDense *b; 153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 154 const MatScalar *av; 155 PetscBool isseqdense; 156 157 PetscFunctionBegin; 158 if (reuse == MAT_REUSE_MATRIX) { 159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name); 161 } 162 if (reuse != MAT_REUSE_MATRIX) { 163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 164 PetscCall(MatSetSizes(B, m, n, m, n)); 165 PetscCall(MatSetType(B, MATSEQDENSE)); 166 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 167 b = (Mat_SeqDense *)B->data; 168 } else { 169 b = (Mat_SeqDense *)((*newmat)->data); 170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m)); 171 } 172 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 173 for (i = 0; i < m; i++) { 174 PetscInt j; 175 for (j = 0; j < ai[1] - ai[0]; j++) { 176 b->v[*aj * b->lda + i] = *av; 177 aj++; 178 av++; 179 } 180 ai++; 181 } 182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 183 184 if (reuse == MAT_INPLACE_MATRIX) { 185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 187 PetscCall(MatHeaderReplace(A, &B)); 188 } else { 189 if (B) *newmat = B; 190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 192 } 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 197 { 198 Mat B = NULL; 199 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 200 PetscInt i, j; 201 PetscInt *rows, *nnz; 202 MatScalar *aa = a->v, *vals; 203 204 PetscFunctionBegin; 205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 206 if (reuse != MAT_REUSE_MATRIX) { 207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 209 PetscCall(MatSetType(B, MATSEQAIJ)); 210 for (j = 0; j < A->cmap->n; j++) { 211 for (i = 0; i < A->rmap->n; i++) 212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 213 aa += a->lda; 214 } 215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 216 } else B = *newmat; 217 aa = a->v; 218 for (j = 0; j < A->cmap->n; j++) { 219 PetscInt numRows = 0; 220 for (i = 0; i < A->rmap->n; i++) 221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 222 rows[numRows] = i; 223 vals[numRows++] = aa[i]; 224 } 225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 226 aa += a->lda; 227 } 228 PetscCall(PetscFree3(rows, nnz, vals)); 229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 231 232 if (reuse == MAT_INPLACE_MATRIX) { 233 PetscCall(MatHeaderReplace(A, &B)); 234 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 239 { 240 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 241 const PetscScalar *xv; 242 PetscScalar *yv; 243 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 244 245 PetscFunctionBegin; 246 PetscCall(MatDenseGetArrayRead(X, &xv)); 247 PetscCall(MatDenseGetArray(Y, &yv)); 248 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 249 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 250 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 251 PetscCall(PetscBLASIntCast(y->lda, &lday)); 252 if (ldax > m || lday > m) { 253 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one)); 254 } else { 255 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 256 } 257 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 258 PetscCall(MatDenseRestoreArray(Y, &yv)); 259 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 264 { 265 PetscLogDouble N = A->rmap->n * A->cmap->n; 266 267 PetscFunctionBegin; 268 info->block_size = 1.0; 269 info->nz_allocated = N; 270 info->nz_used = N; 271 info->nz_unneeded = 0; 272 info->assemblies = A->num_ass; 273 info->mallocs = 0; 274 info->memory = 0; /* REVIEW ME */ 275 info->fill_ratio_given = 0; 276 info->fill_ratio_needed = 0; 277 info->factor_mallocs = 0; 278 PetscFunctionReturn(PETSC_SUCCESS); 279 } 280 281 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 282 { 283 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 284 PetscScalar *v; 285 PetscBLASInt one = 1, j, nz, lda = 0; 286 287 PetscFunctionBegin; 288 PetscCall(MatDenseGetArray(A, &v)); 289 PetscCall(PetscBLASIntCast(a->lda, &lda)); 290 if (lda > A->rmap->n) { 291 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 292 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 293 } else { 294 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 295 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 296 } 297 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 298 PetscCall(MatDenseRestoreArray(A, &v)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301 302 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 303 { 304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 305 PetscScalar *v; 306 PetscInt j, k; 307 308 PetscFunctionBegin; 309 PetscCall(MatDenseGetArray(A, &v)); 310 k = PetscMin(A->rmap->n, A->cmap->n); 311 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 312 PetscCall(PetscLogFlops(k)); 313 PetscCall(MatDenseRestoreArray(A, &v)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 318 { 319 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 320 PetscInt i, j, m = A->rmap->n, N = a->lda; 321 const PetscScalar *v; 322 323 PetscFunctionBegin; 324 *fl = PETSC_FALSE; 325 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 326 PetscCall(MatDenseGetArrayRead(A, &v)); 327 for (i = 0; i < m; i++) { 328 for (j = i; j < m; j++) { 329 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 330 } 331 } 332 *fl = PETSC_TRUE; 333 restore: 334 PetscCall(MatDenseRestoreArrayRead(A, &v)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 339 { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 360 { 361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 362 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 363 PetscBool isdensecpu; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 367 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 368 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 369 PetscCall(MatDenseSetLDA(newi, lda)); 370 } 371 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 372 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 373 if (cpvalues == MAT_COPY_VALUES) { 374 const PetscScalar *av; 375 PetscScalar *v; 376 377 PetscCall(MatDenseGetArrayRead(A, &av)); 378 PetscCall(MatDenseGetArrayWrite(newi, &v)); 379 PetscCall(MatDenseGetLDA(newi, &nlda)); 380 m = A->rmap->n; 381 if (lda > m || nlda > m) { 382 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m)); 383 } else { 384 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 385 } 386 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 387 PetscCall(MatDenseRestoreArrayRead(A, &av)); 388 } 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 393 { 394 PetscFunctionBegin; 395 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 396 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 397 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 398 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 403 { 404 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 405 PetscBLASInt info; 406 407 PetscFunctionBegin; 408 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 409 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 410 PetscCall(PetscFPTrapPop()); 411 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 412 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 413 PetscFunctionReturn(PETSC_SUCCESS); 414 } 415 416 static PetscErrorCode MatConjugate_SeqDense(Mat); 417 418 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 419 { 420 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 421 PetscBLASInt info; 422 423 PetscFunctionBegin; 424 if (A->spd == PETSC_BOOL3_TRUE) { 425 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 426 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 427 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 428 PetscCall(PetscFPTrapPop()); 429 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 430 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 431 #if defined(PETSC_USE_COMPLEX) 432 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 433 if (T) PetscCall(MatConjugate_SeqDense(A)); 434 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 435 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 436 PetscCall(PetscFPTrapPop()); 437 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 438 if (T) PetscCall(MatConjugate_SeqDense(A)); 439 #endif 440 } else { /* symmetric case */ 441 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 442 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 443 PetscCall(PetscFPTrapPop()); 444 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 445 } 446 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 447 PetscFunctionReturn(PETSC_SUCCESS); 448 } 449 450 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 451 { 452 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 453 PetscBLASInt info; 454 char trans; 455 456 PetscFunctionBegin; 457 if (PetscDefined(USE_COMPLEX)) { 458 trans = 'C'; 459 } else { 460 trans = 'T'; 461 } 462 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 463 { /* lwork depends on the number of right-hand sides */ 464 PetscBLASInt nlfwork, lfwork = -1; 465 PetscScalar fwork; 466 467 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 468 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 469 if (nlfwork > mat->lfwork) { 470 mat->lfwork = nlfwork; 471 PetscCall(PetscFree(mat->fwork)); 472 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 473 } 474 } 475 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 476 PetscCall(PetscFPTrapPop()); 477 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 478 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 479 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 480 PetscCall(PetscFPTrapPop()); 481 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 482 for (PetscInt j = 0; j < nrhs; j++) { 483 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 484 } 485 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 486 PetscFunctionReturn(PETSC_SUCCESS); 487 } 488 489 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 490 { 491 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 492 PetscBLASInt info; 493 494 PetscFunctionBegin; 495 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 496 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 497 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 498 PetscCall(PetscFPTrapPop()); 499 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 500 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 501 { /* lwork depends on the number of right-hand sides */ 502 PetscBLASInt nlfwork, lfwork = -1; 503 PetscScalar fwork; 504 505 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 506 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 507 if (nlfwork > mat->lfwork) { 508 mat->lfwork = nlfwork; 509 PetscCall(PetscFree(mat->fwork)); 510 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 511 } 512 } 513 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 514 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 515 PetscCall(PetscFPTrapPop()); 516 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 517 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 518 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 519 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 520 PetscFunctionReturn(PETSC_SUCCESS); 521 } 522 523 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 524 { 525 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 526 PetscScalar *y; 527 PetscBLASInt m = 0, k = 0; 528 529 PetscFunctionBegin; 530 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 531 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 532 if (k < m) { 533 PetscCall(VecCopy(xx, mat->qrrhs)); 534 PetscCall(VecGetArray(mat->qrrhs, &y)); 535 } else { 536 PetscCall(VecCopy(xx, yy)); 537 PetscCall(VecGetArray(yy, &y)); 538 } 539 *_y = y; 540 *_k = k; 541 *_m = m; 542 PetscFunctionReturn(PETSC_SUCCESS); 543 } 544 545 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 546 { 547 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 548 PetscScalar *y = NULL; 549 PetscBLASInt m, k; 550 551 PetscFunctionBegin; 552 y = *_y; 553 *_y = NULL; 554 k = *_k; 555 m = *_m; 556 if (k < m) { 557 PetscScalar *yv; 558 PetscCall(VecGetArray(yy, &yv)); 559 PetscCall(PetscArraycpy(yv, y, k)); 560 PetscCall(VecRestoreArray(yy, &yv)); 561 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 562 } else { 563 PetscCall(VecRestoreArray(yy, &y)); 564 } 565 PetscFunctionReturn(PETSC_SUCCESS); 566 } 567 568 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 569 { 570 PetscScalar *y = NULL; 571 PetscBLASInt m = 0, k = 0; 572 573 PetscFunctionBegin; 574 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 575 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 576 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 577 PetscFunctionReturn(PETSC_SUCCESS); 578 } 579 580 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 581 { 582 PetscScalar *y = NULL; 583 PetscBLASInt m = 0, k = 0; 584 585 PetscFunctionBegin; 586 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 587 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 588 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 592 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 593 { 594 PetscScalar *y = NULL; 595 PetscBLASInt m = 0, k = 0; 596 597 PetscFunctionBegin; 598 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 599 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 600 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 601 PetscFunctionReturn(PETSC_SUCCESS); 602 } 603 604 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 605 { 606 PetscScalar *y = NULL; 607 PetscBLASInt m = 0, k = 0; 608 609 PetscFunctionBegin; 610 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 611 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 612 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 613 PetscFunctionReturn(PETSC_SUCCESS); 614 } 615 616 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 617 { 618 PetscScalar *y = NULL; 619 PetscBLASInt m = 0, k = 0; 620 621 PetscFunctionBegin; 622 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 623 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 624 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 625 PetscFunctionReturn(PETSC_SUCCESS); 626 } 627 628 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 629 { 630 PetscScalar *y = NULL; 631 PetscBLASInt m = 0, k = 0; 632 633 PetscFunctionBegin; 634 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 635 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 636 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 637 PetscFunctionReturn(PETSC_SUCCESS); 638 } 639 640 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 641 { 642 const PetscScalar *b; 643 PetscScalar *y; 644 PetscInt n, _ldb, _ldx; 645 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 646 647 PetscFunctionBegin; 648 *_ldy = 0; 649 *_m = 0; 650 *_nrhs = 0; 651 *_k = 0; 652 *_y = NULL; 653 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 654 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 655 PetscCall(MatGetSize(B, NULL, &n)); 656 PetscCall(PetscBLASIntCast(n, &nrhs)); 657 PetscCall(MatDenseGetLDA(B, &_ldb)); 658 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 659 PetscCall(MatDenseGetLDA(X, &_ldx)); 660 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 661 if (ldx < m) { 662 PetscCall(MatDenseGetArrayRead(B, &b)); 663 PetscCall(PetscMalloc1(nrhs * m, &y)); 664 if (ldb == m) { 665 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 666 } else { 667 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 668 } 669 ldy = m; 670 PetscCall(MatDenseRestoreArrayRead(B, &b)); 671 } else { 672 if (ldb == ldx) { 673 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 674 PetscCall(MatDenseGetArray(X, &y)); 675 } else { 676 PetscCall(MatDenseGetArray(X, &y)); 677 PetscCall(MatDenseGetArrayRead(B, &b)); 678 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 679 PetscCall(MatDenseRestoreArrayRead(B, &b)); 680 } 681 ldy = ldx; 682 } 683 *_y = y; 684 *_ldy = ldy; 685 *_k = k; 686 *_m = m; 687 *_nrhs = nrhs; 688 PetscFunctionReturn(PETSC_SUCCESS); 689 } 690 691 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 692 { 693 PetscScalar *y; 694 PetscInt _ldx; 695 PetscBLASInt k, ldy, nrhs, ldx = 0; 696 697 PetscFunctionBegin; 698 y = *_y; 699 *_y = NULL; 700 k = *_k; 701 ldy = *_ldy; 702 nrhs = *_nrhs; 703 PetscCall(MatDenseGetLDA(X, &_ldx)); 704 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 705 if (ldx != ldy) { 706 PetscScalar *xv; 707 PetscCall(MatDenseGetArray(X, &xv)); 708 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 709 PetscCall(MatDenseRestoreArray(X, &xv)); 710 PetscCall(PetscFree(y)); 711 } else { 712 PetscCall(MatDenseRestoreArray(X, &y)); 713 } 714 PetscFunctionReturn(PETSC_SUCCESS); 715 } 716 717 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 718 { 719 PetscScalar *y; 720 PetscBLASInt m, k, ldy, nrhs; 721 722 PetscFunctionBegin; 723 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 724 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 725 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 726 PetscFunctionReturn(PETSC_SUCCESS); 727 } 728 729 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 730 { 731 PetscScalar *y; 732 PetscBLASInt m, k, ldy, nrhs; 733 734 PetscFunctionBegin; 735 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 736 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 737 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 738 PetscFunctionReturn(PETSC_SUCCESS); 739 } 740 741 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 742 { 743 PetscScalar *y; 744 PetscBLASInt m, k, ldy, nrhs; 745 746 PetscFunctionBegin; 747 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 748 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 749 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 750 PetscFunctionReturn(PETSC_SUCCESS); 751 } 752 753 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 754 { 755 PetscScalar *y; 756 PetscBLASInt m, k, ldy, nrhs; 757 758 PetscFunctionBegin; 759 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 760 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 761 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 762 PetscFunctionReturn(PETSC_SUCCESS); 763 } 764 765 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 766 { 767 PetscScalar *y; 768 PetscBLASInt m, k, ldy, nrhs; 769 770 PetscFunctionBegin; 771 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 772 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 773 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 774 PetscFunctionReturn(PETSC_SUCCESS); 775 } 776 777 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 778 { 779 PetscScalar *y; 780 PetscBLASInt m, k, ldy, nrhs; 781 782 PetscFunctionBegin; 783 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 784 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 785 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 786 PetscFunctionReturn(PETSC_SUCCESS); 787 } 788 789 /* COMMENT: I have chosen to hide row permutation in the pivots, 790 rather than put it in the Mat->row slot.*/ 791 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 792 { 793 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 794 PetscBLASInt n, m, info; 795 796 PetscFunctionBegin; 797 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 798 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 799 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 800 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 801 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 802 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 803 PetscCall(PetscFPTrapPop()); 804 805 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 806 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 807 808 A->ops->solve = MatSolve_SeqDense_LU; 809 A->ops->matsolve = MatMatSolve_SeqDense_LU; 810 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 811 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 812 A->factortype = MAT_FACTOR_LU; 813 814 PetscCall(PetscFree(A->solvertype)); 815 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 816 817 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 822 { 823 MatFactorInfo info; 824 825 PetscFunctionBegin; 826 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 827 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 828 PetscFunctionReturn(PETSC_SUCCESS); 829 } 830 831 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 832 { 833 PetscFunctionBegin; 834 fact->preallocated = PETSC_TRUE; 835 fact->assembled = PETSC_TRUE; 836 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 837 PetscFunctionReturn(PETSC_SUCCESS); 838 } 839 840 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 841 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 842 { 843 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 844 PetscBLASInt info, n; 845 846 PetscFunctionBegin; 847 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 848 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 849 if (A->spd == PETSC_BOOL3_TRUE) { 850 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 851 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 852 PetscCall(PetscFPTrapPop()); 853 #if defined(PETSC_USE_COMPLEX) 854 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 855 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 856 if (!mat->fwork) { 857 PetscScalar dummy; 858 859 mat->lfwork = -1; 860 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 861 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 862 PetscCall(PetscFPTrapPop()); 863 mat->lfwork = (PetscInt)PetscRealPart(dummy); 864 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 865 } 866 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 867 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 868 PetscCall(PetscFPTrapPop()); 869 #endif 870 } else { /* symmetric case */ 871 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 872 if (!mat->fwork) { 873 PetscScalar dummy; 874 875 mat->lfwork = -1; 876 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 877 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 878 PetscCall(PetscFPTrapPop()); 879 mat->lfwork = (PetscInt)PetscRealPart(dummy); 880 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 881 } 882 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 883 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 884 PetscCall(PetscFPTrapPop()); 885 } 886 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 887 888 A->ops->solve = MatSolve_SeqDense_Cholesky; 889 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 890 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 891 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 892 A->factortype = MAT_FACTOR_CHOLESKY; 893 894 PetscCall(PetscFree(A->solvertype)); 895 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 896 897 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 898 PetscFunctionReturn(PETSC_SUCCESS); 899 } 900 901 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 902 { 903 MatFactorInfo info; 904 905 PetscFunctionBegin; 906 info.fill = 1.0; 907 908 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 909 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 910 PetscFunctionReturn(PETSC_SUCCESS); 911 } 912 913 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 914 { 915 PetscFunctionBegin; 916 fact->assembled = PETSC_TRUE; 917 fact->preallocated = PETSC_TRUE; 918 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 919 PetscFunctionReturn(PETSC_SUCCESS); 920 } 921 922 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 923 { 924 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 925 PetscBLASInt n, m, info, min, max; 926 927 PetscFunctionBegin; 928 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 929 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 930 max = PetscMax(m, n); 931 min = PetscMin(m, n); 932 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 933 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 934 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs)); 935 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS); 936 if (!mat->fwork) { 937 PetscScalar dummy; 938 939 mat->lfwork = -1; 940 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 941 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 942 PetscCall(PetscFPTrapPop()); 943 mat->lfwork = (PetscInt)PetscRealPart(dummy); 944 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 945 } 946 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 947 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 948 PetscCall(PetscFPTrapPop()); 949 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 950 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 951 mat->rank = min; 952 953 A->ops->solve = MatSolve_SeqDense_QR; 954 A->ops->matsolve = MatMatSolve_SeqDense_QR; 955 A->factortype = MAT_FACTOR_QR; 956 if (m == n) { 957 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 958 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 959 } 960 961 PetscCall(PetscFree(A->solvertype)); 962 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 963 964 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 965 PetscFunctionReturn(PETSC_SUCCESS); 966 } 967 968 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 969 { 970 MatFactorInfo info; 971 972 PetscFunctionBegin; 973 info.fill = 1.0; 974 975 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 976 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 977 PetscFunctionReturn(PETSC_SUCCESS); 978 } 979 980 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 981 { 982 PetscFunctionBegin; 983 fact->assembled = PETSC_TRUE; 984 fact->preallocated = PETSC_TRUE; 985 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 /* uses LAPACK */ 990 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 991 { 992 PetscFunctionBegin; 993 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 994 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 995 PetscCall(MatSetType(*fact, MATDENSE)); 996 (*fact)->trivialsymbolic = PETSC_TRUE; 997 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 998 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 999 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1000 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1001 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1002 } else if (ftype == MAT_FACTOR_QR) { 1003 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1004 } 1005 (*fact)->factortype = ftype; 1006 1007 PetscCall(PetscFree((*fact)->solvertype)); 1008 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1009 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1010 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1011 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1012 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1017 { 1018 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1019 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1020 const PetscScalar *b; 1021 PetscInt m = A->rmap->n, i; 1022 PetscBLASInt o = 1, bm = 0; 1023 1024 PetscFunctionBegin; 1025 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1026 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1027 #endif 1028 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1029 PetscCall(PetscBLASIntCast(m, &bm)); 1030 if (flag & SOR_ZERO_INITIAL_GUESS) { 1031 /* this is a hack fix, should have another version without the second BLASdotu */ 1032 PetscCall(VecSet(xx, zero)); 1033 } 1034 PetscCall(VecGetArray(xx, &x)); 1035 PetscCall(VecGetArrayRead(bb, &b)); 1036 its = its * lits; 1037 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1038 while (its--) { 1039 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1040 for (i = 0; i < m; i++) { 1041 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1042 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1043 } 1044 } 1045 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1046 for (i = m - 1; i >= 0; i--) { 1047 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1048 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1049 } 1050 } 1051 } 1052 PetscCall(VecRestoreArrayRead(bb, &b)); 1053 PetscCall(VecRestoreArray(xx, &x)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1058 { 1059 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1060 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1061 PetscBLASInt m, n, _One = 1; 1062 const PetscScalar *v = mat->v, *x; 1063 1064 PetscFunctionBegin; 1065 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1066 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1067 PetscCall(VecGetArrayRead(xx, &x)); 1068 PetscCall(VecGetArrayWrite(yy, &y)); 1069 if (!m || !n) { 1070 PetscBLASInt i; 1071 if (trans) 1072 for (i = 0; i < n; i++) y[i] = 0.0; 1073 else 1074 for (i = 0; i < m; i++) y[i] = 0.0; 1075 } else { 1076 if (trans) { 1077 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1078 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One)); 1079 } else { 1080 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One)); 1081 } 1082 PetscCall(PetscLogFlops(2.0 * m * n - n)); 1083 } 1084 PetscCall(VecRestoreArrayRead(xx, &x)); 1085 PetscCall(VecRestoreArrayWrite(yy, &y)); 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 1090 { 1091 PetscFunctionBegin; 1092 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1093 PetscFunctionReturn(PETSC_SUCCESS); 1094 } 1095 1096 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1097 { 1098 PetscFunctionBegin; 1099 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1100 PetscFunctionReturn(PETSC_SUCCESS); 1101 } 1102 1103 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1104 { 1105 PetscFunctionBegin; 1106 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1107 PetscFunctionReturn(PETSC_SUCCESS); 1108 } 1109 1110 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1111 { 1112 PetscFunctionBegin; 1113 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1114 PetscFunctionReturn(PETSC_SUCCESS); 1115 } 1116 1117 static PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm) 1118 { 1119 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1120 const PetscScalar *v = mat->v, *x; 1121 PetscScalar *y, _DOne = 1.0; 1122 PetscBLASInt m, n, _One = 1; 1123 1124 PetscFunctionBegin; 1125 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1126 PetscCall(PetscBLASIntCast(c_end - c_start, &n)); 1127 PetscCall(VecCopy(zz, yy)); 1128 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS); 1129 PetscCall(VecGetArray(yy, &y)); 1130 PetscCall(VecGetArrayRead(xx, &x)); 1131 if (trans) { 1132 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1133 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One)); 1134 } else { 1135 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One)); 1136 } 1137 PetscCall(VecRestoreArrayRead(xx, &x)); 1138 PetscCall(VecRestoreArray(yy, &y)); 1139 PetscCall(PetscLogFlops(2.0 * m * n)); 1140 PetscFunctionReturn(PETSC_SUCCESS); 1141 } 1142 1143 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1144 { 1145 PetscFunctionBegin; 1146 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE)); 1147 PetscFunctionReturn(PETSC_SUCCESS); 1148 } 1149 1150 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end) 1151 { 1152 PetscFunctionBegin; 1153 PetscMPIInt rank; 1154 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 1155 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE)); 1156 PetscFunctionReturn(PETSC_SUCCESS); 1157 } 1158 1159 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1160 { 1161 PetscFunctionBegin; 1162 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE)); 1163 PetscFunctionReturn(PETSC_SUCCESS); 1164 } 1165 1166 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1167 { 1168 PetscFunctionBegin; 1169 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE)); 1170 PetscFunctionReturn(PETSC_SUCCESS); 1171 } 1172 1173 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1174 { 1175 PetscFunctionBegin; 1176 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE)); 1177 PetscFunctionReturn(PETSC_SUCCESS); 1178 } 1179 1180 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1181 { 1182 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1183 PetscInt i; 1184 1185 PetscFunctionBegin; 1186 if (ncols) *ncols = A->cmap->n; 1187 if (cols) { 1188 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1189 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1190 } 1191 if (vals) { 1192 const PetscScalar *v; 1193 1194 PetscCall(MatDenseGetArrayRead(A, &v)); 1195 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1196 v += row; 1197 for (i = 0; i < A->cmap->n; i++) { 1198 (*vals)[i] = *v; 1199 v += mat->lda; 1200 } 1201 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1202 } 1203 PetscFunctionReturn(PETSC_SUCCESS); 1204 } 1205 1206 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1207 { 1208 PetscFunctionBegin; 1209 if (cols) PetscCall(PetscFree(*cols)); 1210 if (vals) PetscCall(PetscFree(*vals)); 1211 PetscFunctionReturn(PETSC_SUCCESS); 1212 } 1213 1214 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1215 { 1216 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1217 PetscScalar *av; 1218 PetscInt i, j, idx = 0; 1219 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1220 PetscOffloadMask oldf; 1221 #endif 1222 1223 PetscFunctionBegin; 1224 PetscCall(MatDenseGetArray(A, &av)); 1225 if (!mat->roworiented) { 1226 if (addv == INSERT_VALUES) { 1227 for (j = 0; j < n; j++) { 1228 if (indexn[j] < 0) { 1229 idx += m; 1230 continue; 1231 } 1232 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1233 for (i = 0; i < m; i++) { 1234 if (indexm[i] < 0) { 1235 idx++; 1236 continue; 1237 } 1238 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1239 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1240 } 1241 } 1242 } else { 1243 for (j = 0; j < n; j++) { 1244 if (indexn[j] < 0) { 1245 idx += m; 1246 continue; 1247 } 1248 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1249 for (i = 0; i < m; i++) { 1250 if (indexm[i] < 0) { 1251 idx++; 1252 continue; 1253 } 1254 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1255 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1256 } 1257 } 1258 } 1259 } else { 1260 if (addv == INSERT_VALUES) { 1261 for (i = 0; i < m; i++) { 1262 if (indexm[i] < 0) { 1263 idx += n; 1264 continue; 1265 } 1266 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1267 for (j = 0; j < n; j++) { 1268 if (indexn[j] < 0) { 1269 idx++; 1270 continue; 1271 } 1272 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1273 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1274 } 1275 } 1276 } else { 1277 for (i = 0; i < m; i++) { 1278 if (indexm[i] < 0) { 1279 idx += n; 1280 continue; 1281 } 1282 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1283 for (j = 0; j < n; j++) { 1284 if (indexn[j] < 0) { 1285 idx++; 1286 continue; 1287 } 1288 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1289 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1290 } 1291 } 1292 } 1293 } 1294 /* hack to prevent unneeded copy to the GPU while returning the array */ 1295 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1296 oldf = A->offloadmask; 1297 A->offloadmask = PETSC_OFFLOAD_GPU; 1298 #endif 1299 PetscCall(MatDenseRestoreArray(A, &av)); 1300 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1301 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1302 #endif 1303 PetscFunctionReturn(PETSC_SUCCESS); 1304 } 1305 1306 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1307 { 1308 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1309 const PetscScalar *vv; 1310 PetscInt i, j; 1311 1312 PetscFunctionBegin; 1313 PetscCall(MatDenseGetArrayRead(A, &vv)); 1314 /* row-oriented output */ 1315 for (i = 0; i < m; i++) { 1316 if (indexm[i] < 0) { 1317 v += n; 1318 continue; 1319 } 1320 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1321 for (j = 0; j < n; j++) { 1322 if (indexn[j] < 0) { 1323 v++; 1324 continue; 1325 } 1326 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1327 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1328 } 1329 } 1330 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1331 PetscFunctionReturn(PETSC_SUCCESS); 1332 } 1333 1334 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1335 { 1336 PetscBool skipHeader; 1337 PetscViewerFormat format; 1338 PetscInt header[4], M, N, m, lda, i, j, k; 1339 const PetscScalar *v; 1340 PetscScalar *vwork; 1341 1342 PetscFunctionBegin; 1343 PetscCall(PetscViewerSetUp(viewer)); 1344 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1345 PetscCall(PetscViewerGetFormat(viewer, &format)); 1346 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1347 1348 PetscCall(MatGetSize(mat, &M, &N)); 1349 1350 /* write matrix header */ 1351 header[0] = MAT_FILE_CLASSID; 1352 header[1] = M; 1353 header[2] = N; 1354 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1355 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1356 1357 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1358 if (format != PETSC_VIEWER_NATIVE) { 1359 PetscInt nnz = m * N, *iwork; 1360 /* store row lengths for each row */ 1361 PetscCall(PetscMalloc1(nnz, &iwork)); 1362 for (i = 0; i < m; i++) iwork[i] = N; 1363 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1364 /* store column indices (zero start index) */ 1365 for (k = 0, i = 0; i < m; i++) 1366 for (j = 0; j < N; j++, k++) iwork[k] = j; 1367 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1368 PetscCall(PetscFree(iwork)); 1369 } 1370 /* store matrix values as a dense matrix in row major order */ 1371 PetscCall(PetscMalloc1(m * N, &vwork)); 1372 PetscCall(MatDenseGetArrayRead(mat, &v)); 1373 PetscCall(MatDenseGetLDA(mat, &lda)); 1374 for (k = 0, i = 0; i < m; i++) 1375 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1376 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1377 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1378 PetscCall(PetscFree(vwork)); 1379 PetscFunctionReturn(PETSC_SUCCESS); 1380 } 1381 1382 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1383 { 1384 PetscBool skipHeader; 1385 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1386 PetscInt rows, cols; 1387 PetscScalar *v, *vwork; 1388 1389 PetscFunctionBegin; 1390 PetscCall(PetscViewerSetUp(viewer)); 1391 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1392 1393 if (!skipHeader) { 1394 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1395 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1396 M = header[1]; 1397 N = header[2]; 1398 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1399 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1400 nz = header[3]; 1401 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1402 } else { 1403 PetscCall(MatGetSize(mat, &M, &N)); 1404 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1405 nz = MATRIX_BINARY_FORMAT_DENSE; 1406 } 1407 1408 /* setup global sizes if not set */ 1409 if (mat->rmap->N < 0) mat->rmap->N = M; 1410 if (mat->cmap->N < 0) mat->cmap->N = N; 1411 PetscCall(MatSetUp(mat)); 1412 /* check if global sizes are correct */ 1413 PetscCall(MatGetSize(mat, &rows, &cols)); 1414 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1415 1416 PetscCall(MatGetSize(mat, NULL, &N)); 1417 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1418 PetscCall(MatDenseGetArray(mat, &v)); 1419 PetscCall(MatDenseGetLDA(mat, &lda)); 1420 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1421 PetscInt nnz = m * N; 1422 /* read in matrix values */ 1423 PetscCall(PetscMalloc1(nnz, &vwork)); 1424 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1425 /* store values in column major order */ 1426 for (j = 0; j < N; j++) 1427 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1428 PetscCall(PetscFree(vwork)); 1429 } else { /* matrix in file is sparse format */ 1430 PetscInt nnz = 0, *rlens, *icols; 1431 /* read in row lengths */ 1432 PetscCall(PetscMalloc1(m, &rlens)); 1433 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1434 for (i = 0; i < m; i++) nnz += rlens[i]; 1435 /* read in column indices and values */ 1436 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1437 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1438 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1439 /* store values in column major order */ 1440 for (k = 0, i = 0; i < m; i++) 1441 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1442 PetscCall(PetscFree(rlens)); 1443 PetscCall(PetscFree2(icols, vwork)); 1444 } 1445 PetscCall(MatDenseRestoreArray(mat, &v)); 1446 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1447 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1448 PetscFunctionReturn(PETSC_SUCCESS); 1449 } 1450 1451 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1452 { 1453 PetscBool isbinary, ishdf5; 1454 1455 PetscFunctionBegin; 1456 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1457 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1458 /* force binary viewer to load .info file if it has not yet done so */ 1459 PetscCall(PetscViewerSetUp(viewer)); 1460 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1461 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1462 if (isbinary) { 1463 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1464 } else if (ishdf5) { 1465 #if defined(PETSC_HAVE_HDF5) 1466 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1467 #else 1468 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1469 #endif 1470 } else { 1471 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1472 } 1473 PetscFunctionReturn(PETSC_SUCCESS); 1474 } 1475 1476 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1477 { 1478 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1479 PetscInt i, j; 1480 const char *name; 1481 PetscScalar *v, *av; 1482 PetscViewerFormat format; 1483 #if defined(PETSC_USE_COMPLEX) 1484 PetscBool allreal = PETSC_TRUE; 1485 #endif 1486 1487 PetscFunctionBegin; 1488 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1489 PetscCall(PetscViewerGetFormat(viewer, &format)); 1490 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1491 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */ 1492 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1493 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1494 for (i = 0; i < A->rmap->n; i++) { 1495 v = av + i; 1496 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1497 for (j = 0; j < A->cmap->n; j++) { 1498 #if defined(PETSC_USE_COMPLEX) 1499 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1500 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1501 } else if (PetscRealPart(*v)) { 1502 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1503 } 1504 #else 1505 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1506 #endif 1507 v += a->lda; 1508 } 1509 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1510 } 1511 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1512 } else { 1513 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1514 #if defined(PETSC_USE_COMPLEX) 1515 /* determine if matrix has all real values */ 1516 for (j = 0; j < A->cmap->n; j++) { 1517 v = av + j * a->lda; 1518 for (i = 0; i < A->rmap->n; i++) { 1519 if (PetscImaginaryPart(v[i])) { 1520 allreal = PETSC_FALSE; 1521 break; 1522 } 1523 } 1524 } 1525 #endif 1526 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1527 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1528 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1529 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1530 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1531 } 1532 1533 for (i = 0; i < A->rmap->n; i++) { 1534 v = av + i; 1535 for (j = 0; j < A->cmap->n; j++) { 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (allreal) { 1538 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1539 } else { 1540 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1541 } 1542 #else 1543 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1544 #endif 1545 v += a->lda; 1546 } 1547 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1548 } 1549 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1550 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1551 } 1552 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1553 PetscCall(PetscViewerFlush(viewer)); 1554 PetscFunctionReturn(PETSC_SUCCESS); 1555 } 1556 1557 #include <petscdraw.h> 1558 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1559 { 1560 Mat A = (Mat)Aa; 1561 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1562 int color = PETSC_DRAW_WHITE; 1563 const PetscScalar *v; 1564 PetscViewer viewer; 1565 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1566 PetscViewerFormat format; 1567 1568 PetscFunctionBegin; 1569 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1570 PetscCall(PetscViewerGetFormat(viewer, &format)); 1571 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1572 1573 /* Loop over matrix elements drawing boxes */ 1574 PetscCall(MatDenseGetArrayRead(A, &v)); 1575 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1576 PetscDrawCollectiveBegin(draw); 1577 /* Blue for negative and Red for positive */ 1578 for (j = 0; j < n; j++) { 1579 x_l = j; 1580 x_r = x_l + 1.0; 1581 for (i = 0; i < m; i++) { 1582 y_l = m - i - 1.0; 1583 y_r = y_l + 1.0; 1584 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1585 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1586 else continue; 1587 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1588 } 1589 } 1590 PetscDrawCollectiveEnd(draw); 1591 } else { 1592 /* use contour shading to indicate magnitude of values */ 1593 /* first determine max of all nonzero values */ 1594 PetscReal minv = 0.0, maxv = 0.0; 1595 PetscDraw popup; 1596 1597 for (i = 0; i < m * n; i++) { 1598 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1599 } 1600 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1601 PetscCall(PetscDrawGetPopup(draw, &popup)); 1602 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1603 1604 PetscDrawCollectiveBegin(draw); 1605 for (j = 0; j < n; j++) { 1606 x_l = j; 1607 x_r = x_l + 1.0; 1608 for (i = 0; i < m; i++) { 1609 y_l = m - i - 1.0; 1610 y_r = y_l + 1.0; 1611 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1612 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1613 } 1614 } 1615 PetscDrawCollectiveEnd(draw); 1616 } 1617 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1618 PetscFunctionReturn(PETSC_SUCCESS); 1619 } 1620 1621 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1622 { 1623 PetscDraw draw; 1624 PetscBool isnull; 1625 PetscReal xr, yr, xl, yl, h, w; 1626 1627 PetscFunctionBegin; 1628 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1629 PetscCall(PetscDrawIsNull(draw, &isnull)); 1630 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1631 1632 xr = A->cmap->n; 1633 yr = A->rmap->n; 1634 h = yr / 10.0; 1635 w = xr / 10.0; 1636 xr += w; 1637 yr += h; 1638 xl = -w; 1639 yl = -h; 1640 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1641 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1642 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1643 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1644 PetscCall(PetscDrawSave(draw)); 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1649 { 1650 PetscBool iascii, isbinary, isdraw; 1651 1652 PetscFunctionBegin; 1653 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1654 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1655 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1656 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1657 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1658 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1659 PetscFunctionReturn(PETSC_SUCCESS); 1660 } 1661 1662 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1663 { 1664 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1665 1666 PetscFunctionBegin; 1667 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1668 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1669 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first"); 1670 a->unplacedarray = a->v; 1671 a->unplaced_user_alloc = a->user_alloc; 1672 a->v = (PetscScalar *)array; 1673 a->user_alloc = PETSC_TRUE; 1674 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1675 A->offloadmask = PETSC_OFFLOAD_CPU; 1676 #endif 1677 PetscFunctionReturn(PETSC_SUCCESS); 1678 } 1679 1680 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1681 { 1682 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1683 1684 PetscFunctionBegin; 1685 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1686 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1687 a->v = a->unplacedarray; 1688 a->user_alloc = a->unplaced_user_alloc; 1689 a->unplacedarray = NULL; 1690 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1691 A->offloadmask = PETSC_OFFLOAD_CPU; 1692 #endif 1693 PetscFunctionReturn(PETSC_SUCCESS); 1694 } 1695 1696 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1697 { 1698 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1699 1700 PetscFunctionBegin; 1701 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1702 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1703 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1704 a->v = (PetscScalar *)array; 1705 a->user_alloc = PETSC_FALSE; 1706 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1707 A->offloadmask = PETSC_OFFLOAD_CPU; 1708 #endif 1709 PetscFunctionReturn(PETSC_SUCCESS); 1710 } 1711 1712 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1713 { 1714 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1715 1716 PetscFunctionBegin; 1717 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n)); 1718 PetscCall(VecDestroy(&l->qrrhs)); 1719 PetscCall(PetscFree(l->tau)); 1720 PetscCall(PetscFree(l->pivots)); 1721 PetscCall(PetscFree(l->fwork)); 1722 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1723 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1724 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1725 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1726 PetscCall(VecDestroy(&l->cvec)); 1727 PetscCall(MatDestroy(&l->cmat)); 1728 PetscCall(PetscFree(mat->data)); 1729 1730 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1746 #if defined(PETSC_HAVE_ELEMENTAL) 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1748 #endif 1749 #if defined(PETSC_HAVE_SCALAPACK) 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1751 #endif 1752 #if defined(PETSC_HAVE_CUDA) 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1757 #endif 1758 #if defined(PETSC_HAVE_HIP) 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1763 #endif 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1769 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1787 { 1788 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1789 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1790 PetscScalar *v, tmp; 1791 1792 PetscFunctionBegin; 1793 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1794 if (reuse == MAT_INPLACE_MATRIX) { 1795 if (m == n) { /* in place transpose */ 1796 PetscCall(MatDenseGetArray(A, &v)); 1797 for (j = 0; j < m; j++) { 1798 for (k = 0; k < j; k++) { 1799 tmp = v[j + k * M]; 1800 v[j + k * M] = v[k + j * M]; 1801 v[k + j * M] = tmp; 1802 } 1803 } 1804 PetscCall(MatDenseRestoreArray(A, &v)); 1805 } else { /* reuse memory, temporary allocates new memory */ 1806 PetscScalar *v2; 1807 PetscLayout tmplayout; 1808 1809 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1810 PetscCall(MatDenseGetArray(A, &v)); 1811 for (j = 0; j < n; j++) { 1812 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1813 } 1814 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1815 PetscCall(PetscFree(v2)); 1816 PetscCall(MatDenseRestoreArray(A, &v)); 1817 /* cleanup size dependent quantities */ 1818 PetscCall(VecDestroy(&mat->cvec)); 1819 PetscCall(MatDestroy(&mat->cmat)); 1820 PetscCall(PetscFree(mat->pivots)); 1821 PetscCall(PetscFree(mat->fwork)); 1822 /* swap row/col layouts */ 1823 mat->lda = n; 1824 tmplayout = A->rmap; 1825 A->rmap = A->cmap; 1826 A->cmap = tmplayout; 1827 } 1828 } else { /* out-of-place transpose */ 1829 Mat tmat; 1830 Mat_SeqDense *tmatd; 1831 PetscScalar *v2; 1832 PetscInt M2; 1833 1834 if (reuse == MAT_INITIAL_MATRIX) { 1835 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1836 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1837 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1838 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1839 } else tmat = *matout; 1840 1841 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1842 PetscCall(MatDenseGetArray(tmat, &v2)); 1843 tmatd = (Mat_SeqDense *)tmat->data; 1844 M2 = tmatd->lda; 1845 for (j = 0; j < n; j++) { 1846 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1847 } 1848 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1849 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1850 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1851 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1852 *matout = tmat; 1853 } 1854 PetscFunctionReturn(PETSC_SUCCESS); 1855 } 1856 1857 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1858 { 1859 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1860 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1861 PetscInt i; 1862 const PetscScalar *v1, *v2; 1863 1864 PetscFunctionBegin; 1865 if (A1->rmap->n != A2->rmap->n) { 1866 *flg = PETSC_FALSE; 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 if (A1->cmap->n != A2->cmap->n) { 1870 *flg = PETSC_FALSE; 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1874 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1875 for (i = 0; i < A1->cmap->n; i++) { 1876 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1877 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS); 1878 v1 += mat1->lda; 1879 v2 += mat2->lda; 1880 } 1881 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1882 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1883 *flg = PETSC_TRUE; 1884 PetscFunctionReturn(PETSC_SUCCESS); 1885 } 1886 1887 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1888 { 1889 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1890 PetscInt i, n, len; 1891 PetscScalar *x; 1892 const PetscScalar *vv; 1893 1894 PetscFunctionBegin; 1895 PetscCall(VecGetSize(v, &n)); 1896 PetscCall(VecGetArray(v, &x)); 1897 len = PetscMin(A->rmap->n, A->cmap->n); 1898 PetscCall(MatDenseGetArrayRead(A, &vv)); 1899 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1900 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1901 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1902 PetscCall(VecRestoreArray(v, &x)); 1903 PetscFunctionReturn(PETSC_SUCCESS); 1904 } 1905 1906 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1907 { 1908 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1909 const PetscScalar *l, *r; 1910 PetscScalar x, *v, *vv; 1911 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1912 1913 PetscFunctionBegin; 1914 PetscCall(MatDenseGetArray(A, &vv)); 1915 if (ll) { 1916 PetscCall(VecGetSize(ll, &m)); 1917 PetscCall(VecGetArrayRead(ll, &l)); 1918 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1919 for (i = 0; i < m; i++) { 1920 x = l[i]; 1921 v = vv + i; 1922 for (j = 0; j < n; j++) { 1923 (*v) *= x; 1924 v += mat->lda; 1925 } 1926 } 1927 PetscCall(VecRestoreArrayRead(ll, &l)); 1928 PetscCall(PetscLogFlops(1.0 * n * m)); 1929 } 1930 if (rr) { 1931 PetscCall(VecGetSize(rr, &n)); 1932 PetscCall(VecGetArrayRead(rr, &r)); 1933 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1934 for (i = 0; i < n; i++) { 1935 x = r[i]; 1936 v = vv + i * mat->lda; 1937 for (j = 0; j < m; j++) (*v++) *= x; 1938 } 1939 PetscCall(VecRestoreArrayRead(rr, &r)); 1940 PetscCall(PetscLogFlops(1.0 * n * m)); 1941 } 1942 PetscCall(MatDenseRestoreArray(A, &vv)); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1947 { 1948 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1949 PetscScalar *v, *vv; 1950 PetscReal sum = 0.0; 1951 PetscInt lda, m = A->rmap->n, i, j; 1952 1953 PetscFunctionBegin; 1954 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1955 PetscCall(MatDenseGetLDA(A, &lda)); 1956 v = vv; 1957 if (type == NORM_FROBENIUS) { 1958 if (lda > m) { 1959 for (j = 0; j < A->cmap->n; j++) { 1960 v = vv + j * lda; 1961 for (i = 0; i < m; i++) { 1962 sum += PetscRealPart(PetscConj(*v) * (*v)); 1963 v++; 1964 } 1965 } 1966 } else { 1967 #if defined(PETSC_USE_REAL___FP16) 1968 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1969 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1970 } 1971 #else 1972 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1973 sum += PetscRealPart(PetscConj(*v) * (*v)); 1974 v++; 1975 } 1976 } 1977 *nrm = PetscSqrtReal(sum); 1978 #endif 1979 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1980 } else if (type == NORM_1) { 1981 *nrm = 0.0; 1982 for (j = 0; j < A->cmap->n; j++) { 1983 v = vv + j * mat->lda; 1984 sum = 0.0; 1985 for (i = 0; i < A->rmap->n; i++) { 1986 sum += PetscAbsScalar(*v); 1987 v++; 1988 } 1989 if (sum > *nrm) *nrm = sum; 1990 } 1991 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1992 } else if (type == NORM_INFINITY) { 1993 *nrm = 0.0; 1994 for (j = 0; j < A->rmap->n; j++) { 1995 v = vv + j; 1996 sum = 0.0; 1997 for (i = 0; i < A->cmap->n; i++) { 1998 sum += PetscAbsScalar(*v); 1999 v += mat->lda; 2000 } 2001 if (sum > *nrm) *nrm = sum; 2002 } 2003 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2004 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2005 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 2009 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2010 { 2011 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2012 2013 PetscFunctionBegin; 2014 switch (op) { 2015 case MAT_ROW_ORIENTED: 2016 aij->roworiented = flg; 2017 break; 2018 case MAT_NEW_NONZERO_LOCATIONS: 2019 case MAT_NEW_NONZERO_LOCATION_ERR: 2020 case MAT_NEW_NONZERO_ALLOCATION_ERR: 2021 case MAT_FORCE_DIAGONAL_ENTRIES: 2022 case MAT_KEEP_NONZERO_PATTERN: 2023 case MAT_IGNORE_OFF_PROC_ENTRIES: 2024 case MAT_USE_HASH_TABLE: 2025 case MAT_IGNORE_ZERO_ENTRIES: 2026 case MAT_IGNORE_LOWER_TRIANGULAR: 2027 case MAT_SORTED_FULL: 2028 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 2029 break; 2030 case MAT_SPD: 2031 case MAT_SYMMETRIC: 2032 case MAT_STRUCTURALLY_SYMMETRIC: 2033 case MAT_HERMITIAN: 2034 case MAT_SYMMETRY_ETERNAL: 2035 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 2036 case MAT_SPD_ETERNAL: 2037 break; 2038 default: 2039 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 2040 } 2041 PetscFunctionReturn(PETSC_SUCCESS); 2042 } 2043 2044 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2045 { 2046 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2047 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2048 PetscScalar *v; 2049 2050 PetscFunctionBegin; 2051 PetscCall(MatDenseGetArrayWrite(A, &v)); 2052 if (lda > m) { 2053 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2054 } else { 2055 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2056 } 2057 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2058 PetscFunctionReturn(PETSC_SUCCESS); 2059 } 2060 2061 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2062 { 2063 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2064 PetscInt m = l->lda, n = A->cmap->n, i, j; 2065 PetscScalar *slot, *bb, *v; 2066 const PetscScalar *xx; 2067 2068 PetscFunctionBegin; 2069 if (PetscDefined(USE_DEBUG)) { 2070 for (i = 0; i < N; i++) { 2071 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2072 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2073 } 2074 } 2075 if (!N) PetscFunctionReturn(PETSC_SUCCESS); 2076 2077 /* fix right-hand side if needed */ 2078 if (x && b) { 2079 PetscCall(VecGetArrayRead(x, &xx)); 2080 PetscCall(VecGetArray(b, &bb)); 2081 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2082 PetscCall(VecRestoreArrayRead(x, &xx)); 2083 PetscCall(VecRestoreArray(b, &bb)); 2084 } 2085 2086 PetscCall(MatDenseGetArray(A, &v)); 2087 for (i = 0; i < N; i++) { 2088 slot = v + rows[i]; 2089 for (j = 0; j < n; j++) { 2090 *slot = 0.0; 2091 slot += m; 2092 } 2093 } 2094 if (diag != 0.0) { 2095 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2096 for (i = 0; i < N; i++) { 2097 slot = v + (m + 1) * rows[i]; 2098 *slot = diag; 2099 } 2100 } 2101 PetscCall(MatDenseRestoreArray(A, &v)); 2102 PetscFunctionReturn(PETSC_SUCCESS); 2103 } 2104 2105 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2106 { 2107 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2108 2109 PetscFunctionBegin; 2110 *lda = mat->lda; 2111 PetscFunctionReturn(PETSC_SUCCESS); 2112 } 2113 2114 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2115 { 2116 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2117 2118 PetscFunctionBegin; 2119 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2120 *array = mat->v; 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2125 { 2126 PetscFunctionBegin; 2127 if (array) *array = NULL; 2128 PetscFunctionReturn(PETSC_SUCCESS); 2129 } 2130 2131 /*@ 2132 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2133 2134 Not Collective 2135 2136 Input Parameter: 2137 . A - a `MATDENSE` or `MATDENSECUDA` matrix 2138 2139 Output Parameter: 2140 . lda - the leading dimension 2141 2142 Level: intermediate 2143 2144 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2145 @*/ 2146 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2147 { 2148 PetscFunctionBegin; 2149 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2150 PetscAssertPointer(lda, 2); 2151 MatCheckPreallocated(A, 1); 2152 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2153 PetscFunctionReturn(PETSC_SUCCESS); 2154 } 2155 2156 /*@ 2157 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2158 2159 Collective if the matrix layouts have not yet been setup 2160 2161 Input Parameters: 2162 + A - a `MATDENSE` or `MATDENSECUDA` matrix 2163 - lda - the leading dimension 2164 2165 Level: intermediate 2166 2167 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2168 @*/ 2169 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2170 { 2171 PetscFunctionBegin; 2172 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2173 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 /*@C 2178 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2179 2180 Logically Collective 2181 2182 Input Parameter: 2183 . A - a dense matrix 2184 2185 Output Parameter: 2186 . array - pointer to the data 2187 2188 Level: intermediate 2189 2190 Fortran Notes: 2191 `MatDenseGetArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseGetArrayF90()` 2192 2193 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2194 @*/ 2195 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array) 2196 { 2197 PetscFunctionBegin; 2198 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2199 PetscAssertPointer(array, 2); 2200 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2201 PetscFunctionReturn(PETSC_SUCCESS); 2202 } 2203 2204 /*@C 2205 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2206 2207 Logically Collective 2208 2209 Input Parameters: 2210 + A - a dense matrix 2211 - array - pointer to the data (may be `NULL`) 2212 2213 Level: intermediate 2214 2215 Fortran Notes: 2216 `MatDenseRestoreArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseRestoreArrayF90()` 2217 2218 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2219 @*/ 2220 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array) 2221 { 2222 PetscFunctionBegin; 2223 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2224 if (array) PetscAssertPointer(array, 2); 2225 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2226 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2227 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2228 A->offloadmask = PETSC_OFFLOAD_CPU; 2229 #endif 2230 PetscFunctionReturn(PETSC_SUCCESS); 2231 } 2232 2233 /*@C 2234 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2235 2236 Not Collective 2237 2238 Input Parameter: 2239 . A - a dense matrix 2240 2241 Output Parameter: 2242 . array - pointer to the data 2243 2244 Level: intermediate 2245 2246 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2247 @*/ 2248 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array) 2249 { 2250 PetscFunctionBegin; 2251 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2252 PetscAssertPointer(array, 2); 2253 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2254 PetscFunctionReturn(PETSC_SUCCESS); 2255 } 2256 2257 /*@C 2258 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2259 2260 Not Collective 2261 2262 Input Parameters: 2263 + A - a dense matrix 2264 - array - pointer to the data (may be `NULL`) 2265 2266 Level: intermediate 2267 2268 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2269 @*/ 2270 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array) 2271 { 2272 PetscFunctionBegin; 2273 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2274 if (array) PetscAssertPointer(array, 2); 2275 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2276 PetscFunctionReturn(PETSC_SUCCESS); 2277 } 2278 2279 /*@C 2280 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2281 2282 Not Collective 2283 2284 Input Parameter: 2285 . A - a dense matrix 2286 2287 Output Parameter: 2288 . array - pointer to the data 2289 2290 Level: intermediate 2291 2292 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2293 @*/ 2294 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array) 2295 { 2296 PetscFunctionBegin; 2297 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2298 PetscAssertPointer(array, 2); 2299 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2300 PetscFunctionReturn(PETSC_SUCCESS); 2301 } 2302 2303 /*@C 2304 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2305 2306 Not Collective 2307 2308 Input Parameters: 2309 + A - a dense matrix 2310 - array - pointer to the data (may be `NULL`) 2311 2312 Level: intermediate 2313 2314 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2315 @*/ 2316 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array) 2317 { 2318 PetscFunctionBegin; 2319 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2320 if (array) PetscAssertPointer(array, 2); 2321 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2322 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2323 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2324 A->offloadmask = PETSC_OFFLOAD_CPU; 2325 #endif 2326 PetscFunctionReturn(PETSC_SUCCESS); 2327 } 2328 2329 /*@C 2330 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2331 2332 Logically Collective 2333 2334 Input Parameter: 2335 . A - a dense matrix 2336 2337 Output Parameters: 2338 + array - pointer to the data 2339 - mtype - memory type of the returned pointer 2340 2341 Level: intermediate 2342 2343 Note: 2344 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2345 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2346 2347 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`, 2348 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2349 @*/ 2350 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar **array, PetscMemType *mtype) 2351 { 2352 PetscBool isMPI; 2353 2354 PetscFunctionBegin; 2355 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2356 PetscAssertPointer(array, 2); 2357 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2358 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2359 if (isMPI) { 2360 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2361 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2362 } else { 2363 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2364 2365 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr)); 2366 if (fptr) { 2367 PetscCall((*fptr)(A, array, mtype)); 2368 } else { 2369 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2370 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2371 } 2372 } 2373 PetscFunctionReturn(PETSC_SUCCESS); 2374 } 2375 2376 /*@C 2377 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()` 2378 2379 Logically Collective 2380 2381 Input Parameters: 2382 + A - a dense matrix 2383 - array - pointer to the data 2384 2385 Level: intermediate 2386 2387 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2388 @*/ 2389 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar **array) 2390 { 2391 PetscBool isMPI; 2392 2393 PetscFunctionBegin; 2394 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2395 PetscAssertPointer(array, 2); 2396 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2397 if (isMPI) { 2398 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2399 } else { 2400 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2401 2402 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr)); 2403 if (fptr) { 2404 PetscCall((*fptr)(A, array)); 2405 } else { 2406 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2407 } 2408 *array = NULL; 2409 } 2410 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2411 PetscFunctionReturn(PETSC_SUCCESS); 2412 } 2413 2414 /*@C 2415 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2416 2417 Logically Collective 2418 2419 Input Parameter: 2420 . A - a dense matrix 2421 2422 Output Parameters: 2423 + array - pointer to the data 2424 - mtype - memory type of the returned pointer 2425 2426 Level: intermediate 2427 2428 Note: 2429 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2430 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2431 2432 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, 2433 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2434 @*/ 2435 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar **array, PetscMemType *mtype) 2436 { 2437 PetscBool isMPI; 2438 2439 PetscFunctionBegin; 2440 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2441 PetscAssertPointer(array, 2); 2442 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2443 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2444 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */ 2445 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2446 } else { 2447 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *); 2448 2449 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr)); 2450 if (fptr) { 2451 PetscCall((*fptr)(A, array, mtype)); 2452 } else { 2453 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2454 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2455 } 2456 } 2457 PetscFunctionReturn(PETSC_SUCCESS); 2458 } 2459 2460 /*@C 2461 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2462 2463 Logically Collective 2464 2465 Input Parameters: 2466 + A - a dense matrix 2467 - array - pointer to the data 2468 2469 Level: intermediate 2470 2471 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2472 @*/ 2473 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar **array) 2474 { 2475 PetscBool isMPI; 2476 2477 PetscFunctionBegin; 2478 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2479 PetscAssertPointer(array, 2); 2480 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2481 if (isMPI) { 2482 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2483 } else { 2484 PetscErrorCode (*fptr)(Mat, const PetscScalar **); 2485 2486 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr)); 2487 if (fptr) { 2488 PetscCall((*fptr)(A, array)); 2489 } else { 2490 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array)); 2491 } 2492 *array = NULL; 2493 } 2494 PetscFunctionReturn(PETSC_SUCCESS); 2495 } 2496 2497 /*@C 2498 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2499 2500 Logically Collective 2501 2502 Input Parameter: 2503 . A - a dense matrix 2504 2505 Output Parameters: 2506 + array - pointer to the data 2507 - mtype - memory type of the returned pointer 2508 2509 Level: intermediate 2510 2511 Note: 2512 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc., 2513 an array on device is always returned and is guaranteed to contain the matrix's latest data. 2514 2515 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`, 2516 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()` 2517 @*/ 2518 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar **array, PetscMemType *mtype) 2519 { 2520 PetscBool isMPI; 2521 2522 PetscFunctionBegin; 2523 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2524 PetscAssertPointer(array, 2); 2525 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */ 2526 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2527 if (isMPI) { 2528 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype)); 2529 } else { 2530 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *); 2531 2532 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr)); 2533 if (fptr) { 2534 PetscCall((*fptr)(A, array, mtype)); 2535 } else { 2536 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2537 if (mtype) *mtype = PETSC_MEMTYPE_HOST; 2538 } 2539 } 2540 PetscFunctionReturn(PETSC_SUCCESS); 2541 } 2542 2543 /*@C 2544 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()` 2545 2546 Logically Collective 2547 2548 Input Parameters: 2549 + A - a dense matrix 2550 - array - pointer to the data 2551 2552 Level: intermediate 2553 2554 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2555 @*/ 2556 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar **array) 2557 { 2558 PetscBool isMPI; 2559 2560 PetscFunctionBegin; 2561 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2562 PetscAssertPointer(array, 2); 2563 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI)); 2564 if (isMPI) { 2565 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array)); 2566 } else { 2567 PetscErrorCode (*fptr)(Mat, PetscScalar **); 2568 2569 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr)); 2570 if (fptr) { 2571 PetscCall((*fptr)(A, array)); 2572 } else { 2573 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2574 } 2575 *array = NULL; 2576 } 2577 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2578 PetscFunctionReturn(PETSC_SUCCESS); 2579 } 2580 2581 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2582 { 2583 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2584 PetscInt i, j, nrows, ncols, ldb; 2585 const PetscInt *irow, *icol; 2586 PetscScalar *av, *bv, *v = mat->v; 2587 Mat newmat; 2588 2589 PetscFunctionBegin; 2590 PetscCall(ISGetIndices(isrow, &irow)); 2591 PetscCall(ISGetIndices(iscol, &icol)); 2592 PetscCall(ISGetLocalSize(isrow, &nrows)); 2593 PetscCall(ISGetLocalSize(iscol, &ncols)); 2594 2595 /* Check submatrixcall */ 2596 if (scall == MAT_REUSE_MATRIX) { 2597 PetscInt n_cols, n_rows; 2598 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2599 if (n_rows != nrows || n_cols != ncols) { 2600 /* resize the result matrix to match number of requested rows/columns */ 2601 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2602 } 2603 newmat = *B; 2604 } else { 2605 /* Create and fill new matrix */ 2606 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2607 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2608 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2609 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2610 } 2611 2612 /* Now extract the data pointers and do the copy,column at a time */ 2613 PetscCall(MatDenseGetArray(newmat, &bv)); 2614 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2615 for (i = 0; i < ncols; i++) { 2616 av = v + mat->lda * icol[i]; 2617 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2618 bv += ldb; 2619 } 2620 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2621 2622 /* Assemble the matrices so that the correct flags are set */ 2623 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2624 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2625 2626 /* Free work space */ 2627 PetscCall(ISRestoreIndices(isrow, &irow)); 2628 PetscCall(ISRestoreIndices(iscol, &icol)); 2629 *B = newmat; 2630 PetscFunctionReturn(PETSC_SUCCESS); 2631 } 2632 2633 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2634 { 2635 PetscInt i; 2636 2637 PetscFunctionBegin; 2638 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2639 2640 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2641 PetscFunctionReturn(PETSC_SUCCESS); 2642 } 2643 2644 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) 2645 { 2646 PetscFunctionBegin; 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) 2651 { 2652 PetscFunctionBegin; 2653 PetscFunctionReturn(PETSC_SUCCESS); 2654 } 2655 2656 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2657 { 2658 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2659 const PetscScalar *va; 2660 PetscScalar *vb; 2661 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2662 2663 PetscFunctionBegin; 2664 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2665 if (A->ops->copy != B->ops->copy) { 2666 PetscCall(MatCopy_Basic(A, B, str)); 2667 PetscFunctionReturn(PETSC_SUCCESS); 2668 } 2669 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2670 PetscCall(MatDenseGetArrayRead(A, &va)); 2671 PetscCall(MatDenseGetArray(B, &vb)); 2672 if (lda1 > m || lda2 > m) { 2673 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2674 } else { 2675 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2676 } 2677 PetscCall(MatDenseRestoreArray(B, &vb)); 2678 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2679 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2680 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2681 PetscFunctionReturn(PETSC_SUCCESS); 2682 } 2683 2684 PetscErrorCode MatSetUp_SeqDense(Mat A) 2685 { 2686 PetscFunctionBegin; 2687 PetscCall(PetscLayoutSetUp(A->rmap)); 2688 PetscCall(PetscLayoutSetUp(A->cmap)); 2689 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2690 PetscFunctionReturn(PETSC_SUCCESS); 2691 } 2692 2693 static PetscErrorCode MatConjugate_SeqDense(Mat A) 2694 { 2695 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2696 PetscInt i, j; 2697 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2698 PetscScalar *aa; 2699 2700 PetscFunctionBegin; 2701 PetscCall(MatDenseGetArray(A, &aa)); 2702 for (j = 0; j < A->cmap->n; j++) { 2703 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2704 } 2705 PetscCall(MatDenseRestoreArray(A, &aa)); 2706 if (mat->tau) 2707 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2708 PetscFunctionReturn(PETSC_SUCCESS); 2709 } 2710 2711 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2712 { 2713 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2714 PetscInt i, j; 2715 PetscScalar *aa; 2716 2717 PetscFunctionBegin; 2718 PetscCall(MatDenseGetArray(A, &aa)); 2719 for (j = 0; j < A->cmap->n; j++) { 2720 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2721 } 2722 PetscCall(MatDenseRestoreArray(A, &aa)); 2723 PetscFunctionReturn(PETSC_SUCCESS); 2724 } 2725 2726 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2727 { 2728 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2729 PetscInt i, j; 2730 PetscScalar *aa; 2731 2732 PetscFunctionBegin; 2733 PetscCall(MatDenseGetArray(A, &aa)); 2734 for (j = 0; j < A->cmap->n; j++) { 2735 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2736 } 2737 PetscCall(MatDenseRestoreArray(A, &aa)); 2738 PetscFunctionReturn(PETSC_SUCCESS); 2739 } 2740 2741 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2742 { 2743 PetscInt m = A->rmap->n, n = B->cmap->n; 2744 PetscBool cisdense = PETSC_FALSE; 2745 2746 PetscFunctionBegin; 2747 PetscCall(MatSetSizes(C, m, n, m, n)); 2748 #if defined(PETSC_HAVE_CUDA) 2749 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2750 #endif 2751 #if defined(PETSC_HAVE_HIP) 2752 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2753 #endif 2754 if (!cisdense) { 2755 PetscBool flg; 2756 2757 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2758 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2759 } 2760 PetscCall(MatSetUp(C)); 2761 PetscFunctionReturn(PETSC_SUCCESS); 2762 } 2763 2764 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2765 { 2766 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2767 PetscBLASInt m, n, k; 2768 const PetscScalar *av, *bv; 2769 PetscScalar *cv; 2770 PetscScalar _DOne = 1.0, _DZero = 0.0; 2771 2772 PetscFunctionBegin; 2773 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2774 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2775 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2776 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2777 PetscCall(MatDenseGetArrayRead(A, &av)); 2778 PetscCall(MatDenseGetArrayRead(B, &bv)); 2779 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2780 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2781 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2782 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2783 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2784 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2785 PetscFunctionReturn(PETSC_SUCCESS); 2786 } 2787 2788 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2789 { 2790 PetscInt m = A->rmap->n, n = B->rmap->n; 2791 PetscBool cisdense = PETSC_FALSE; 2792 2793 PetscFunctionBegin; 2794 PetscCall(MatSetSizes(C, m, n, m, n)); 2795 #if defined(PETSC_HAVE_CUDA) 2796 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2797 #endif 2798 #if defined(PETSC_HAVE_HIP) 2799 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2800 #endif 2801 if (!cisdense) { 2802 PetscBool flg; 2803 2804 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2805 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2806 } 2807 PetscCall(MatSetUp(C)); 2808 PetscFunctionReturn(PETSC_SUCCESS); 2809 } 2810 2811 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2812 { 2813 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2814 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2815 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2816 const PetscScalar *av, *bv; 2817 PetscScalar *cv; 2818 PetscBLASInt m, n, k; 2819 PetscScalar _DOne = 1.0, _DZero = 0.0; 2820 2821 PetscFunctionBegin; 2822 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2823 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2824 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2825 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2826 PetscCall(MatDenseGetArrayRead(A, &av)); 2827 PetscCall(MatDenseGetArrayRead(B, &bv)); 2828 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2829 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2830 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2831 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2832 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2833 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2834 PetscFunctionReturn(PETSC_SUCCESS); 2835 } 2836 2837 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2838 { 2839 PetscInt m = A->cmap->n, n = B->cmap->n; 2840 PetscBool cisdense = PETSC_FALSE; 2841 2842 PetscFunctionBegin; 2843 PetscCall(MatSetSizes(C, m, n, m, n)); 2844 #if defined(PETSC_HAVE_CUDA) 2845 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2846 #endif 2847 #if defined(PETSC_HAVE_HIP) 2848 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2849 #endif 2850 if (!cisdense) { 2851 PetscBool flg; 2852 2853 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2854 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2855 } 2856 PetscCall(MatSetUp(C)); 2857 PetscFunctionReturn(PETSC_SUCCESS); 2858 } 2859 2860 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2861 { 2862 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2863 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2864 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2865 const PetscScalar *av, *bv; 2866 PetscScalar *cv; 2867 PetscBLASInt m, n, k; 2868 PetscScalar _DOne = 1.0, _DZero = 0.0; 2869 2870 PetscFunctionBegin; 2871 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2872 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2873 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2874 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS); 2875 PetscCall(MatDenseGetArrayRead(A, &av)); 2876 PetscCall(MatDenseGetArrayRead(B, &bv)); 2877 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2878 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2879 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2880 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2881 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2882 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2883 PetscFunctionReturn(PETSC_SUCCESS); 2884 } 2885 2886 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2887 { 2888 PetscFunctionBegin; 2889 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2890 C->ops->productsymbolic = MatProductSymbolic_AB; 2891 PetscFunctionReturn(PETSC_SUCCESS); 2892 } 2893 2894 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2895 { 2896 PetscFunctionBegin; 2897 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2898 C->ops->productsymbolic = MatProductSymbolic_AtB; 2899 PetscFunctionReturn(PETSC_SUCCESS); 2900 } 2901 2902 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2903 { 2904 PetscFunctionBegin; 2905 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2906 C->ops->productsymbolic = MatProductSymbolic_ABt; 2907 PetscFunctionReturn(PETSC_SUCCESS); 2908 } 2909 2910 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2911 { 2912 Mat_Product *product = C->product; 2913 2914 PetscFunctionBegin; 2915 switch (product->type) { 2916 case MATPRODUCT_AB: 2917 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2918 break; 2919 case MATPRODUCT_AtB: 2920 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2921 break; 2922 case MATPRODUCT_ABt: 2923 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2924 break; 2925 default: 2926 break; 2927 } 2928 PetscFunctionReturn(PETSC_SUCCESS); 2929 } 2930 2931 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2932 { 2933 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2934 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2935 PetscScalar *x; 2936 const PetscScalar *aa; 2937 2938 PetscFunctionBegin; 2939 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2940 PetscCall(VecGetArray(v, &x)); 2941 PetscCall(VecGetLocalSize(v, &p)); 2942 PetscCall(MatDenseGetArrayRead(A, &aa)); 2943 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2944 for (i = 0; i < m; i++) { 2945 x[i] = aa[i]; 2946 if (idx) idx[i] = 0; 2947 for (j = 1; j < n; j++) { 2948 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2949 x[i] = aa[i + a->lda * j]; 2950 if (idx) idx[i] = j; 2951 } 2952 } 2953 } 2954 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2955 PetscCall(VecRestoreArray(v, &x)); 2956 PetscFunctionReturn(PETSC_SUCCESS); 2957 } 2958 2959 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2960 { 2961 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2962 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2963 PetscScalar *x; 2964 PetscReal atmp; 2965 const PetscScalar *aa; 2966 2967 PetscFunctionBegin; 2968 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2969 PetscCall(VecGetArray(v, &x)); 2970 PetscCall(VecGetLocalSize(v, &p)); 2971 PetscCall(MatDenseGetArrayRead(A, &aa)); 2972 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2973 for (i = 0; i < m; i++) { 2974 x[i] = PetscAbsScalar(aa[i]); 2975 for (j = 1; j < n; j++) { 2976 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2977 if (PetscAbsScalar(x[i]) < atmp) { 2978 x[i] = atmp; 2979 if (idx) idx[i] = j; 2980 } 2981 } 2982 } 2983 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2984 PetscCall(VecRestoreArray(v, &x)); 2985 PetscFunctionReturn(PETSC_SUCCESS); 2986 } 2987 2988 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2989 { 2990 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2991 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2992 PetscScalar *x; 2993 const PetscScalar *aa; 2994 2995 PetscFunctionBegin; 2996 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2997 PetscCall(MatDenseGetArrayRead(A, &aa)); 2998 PetscCall(VecGetArray(v, &x)); 2999 PetscCall(VecGetLocalSize(v, &p)); 3000 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 3001 for (i = 0; i < m; i++) { 3002 x[i] = aa[i]; 3003 if (idx) idx[i] = 0; 3004 for (j = 1; j < n; j++) { 3005 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 3006 x[i] = aa[i + a->lda * j]; 3007 if (idx) idx[i] = j; 3008 } 3009 } 3010 } 3011 PetscCall(VecRestoreArray(v, &x)); 3012 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3013 PetscFunctionReturn(PETSC_SUCCESS); 3014 } 3015 3016 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 3017 { 3018 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3019 PetscScalar *x; 3020 const PetscScalar *aa; 3021 3022 PetscFunctionBegin; 3023 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3024 PetscCall(MatDenseGetArrayRead(A, &aa)); 3025 PetscCall(VecGetArray(v, &x)); 3026 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 3027 PetscCall(VecRestoreArray(v, &x)); 3028 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 3029 PetscFunctionReturn(PETSC_SUCCESS); 3030 } 3031 3032 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 3033 { 3034 PetscInt i, j, m, n; 3035 const PetscScalar *a; 3036 3037 PetscFunctionBegin; 3038 PetscCall(MatGetSize(A, &m, &n)); 3039 PetscCall(PetscArrayzero(reductions, n)); 3040 PetscCall(MatDenseGetArrayRead(A, &a)); 3041 if (type == NORM_2) { 3042 for (i = 0; i < n; i++) { 3043 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 3044 a = PetscSafePointerPlusOffset(a, m); 3045 } 3046 } else if (type == NORM_1) { 3047 for (i = 0; i < n; i++) { 3048 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 3049 a = PetscSafePointerPlusOffset(a, m); 3050 } 3051 } else if (type == NORM_INFINITY) { 3052 for (i = 0; i < n; i++) { 3053 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 3054 a = PetscSafePointerPlusOffset(a, m); 3055 } 3056 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 3057 for (i = 0; i < n; i++) { 3058 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 3059 a = PetscSafePointerPlusOffset(a, m); 3060 } 3061 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3062 for (i = 0; i < n; i++) { 3063 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 3064 a = PetscSafePointerPlusOffset(a, m); 3065 } 3066 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3067 PetscCall(MatDenseRestoreArrayRead(A, &a)); 3068 if (type == NORM_2) { 3069 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 3070 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 3071 for (i = 0; i < n; i++) reductions[i] /= m; 3072 } 3073 PetscFunctionReturn(PETSC_SUCCESS); 3074 } 3075 3076 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 3077 { 3078 PetscScalar *a; 3079 PetscInt lda, m, n, i, j; 3080 3081 PetscFunctionBegin; 3082 PetscCall(MatGetSize(x, &m, &n)); 3083 PetscCall(MatDenseGetLDA(x, &lda)); 3084 PetscCall(MatDenseGetArrayWrite(x, &a)); 3085 for (j = 0; j < n; j++) { 3086 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 3087 } 3088 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 3089 PetscFunctionReturn(PETSC_SUCCESS); 3090 } 3091 3092 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 3093 { 3094 PetscFunctionBegin; 3095 *missing = PETSC_FALSE; 3096 PetscFunctionReturn(PETSC_SUCCESS); 3097 } 3098 3099 /* vals is not const */ 3100 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 3101 { 3102 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3103 PetscScalar *v; 3104 3105 PetscFunctionBegin; 3106 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3107 PetscCall(MatDenseGetArray(A, &v)); 3108 *vals = v + col * a->lda; 3109 PetscCall(MatDenseRestoreArray(A, &v)); 3110 PetscFunctionReturn(PETSC_SUCCESS); 3111 } 3112 3113 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 3114 { 3115 PetscFunctionBegin; 3116 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 3117 PetscFunctionReturn(PETSC_SUCCESS); 3118 } 3119 3120 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 3121 MatGetRow_SeqDense, 3122 MatRestoreRow_SeqDense, 3123 MatMult_SeqDense, 3124 /* 4*/ MatMultAdd_SeqDense, 3125 MatMultTranspose_SeqDense, 3126 MatMultTransposeAdd_SeqDense, 3127 NULL, 3128 NULL, 3129 NULL, 3130 /* 10*/ NULL, 3131 MatLUFactor_SeqDense, 3132 MatCholeskyFactor_SeqDense, 3133 MatSOR_SeqDense, 3134 MatTranspose_SeqDense, 3135 /* 15*/ MatGetInfo_SeqDense, 3136 MatEqual_SeqDense, 3137 MatGetDiagonal_SeqDense, 3138 MatDiagonalScale_SeqDense, 3139 MatNorm_SeqDense, 3140 /* 20*/ MatAssemblyBegin_SeqDense, 3141 MatAssemblyEnd_SeqDense, 3142 MatSetOption_SeqDense, 3143 MatZeroEntries_SeqDense, 3144 /* 24*/ MatZeroRows_SeqDense, 3145 NULL, 3146 NULL, 3147 NULL, 3148 NULL, 3149 /* 29*/ MatSetUp_SeqDense, 3150 NULL, 3151 NULL, 3152 NULL, 3153 NULL, 3154 /* 34*/ MatDuplicate_SeqDense, 3155 NULL, 3156 NULL, 3157 NULL, 3158 NULL, 3159 /* 39*/ MatAXPY_SeqDense, 3160 MatCreateSubMatrices_SeqDense, 3161 NULL, 3162 MatGetValues_SeqDense, 3163 MatCopy_SeqDense, 3164 /* 44*/ MatGetRowMax_SeqDense, 3165 MatScale_SeqDense, 3166 MatShift_SeqDense, 3167 NULL, 3168 MatZeroRowsColumns_SeqDense, 3169 /* 49*/ MatSetRandom_SeqDense, 3170 NULL, 3171 NULL, 3172 NULL, 3173 NULL, 3174 /* 54*/ NULL, 3175 NULL, 3176 NULL, 3177 NULL, 3178 NULL, 3179 /* 59*/ MatCreateSubMatrix_SeqDense, 3180 MatDestroy_SeqDense, 3181 MatView_SeqDense, 3182 NULL, 3183 NULL, 3184 /* 64*/ NULL, 3185 NULL, 3186 NULL, 3187 NULL, 3188 NULL, 3189 /* 69*/ MatGetRowMaxAbs_SeqDense, 3190 NULL, 3191 NULL, 3192 NULL, 3193 NULL, 3194 /* 74*/ NULL, 3195 NULL, 3196 NULL, 3197 NULL, 3198 NULL, 3199 /* 79*/ NULL, 3200 NULL, 3201 NULL, 3202 NULL, 3203 /* 83*/ MatLoad_SeqDense, 3204 MatIsSymmetric_SeqDense, 3205 MatIsHermitian_SeqDense, 3206 NULL, 3207 NULL, 3208 NULL, 3209 /* 89*/ NULL, 3210 NULL, 3211 MatMatMultNumeric_SeqDense_SeqDense, 3212 NULL, 3213 NULL, 3214 /* 94*/ NULL, 3215 NULL, 3216 NULL, 3217 MatMatTransposeMultNumeric_SeqDense_SeqDense, 3218 NULL, 3219 /* 99*/ MatProductSetFromOptions_SeqDense, 3220 NULL, 3221 NULL, 3222 MatConjugate_SeqDense, 3223 NULL, 3224 /*104*/ NULL, 3225 MatRealPart_SeqDense, 3226 MatImaginaryPart_SeqDense, 3227 NULL, 3228 NULL, 3229 /*109*/ NULL, 3230 NULL, 3231 MatGetRowMin_SeqDense, 3232 MatGetColumnVector_SeqDense, 3233 MatMissingDiagonal_SeqDense, 3234 /*114*/ NULL, 3235 NULL, 3236 NULL, 3237 NULL, 3238 NULL, 3239 /*119*/ NULL, 3240 NULL, 3241 MatMultHermitianTranspose_SeqDense, 3242 MatMultHermitianTransposeAdd_SeqDense, 3243 NULL, 3244 /*124*/ NULL, 3245 MatGetColumnReductions_SeqDense, 3246 NULL, 3247 NULL, 3248 NULL, 3249 /*129*/ NULL, 3250 NULL, 3251 NULL, 3252 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3253 NULL, 3254 /*134*/ NULL, 3255 NULL, 3256 NULL, 3257 NULL, 3258 NULL, 3259 /*139*/ NULL, 3260 NULL, 3261 NULL, 3262 NULL, 3263 NULL, 3264 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3265 /*145*/ NULL, 3266 NULL, 3267 NULL, 3268 NULL, 3269 NULL, 3270 /*150*/ NULL, 3271 NULL, 3272 NULL}; 3273 3274 /*@C 3275 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3276 is stored in column major order (the usual Fortran format). 3277 3278 Collective 3279 3280 Input Parameters: 3281 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3282 . m - number of rows 3283 . n - number of columns 3284 - data - optional location of matrix data in column major order. Use `NULL` for PETSc 3285 to control all matrix memory allocation. 3286 3287 Output Parameter: 3288 . A - the matrix 3289 3290 Level: intermediate 3291 3292 Note: 3293 The data input variable is intended primarily for Fortran programmers 3294 who wish to allocate their own matrix memory space. Most users should 3295 set `data` = `NULL`. 3296 3297 Developer Note: 3298 Many of the matrix operations for this variant use the BLAS and LAPACK routines. 3299 3300 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3301 @*/ 3302 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A) 3303 { 3304 PetscFunctionBegin; 3305 PetscCall(MatCreate(comm, A)); 3306 PetscCall(MatSetSizes(*A, m, n, m, n)); 3307 PetscCall(MatSetType(*A, MATSEQDENSE)); 3308 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3309 PetscFunctionReturn(PETSC_SUCCESS); 3310 } 3311 3312 /*@C 3313 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3314 3315 Collective 3316 3317 Input Parameters: 3318 + B - the matrix 3319 - data - the array (or `NULL`) 3320 3321 Level: intermediate 3322 3323 Note: 3324 The data input variable is intended primarily for Fortran programmers 3325 who wish to allocate their own matrix memory space. Most users should 3326 need not call this routine. 3327 3328 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3329 @*/ 3330 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3331 { 3332 PetscFunctionBegin; 3333 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3334 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3335 PetscFunctionReturn(PETSC_SUCCESS); 3336 } 3337 3338 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3339 { 3340 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3341 3342 PetscFunctionBegin; 3343 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3344 B->preallocated = PETSC_TRUE; 3345 3346 PetscCall(PetscLayoutSetUp(B->rmap)); 3347 PetscCall(PetscLayoutSetUp(B->cmap)); 3348 3349 if (b->lda <= 0) b->lda = B->rmap->n; 3350 3351 if (!data) { /* petsc-allocated storage */ 3352 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3353 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3354 3355 b->user_alloc = PETSC_FALSE; 3356 } else { /* user-allocated storage */ 3357 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3358 b->v = data; 3359 b->user_alloc = PETSC_TRUE; 3360 } 3361 B->assembled = PETSC_TRUE; 3362 PetscFunctionReturn(PETSC_SUCCESS); 3363 } 3364 3365 #if defined(PETSC_HAVE_ELEMENTAL) 3366 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3367 { 3368 Mat mat_elemental; 3369 const PetscScalar *array; 3370 PetscScalar *v_colwise; 3371 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3372 3373 PetscFunctionBegin; 3374 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3375 PetscCall(MatDenseGetArrayRead(A, &array)); 3376 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3377 k = 0; 3378 for (j = 0; j < N; j++) { 3379 cols[j] = j; 3380 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3381 } 3382 for (i = 0; i < M; i++) rows[i] = i; 3383 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3384 3385 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3386 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3387 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3388 PetscCall(MatSetUp(mat_elemental)); 3389 3390 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3391 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3392 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3393 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3394 PetscCall(PetscFree3(v_colwise, rows, cols)); 3395 3396 if (reuse == MAT_INPLACE_MATRIX) { 3397 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3398 } else { 3399 *newmat = mat_elemental; 3400 } 3401 PetscFunctionReturn(PETSC_SUCCESS); 3402 } 3403 #endif 3404 3405 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3406 { 3407 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3408 PetscBool data; 3409 3410 PetscFunctionBegin; 3411 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3412 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3413 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3414 b->lda = lda; 3415 PetscFunctionReturn(PETSC_SUCCESS); 3416 } 3417 3418 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3419 { 3420 PetscFunctionBegin; 3421 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3422 PetscFunctionReturn(PETSC_SUCCESS); 3423 } 3424 3425 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v) 3426 { 3427 PetscBool isstd, iskok, iscuda, iship; 3428 PetscMPIInt size; 3429 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP) 3430 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */ 3431 const PetscScalar *a; 3432 #endif 3433 3434 PetscFunctionBegin; 3435 *v = NULL; 3436 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, "")); 3437 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, "")); 3438 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, "")); 3439 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, "")); 3440 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3441 if (isstd) { 3442 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3443 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3444 } else if (iskok) { 3445 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support"); 3446 #if PetscDefined(HAVE_KOKKOS_KERNELS) 3447 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v)); 3448 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v)); 3449 #endif 3450 } else if (iscuda) { 3451 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support"); 3452 #if PetscDefined(HAVE_CUDA) 3453 PetscCall(MatDenseCUDAGetArrayRead(A, &a)); 3454 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3455 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3456 #endif 3457 } else if (iship) { 3458 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support"); 3459 #if PetscDefined(HAVE_HIP) 3460 PetscCall(MatDenseHIPGetArrayRead(A, &a)); 3461 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v)); 3462 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v)); 3463 #endif 3464 } 3465 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype); 3466 PetscFunctionReturn(PETSC_SUCCESS); 3467 } 3468 3469 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3470 { 3471 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3472 3473 PetscFunctionBegin; 3474 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3475 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3476 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3477 a->vecinuse = col + 1; 3478 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3479 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3480 *v = a->cvec; 3481 PetscFunctionReturn(PETSC_SUCCESS); 3482 } 3483 3484 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3485 { 3486 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3487 3488 PetscFunctionBegin; 3489 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3490 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3491 VecCheckAssembled(a->cvec); 3492 a->vecinuse = 0; 3493 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3494 PetscCall(VecResetArray(a->cvec)); 3495 if (v) *v = NULL; 3496 PetscFunctionReturn(PETSC_SUCCESS); 3497 } 3498 3499 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3500 { 3501 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3502 3503 PetscFunctionBegin; 3504 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3505 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3506 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3507 a->vecinuse = col + 1; 3508 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3509 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3510 PetscCall(VecLockReadPush(a->cvec)); 3511 *v = a->cvec; 3512 PetscFunctionReturn(PETSC_SUCCESS); 3513 } 3514 3515 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3516 { 3517 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3518 3519 PetscFunctionBegin; 3520 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3521 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3522 VecCheckAssembled(a->cvec); 3523 a->vecinuse = 0; 3524 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3525 PetscCall(VecLockReadPop(a->cvec)); 3526 PetscCall(VecResetArray(a->cvec)); 3527 if (v) *v = NULL; 3528 PetscFunctionReturn(PETSC_SUCCESS); 3529 } 3530 3531 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3532 { 3533 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3534 3535 PetscFunctionBegin; 3536 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3537 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3538 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 3539 a->vecinuse = col + 1; 3540 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3541 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda))); 3542 *v = a->cvec; 3543 PetscFunctionReturn(PETSC_SUCCESS); 3544 } 3545 3546 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3547 { 3548 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3549 3550 PetscFunctionBegin; 3551 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3552 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3553 VecCheckAssembled(a->cvec); 3554 a->vecinuse = 0; 3555 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3556 PetscCall(VecResetArray(a->cvec)); 3557 if (v) *v = NULL; 3558 PetscFunctionReturn(PETSC_SUCCESS); 3559 } 3560 3561 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3562 { 3563 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3564 3565 PetscFunctionBegin; 3566 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3567 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3568 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3569 if (!a->cmat) { 3570 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat)); 3571 } else { 3572 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda))); 3573 } 3574 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3575 a->matinuse = cbegin + 1; 3576 *v = a->cmat; 3577 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3578 A->offloadmask = PETSC_OFFLOAD_CPU; 3579 #endif 3580 PetscFunctionReturn(PETSC_SUCCESS); 3581 } 3582 3583 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3584 { 3585 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3586 3587 PetscFunctionBegin; 3588 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3589 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3590 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3591 a->matinuse = 0; 3592 PetscCall(MatDenseResetArray(a->cmat)); 3593 if (v) *v = NULL; 3594 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3595 A->offloadmask = PETSC_OFFLOAD_CPU; 3596 #endif 3597 PetscFunctionReturn(PETSC_SUCCESS); 3598 } 3599 3600 /*MC 3601 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3602 3603 Options Database Key: 3604 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3605 3606 Level: beginner 3607 3608 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()` 3609 M*/ 3610 PetscErrorCode MatCreate_SeqDense(Mat B) 3611 { 3612 Mat_SeqDense *b; 3613 PetscMPIInt size; 3614 3615 PetscFunctionBegin; 3616 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3617 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3618 3619 PetscCall(PetscNew(&b)); 3620 B->data = (void *)b; 3621 B->ops[0] = MatOps_Values; 3622 3623 b->roworiented = PETSC_TRUE; 3624 3625 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3626 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3627 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3628 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3629 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3633 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3635 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3636 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3638 #if defined(PETSC_HAVE_ELEMENTAL) 3639 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3640 #endif 3641 #if defined(PETSC_HAVE_SCALAPACK) 3642 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3643 #endif 3644 #if defined(PETSC_HAVE_CUDA) 3645 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3646 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3647 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3648 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3649 #endif 3650 #if defined(PETSC_HAVE_HIP) 3651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3652 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3653 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3654 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3655 #endif 3656 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3657 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3658 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3659 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3660 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3661 3662 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3663 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3664 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3665 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3666 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3667 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3668 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3669 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3670 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3671 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3672 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense)); 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense)); 3674 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense)); 3675 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3676 PetscFunctionReturn(PETSC_SUCCESS); 3677 } 3678 3679 /*@C 3680 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3681 3682 Not Collective 3683 3684 Input Parameters: 3685 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3686 - col - column index 3687 3688 Output Parameter: 3689 . vals - pointer to the data 3690 3691 Level: intermediate 3692 3693 Note: 3694 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3695 3696 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3697 @*/ 3698 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals) 3699 { 3700 PetscFunctionBegin; 3701 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3702 PetscValidLogicalCollectiveInt(A, col, 2); 3703 PetscAssertPointer(vals, 3); 3704 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3705 PetscFunctionReturn(PETSC_SUCCESS); 3706 } 3707 3708 /*@C 3709 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3710 3711 Not Collective 3712 3713 Input Parameters: 3714 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3715 - vals - pointer to the data (may be `NULL`) 3716 3717 Level: intermediate 3718 3719 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()` 3720 @*/ 3721 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals) 3722 { 3723 PetscFunctionBegin; 3724 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3725 PetscAssertPointer(vals, 2); 3726 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3727 PetscFunctionReturn(PETSC_SUCCESS); 3728 } 3729 3730 /*@ 3731 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3732 3733 Collective 3734 3735 Input Parameters: 3736 + A - the `Mat` object 3737 - col - the column index 3738 3739 Output Parameter: 3740 . v - the vector 3741 3742 Level: intermediate 3743 3744 Notes: 3745 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3746 3747 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3748 3749 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3750 @*/ 3751 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3752 { 3753 PetscFunctionBegin; 3754 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3755 PetscValidType(A, 1); 3756 PetscValidLogicalCollectiveInt(A, col, 2); 3757 PetscAssertPointer(v, 3); 3758 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3759 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3760 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3761 PetscFunctionReturn(PETSC_SUCCESS); 3762 } 3763 3764 /*@ 3765 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`. 3766 3767 Collective 3768 3769 Input Parameters: 3770 + A - the `Mat` object 3771 . col - the column index 3772 - v - the `Vec` object (may be `NULL`) 3773 3774 Level: intermediate 3775 3776 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3777 @*/ 3778 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3779 { 3780 PetscFunctionBegin; 3781 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3782 PetscValidType(A, 1); 3783 PetscValidLogicalCollectiveInt(A, col, 2); 3784 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3785 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3786 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3787 PetscFunctionReturn(PETSC_SUCCESS); 3788 } 3789 3790 /*@ 3791 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`. 3792 3793 Collective 3794 3795 Input Parameters: 3796 + A - the `Mat` object 3797 - col - the column index 3798 3799 Output Parameter: 3800 . v - the vector 3801 3802 Level: intermediate 3803 3804 Notes: 3805 The vector is owned by PETSc and users cannot modify it. 3806 3807 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed. 3808 3809 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access. 3810 3811 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3812 @*/ 3813 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3814 { 3815 PetscFunctionBegin; 3816 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3817 PetscValidType(A, 1); 3818 PetscValidLogicalCollectiveInt(A, col, 2); 3819 PetscAssertPointer(v, 3); 3820 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3821 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3822 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3823 PetscFunctionReturn(PETSC_SUCCESS); 3824 } 3825 3826 /*@ 3827 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`. 3828 3829 Collective 3830 3831 Input Parameters: 3832 + A - the `Mat` object 3833 . col - the column index 3834 - v - the `Vec` object (may be `NULL`) 3835 3836 Level: intermediate 3837 3838 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3839 @*/ 3840 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3841 { 3842 PetscFunctionBegin; 3843 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3844 PetscValidType(A, 1); 3845 PetscValidLogicalCollectiveInt(A, col, 2); 3846 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3847 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3848 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3849 PetscFunctionReturn(PETSC_SUCCESS); 3850 } 3851 3852 /*@ 3853 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`. 3854 3855 Collective 3856 3857 Input Parameters: 3858 + A - the `Mat` object 3859 - col - the column index 3860 3861 Output Parameter: 3862 . v - the vector 3863 3864 Level: intermediate 3865 3866 Notes: 3867 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed. 3868 3869 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access. 3870 3871 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3872 @*/ 3873 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3874 { 3875 PetscFunctionBegin; 3876 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3877 PetscValidType(A, 1); 3878 PetscValidLogicalCollectiveInt(A, col, 2); 3879 PetscAssertPointer(v, 3); 3880 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3881 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3882 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3883 PetscFunctionReturn(PETSC_SUCCESS); 3884 } 3885 3886 /*@ 3887 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`. 3888 3889 Collective 3890 3891 Input Parameters: 3892 + A - the `Mat` object 3893 . col - the column index 3894 - v - the `Vec` object (may be `NULL`) 3895 3896 Level: intermediate 3897 3898 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3899 @*/ 3900 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3901 { 3902 PetscFunctionBegin; 3903 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3904 PetscValidType(A, 1); 3905 PetscValidLogicalCollectiveInt(A, col, 2); 3906 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3907 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3908 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3909 PetscFunctionReturn(PETSC_SUCCESS); 3910 } 3911 3912 /*@ 3913 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`. 3914 3915 Collective 3916 3917 Input Parameters: 3918 + A - the `Mat` object 3919 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0) 3920 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`) 3921 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0) 3922 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`) 3923 3924 Output Parameter: 3925 . v - the matrix 3926 3927 Level: intermediate 3928 3929 Notes: 3930 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed. 3931 3932 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows. 3933 3934 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3935 @*/ 3936 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3937 { 3938 PetscFunctionBegin; 3939 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3940 PetscValidType(A, 1); 3941 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3942 PetscValidLogicalCollectiveInt(A, rend, 3); 3943 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3944 PetscValidLogicalCollectiveInt(A, cend, 5); 3945 PetscAssertPointer(v, 6); 3946 if (rbegin == PETSC_DECIDE) rbegin = 0; 3947 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3948 if (cbegin == PETSC_DECIDE) cbegin = 0; 3949 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3950 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3951 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3952 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3953 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3954 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3955 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3956 PetscFunctionReturn(PETSC_SUCCESS); 3957 } 3958 3959 /*@ 3960 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`. 3961 3962 Collective 3963 3964 Input Parameters: 3965 + A - the `Mat` object 3966 - v - the `Mat` object (may be `NULL`) 3967 3968 Level: intermediate 3969 3970 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3971 @*/ 3972 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3973 { 3974 PetscFunctionBegin; 3975 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3976 PetscValidType(A, 1); 3977 PetscAssertPointer(v, 2); 3978 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3979 PetscFunctionReturn(PETSC_SUCCESS); 3980 } 3981 3982 #include <petscblaslapack.h> 3983 #include <petsc/private/kernels/blockinvert.h> 3984 3985 PetscErrorCode MatSeqDenseInvert(Mat A) 3986 { 3987 PetscInt m; 3988 const PetscReal shift = 0.0; 3989 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 3990 PetscScalar *values; 3991 3992 PetscFunctionBegin; 3993 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3994 PetscCall(MatDenseGetArray(A, &values)); 3995 PetscCall(MatGetLocalSize(A, &m, NULL)); 3996 allowzeropivot = PetscNot(A->erroriffailure); 3997 /* factor and invert each block */ 3998 switch (m) { 3999 case 1: 4000 values[0] = (PetscScalar)1.0 / (values[0] + shift); 4001 break; 4002 case 2: 4003 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 4004 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4005 break; 4006 case 3: 4007 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 4008 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4009 break; 4010 case 4: 4011 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 4012 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4013 break; 4014 case 5: { 4015 PetscScalar work[25]; 4016 PetscInt ipvt[5]; 4017 4018 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4019 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4020 } break; 4021 case 6: 4022 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 4023 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4024 break; 4025 case 7: 4026 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 4027 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4028 break; 4029 default: { 4030 PetscInt *v_pivots, *IJ, j; 4031 PetscScalar *v_work; 4032 4033 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ)); 4034 for (j = 0; j < m; j++) IJ[j] = j; 4035 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4036 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4037 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 4038 } 4039 } 4040 PetscCall(MatDenseRestoreArray(A, &values)); 4041 PetscFunctionReturn(PETSC_SUCCESS); 4042 } 4043