1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <petscblaslapack.h> 9 #include <../src/mat/impls/aij/seq/aij.h> 10 11 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 12 { 13 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 14 PetscInt j, k, n = A->rmap->n; 15 PetscScalar *v; 16 17 PetscFunctionBegin; 18 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 19 PetscCall(MatDenseGetArray(A, &v)); 20 if (!hermitian) { 21 for (k = 0; k < n; k++) { 22 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 23 } 24 } else { 25 for (k = 0; k < n; k++) { 26 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 27 } 28 } 29 PetscCall(MatDenseRestoreArray(A, &v)); 30 PetscFunctionReturn(0); 31 } 32 33 PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 34 { 35 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 36 PetscBLASInt info, n; 37 38 PetscFunctionBegin; 39 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 40 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 41 if (A->factortype == MAT_FACTOR_LU) { 42 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 43 if (!mat->fwork) { 44 mat->lfwork = n; 45 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 46 } 47 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 48 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 49 PetscCall(PetscFPTrapPop()); 50 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 51 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 52 if (A->spd == PETSC_BOOL3_TRUE) { 53 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 54 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 55 PetscCall(PetscFPTrapPop()); 56 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 57 #if defined(PETSC_USE_COMPLEX) 58 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 59 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 60 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 61 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 62 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 63 PetscCall(PetscFPTrapPop()); 64 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 65 #endif 66 } else { /* symmetric case */ 67 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 68 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 69 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 70 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 71 PetscCall(PetscFPTrapPop()); 72 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 73 } 74 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 75 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 76 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 77 78 A->ops->solve = NULL; 79 A->ops->matsolve = NULL; 80 A->ops->solvetranspose = NULL; 81 A->ops->matsolvetranspose = NULL; 82 A->ops->solveadd = NULL; 83 A->ops->solvetransposeadd = NULL; 84 A->factortype = MAT_FACTOR_NONE; 85 PetscCall(PetscFree(A->solvertype)); 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 90 { 91 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 92 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 93 PetscScalar *slot, *bb, *v; 94 const PetscScalar *xx; 95 96 PetscFunctionBegin; 97 if (PetscDefined(USE_DEBUG)) { 98 for (i = 0; i < N; i++) { 99 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 100 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 101 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 102 } 103 } 104 if (!N) PetscFunctionReturn(0); 105 106 /* fix right hand side if needed */ 107 if (x && b) { 108 Vec xt; 109 110 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 111 PetscCall(VecDuplicate(x, &xt)); 112 PetscCall(VecCopy(x, xt)); 113 PetscCall(VecScale(xt, -1.0)); 114 PetscCall(MatMultAdd(A, xt, b, b)); 115 PetscCall(VecDestroy(&xt)); 116 PetscCall(VecGetArrayRead(x, &xx)); 117 PetscCall(VecGetArray(b, &bb)); 118 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 119 PetscCall(VecRestoreArrayRead(x, &xx)); 120 PetscCall(VecRestoreArray(b, &bb)); 121 } 122 123 PetscCall(MatDenseGetArray(A, &v)); 124 for (i = 0; i < N; i++) { 125 slot = v + rows[i] * m; 126 PetscCall(PetscArrayzero(slot, r)); 127 } 128 for (i = 0; i < N; i++) { 129 slot = v + rows[i]; 130 for (j = 0; j < n; j++) { 131 *slot = 0.0; 132 slot += m; 133 } 134 } 135 if (diag != 0.0) { 136 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 137 for (i = 0; i < N; i++) { 138 slot = v + (m + 1) * rows[i]; 139 *slot = diag; 140 } 141 } 142 PetscCall(MatDenseRestoreArray(A, &v)); 143 PetscFunctionReturn(0); 144 } 145 146 PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C) 147 { 148 Mat_SeqDense *c = (Mat_SeqDense *)(C->data); 149 150 PetscFunctionBegin; 151 if (c->ptapwork) { 152 PetscCall((*C->ops->matmultnumeric)(A, P, c->ptapwork)); 153 PetscCall((*C->ops->transposematmultnumeric)(P, c->ptapwork, C)); 154 } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first"); 155 PetscFunctionReturn(0); 156 } 157 158 PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C) 159 { 160 Mat_SeqDense *c; 161 PetscBool cisdense = PETSC_FALSE; 162 163 PetscFunctionBegin; 164 PetscCall(MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N)); 165 #if defined(PETSC_HAVE_CUDA) 166 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 167 #elif (PETSC_HAVE_HIP) 168 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 169 #endif 170 171 if (!cisdense) { 172 PetscBool flg; 173 174 PetscCall(PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg)); 175 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 176 } 177 PetscCall(MatSetUp(C)); 178 c = (Mat_SeqDense *)C->data; 179 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork)); 180 PetscCall(MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N)); 181 PetscCall(MatSetType(c->ptapwork, ((PetscObject)C)->type_name)); 182 PetscCall(MatSetUp(c->ptapwork)); 183 PetscFunctionReturn(0); 184 } 185 186 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 187 { 188 Mat B = NULL; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 190 Mat_SeqDense *b; 191 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 192 const MatScalar *av; 193 PetscBool isseqdense; 194 195 PetscFunctionBegin; 196 if (reuse == MAT_REUSE_MATRIX) { 197 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 198 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)(*newmat))->type_name); 199 } 200 if (reuse != MAT_REUSE_MATRIX) { 201 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 202 PetscCall(MatSetSizes(B, m, n, m, n)); 203 PetscCall(MatSetType(B, MATSEQDENSE)); 204 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 205 b = (Mat_SeqDense *)(B->data); 206 } else { 207 b = (Mat_SeqDense *)((*newmat)->data); 208 PetscCall(PetscArrayzero(b->v, m * n)); 209 } 210 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 211 for (i = 0; i < m; i++) { 212 PetscInt j; 213 for (j = 0; j < ai[1] - ai[0]; j++) { 214 b->v[*aj * m + i] = *av; 215 aj++; 216 av++; 217 } 218 ai++; 219 } 220 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 221 222 if (reuse == MAT_INPLACE_MATRIX) { 223 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 224 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 225 PetscCall(MatHeaderReplace(A, &B)); 226 } else { 227 if (B) *newmat = B; 228 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 229 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 230 } 231 PetscFunctionReturn(0); 232 } 233 234 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 235 { 236 Mat B = NULL; 237 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 238 PetscInt i, j; 239 PetscInt *rows, *nnz; 240 MatScalar *aa = a->v, *vals; 241 242 PetscFunctionBegin; 243 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 244 if (reuse != MAT_REUSE_MATRIX) { 245 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 246 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 247 PetscCall(MatSetType(B, MATSEQAIJ)); 248 for (j = 0; j < A->cmap->n; j++) { 249 for (i = 0; i < A->rmap->n; i++) 250 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 251 aa += a->lda; 252 } 253 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 254 } else B = *newmat; 255 aa = a->v; 256 for (j = 0; j < A->cmap->n; j++) { 257 PetscInt numRows = 0; 258 for (i = 0; i < A->rmap->n; i++) 259 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 260 rows[numRows] = i; 261 vals[numRows++] = aa[i]; 262 } 263 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 264 aa += a->lda; 265 } 266 PetscCall(PetscFree3(rows, nnz, vals)); 267 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 268 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 269 270 if (reuse == MAT_INPLACE_MATRIX) { 271 PetscCall(MatHeaderReplace(A, &B)); 272 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 273 PetscFunctionReturn(0); 274 } 275 276 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 277 { 278 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 279 const PetscScalar *xv; 280 PetscScalar *yv; 281 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 282 283 PetscFunctionBegin; 284 PetscCall(MatDenseGetArrayRead(X, &xv)); 285 PetscCall(MatDenseGetArray(Y, &yv)); 286 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 287 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 288 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 289 PetscCall(PetscBLASIntCast(y->lda, &lday)); 290 if (ldax > m || lday > m) { 291 PetscInt j; 292 293 for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one)); 294 } else { 295 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 296 } 297 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 298 PetscCall(MatDenseRestoreArray(Y, &yv)); 299 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 300 PetscFunctionReturn(0); 301 } 302 303 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 304 { 305 PetscLogDouble N = A->rmap->n * A->cmap->n; 306 307 PetscFunctionBegin; 308 info->block_size = 1.0; 309 info->nz_allocated = N; 310 info->nz_used = N; 311 info->nz_unneeded = 0; 312 info->assemblies = A->num_ass; 313 info->mallocs = 0; 314 info->memory = 0; /* REVIEW ME */ 315 info->fill_ratio_given = 0; 316 info->fill_ratio_needed = 0; 317 info->factor_mallocs = 0; 318 PetscFunctionReturn(0); 319 } 320 321 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 322 { 323 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 324 PetscScalar *v; 325 PetscBLASInt one = 1, j, nz, lda = 0; 326 327 PetscFunctionBegin; 328 PetscCall(MatDenseGetArray(A, &v)); 329 PetscCall(PetscBLASIntCast(a->lda, &lda)); 330 if (lda > A->rmap->n) { 331 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 332 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 333 } else { 334 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 335 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 336 } 337 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 338 PetscCall(MatDenseRestoreArray(A, &v)); 339 PetscFunctionReturn(0); 340 } 341 342 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 343 { 344 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 345 PetscScalar *v; 346 PetscInt j, k; 347 348 PetscFunctionBegin; 349 PetscCall(MatDenseGetArray(A, &v)); 350 k = PetscMin(A->rmap->n, A->cmap->n); 351 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 352 PetscCall(PetscLogFlops(k)); 353 PetscCall(MatDenseRestoreArray(A, &v)); 354 PetscFunctionReturn(0); 355 } 356 357 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 358 { 359 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 360 PetscInt i, j, m = A->rmap->n, N = a->lda; 361 const PetscScalar *v; 362 363 PetscFunctionBegin; 364 *fl = PETSC_FALSE; 365 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 366 PetscCall(MatDenseGetArrayRead(A, &v)); 367 for (i = 0; i < m; i++) { 368 for (j = i; j < m; j++) { 369 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 370 } 371 } 372 *fl = PETSC_TRUE; 373 restore: 374 PetscCall(MatDenseRestoreArrayRead(A, &v)); 375 PetscFunctionReturn(0); 376 } 377 378 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 379 { 380 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 381 PetscInt i, j, m = A->rmap->n, N = a->lda; 382 const PetscScalar *v; 383 384 PetscFunctionBegin; 385 *fl = PETSC_FALSE; 386 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 387 PetscCall(MatDenseGetArrayRead(A, &v)); 388 for (i = 0; i < m; i++) { 389 for (j = i; j < m; j++) { 390 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 391 } 392 } 393 *fl = PETSC_TRUE; 394 restore: 395 PetscCall(MatDenseRestoreArrayRead(A, &v)); 396 PetscFunctionReturn(0); 397 } 398 399 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 400 { 401 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 402 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 403 PetscBool isdensecpu; 404 405 PetscFunctionBegin; 406 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 407 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 408 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 409 PetscCall(MatDenseSetLDA(newi, lda)); 410 } 411 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 412 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 413 if (cpvalues == MAT_COPY_VALUES) { 414 const PetscScalar *av; 415 PetscScalar *v; 416 417 PetscCall(MatDenseGetArrayRead(A, &av)); 418 PetscCall(MatDenseGetArrayWrite(newi, &v)); 419 PetscCall(MatDenseGetLDA(newi, &nlda)); 420 m = A->rmap->n; 421 if (lda > m || nlda > m) { 422 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(v + j * nlda, av + j * lda, m)); 423 } else { 424 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 425 } 426 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 427 PetscCall(MatDenseRestoreArrayRead(A, &av)); 428 } 429 PetscFunctionReturn(0); 430 } 431 432 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 433 { 434 PetscFunctionBegin; 435 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 436 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 437 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 438 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 439 PetscFunctionReturn(0); 440 } 441 442 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 443 { 444 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 445 PetscBLASInt info; 446 447 PetscFunctionBegin; 448 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 449 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 450 PetscCall(PetscFPTrapPop()); 451 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 452 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 453 PetscFunctionReturn(0); 454 } 455 456 static PetscErrorCode MatConjugate_SeqDense(Mat); 457 458 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 459 { 460 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 461 PetscBLASInt info; 462 463 PetscFunctionBegin; 464 if (A->spd == PETSC_BOOL3_TRUE) { 465 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 466 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 467 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 468 PetscCall(PetscFPTrapPop()); 469 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 470 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 471 #if defined(PETSC_USE_COMPLEX) 472 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 473 if (T) PetscCall(MatConjugate_SeqDense(A)); 474 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 475 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 476 PetscCall(PetscFPTrapPop()); 477 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 478 if (T) PetscCall(MatConjugate_SeqDense(A)); 479 #endif 480 } else { /* symmetric case */ 481 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 482 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 483 PetscCall(PetscFPTrapPop()); 484 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 485 } 486 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 487 PetscFunctionReturn(0); 488 } 489 490 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 491 { 492 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 493 PetscBLASInt info; 494 char trans; 495 496 PetscFunctionBegin; 497 if (PetscDefined(USE_COMPLEX)) { 498 trans = 'C'; 499 } else { 500 trans = 'T'; 501 } 502 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 503 { /* lwork depends on the number of right-hand sides */ 504 PetscBLASInt nlfwork, lfwork = -1; 505 PetscScalar fwork; 506 507 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 508 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 509 if (nlfwork > mat->lfwork) { 510 mat->lfwork = nlfwork; 511 PetscCall(PetscFree(mat->fwork)); 512 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 513 } 514 } 515 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 516 PetscCall(PetscFPTrapPop()); 517 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 518 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 519 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 520 PetscCall(PetscFPTrapPop()); 521 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 522 for (PetscInt j = 0; j < nrhs; j++) { 523 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 524 } 525 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 526 PetscFunctionReturn(0); 527 } 528 529 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 530 { 531 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 532 PetscBLASInt info; 533 534 PetscFunctionBegin; 535 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 536 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 537 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 538 PetscCall(PetscFPTrapPop()); 539 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 540 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 541 { /* lwork depends on the number of right-hand sides */ 542 PetscBLASInt nlfwork, lfwork = -1; 543 PetscScalar fwork; 544 545 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 546 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 547 if (nlfwork > mat->lfwork) { 548 mat->lfwork = nlfwork; 549 PetscCall(PetscFree(mat->fwork)); 550 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 551 } 552 } 553 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 554 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 555 PetscCall(PetscFPTrapPop()); 556 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 557 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 558 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 559 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 560 PetscFunctionReturn(0); 561 } 562 563 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 564 { 565 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 566 PetscScalar *y; 567 PetscBLASInt m = 0, k = 0; 568 569 PetscFunctionBegin; 570 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 571 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 572 if (k < m) { 573 PetscCall(VecCopy(xx, mat->qrrhs)); 574 PetscCall(VecGetArray(mat->qrrhs, &y)); 575 } else { 576 PetscCall(VecCopy(xx, yy)); 577 PetscCall(VecGetArray(yy, &y)); 578 } 579 *_y = y; 580 *_k = k; 581 *_m = m; 582 PetscFunctionReturn(0); 583 } 584 585 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 586 { 587 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 588 PetscScalar *y = NULL; 589 PetscBLASInt m, k; 590 591 PetscFunctionBegin; 592 y = *_y; 593 *_y = NULL; 594 k = *_k; 595 m = *_m; 596 if (k < m) { 597 PetscScalar *yv; 598 PetscCall(VecGetArray(yy, &yv)); 599 PetscCall(PetscArraycpy(yv, y, k)); 600 PetscCall(VecRestoreArray(yy, &yv)); 601 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 602 } else { 603 PetscCall(VecRestoreArray(yy, &y)); 604 } 605 PetscFunctionReturn(0); 606 } 607 608 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 609 { 610 PetscScalar *y = NULL; 611 PetscBLASInt m = 0, k = 0; 612 613 PetscFunctionBegin; 614 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 615 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 616 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 617 PetscFunctionReturn(0); 618 } 619 620 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 621 { 622 PetscScalar *y = NULL; 623 PetscBLASInt m = 0, k = 0; 624 625 PetscFunctionBegin; 626 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 627 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 628 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 629 PetscFunctionReturn(0); 630 } 631 632 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 633 { 634 PetscScalar *y = NULL; 635 PetscBLASInt m = 0, k = 0; 636 637 PetscFunctionBegin; 638 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 639 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 640 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 641 PetscFunctionReturn(0); 642 } 643 644 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 645 { 646 PetscScalar *y = NULL; 647 PetscBLASInt m = 0, k = 0; 648 649 PetscFunctionBegin; 650 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 651 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 652 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 653 PetscFunctionReturn(0); 654 } 655 656 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 657 { 658 PetscScalar *y = NULL; 659 PetscBLASInt m = 0, k = 0; 660 661 PetscFunctionBegin; 662 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 663 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 664 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 665 PetscFunctionReturn(0); 666 } 667 668 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 669 { 670 PetscScalar *y = NULL; 671 PetscBLASInt m = 0, k = 0; 672 673 PetscFunctionBegin; 674 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 675 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 676 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 677 PetscFunctionReturn(0); 678 } 679 680 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 681 { 682 const PetscScalar *b; 683 PetscScalar *y; 684 PetscInt n, _ldb, _ldx; 685 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 686 687 PetscFunctionBegin; 688 *_ldy = 0; 689 *_m = 0; 690 *_nrhs = 0; 691 *_k = 0; 692 *_y = NULL; 693 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 694 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 695 PetscCall(MatGetSize(B, NULL, &n)); 696 PetscCall(PetscBLASIntCast(n, &nrhs)); 697 PetscCall(MatDenseGetLDA(B, &_ldb)); 698 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 699 PetscCall(MatDenseGetLDA(X, &_ldx)); 700 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 701 if (ldx < m) { 702 PetscCall(MatDenseGetArrayRead(B, &b)); 703 PetscCall(PetscMalloc1(nrhs * m, &y)); 704 if (ldb == m) { 705 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 706 } else { 707 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 708 } 709 ldy = m; 710 PetscCall(MatDenseRestoreArrayRead(B, &b)); 711 } else { 712 if (ldb == ldx) { 713 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 714 PetscCall(MatDenseGetArray(X, &y)); 715 } else { 716 PetscCall(MatDenseGetArray(X, &y)); 717 PetscCall(MatDenseGetArrayRead(B, &b)); 718 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 719 PetscCall(MatDenseRestoreArrayRead(B, &b)); 720 } 721 ldy = ldx; 722 } 723 *_y = y; 724 *_ldy = ldy; 725 *_k = k; 726 *_m = m; 727 *_nrhs = nrhs; 728 PetscFunctionReturn(0); 729 } 730 731 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 732 { 733 PetscScalar *y; 734 PetscInt _ldx; 735 PetscBLASInt k, ldy, nrhs, ldx = 0; 736 737 PetscFunctionBegin; 738 y = *_y; 739 *_y = NULL; 740 k = *_k; 741 ldy = *_ldy; 742 nrhs = *_nrhs; 743 PetscCall(MatDenseGetLDA(X, &_ldx)); 744 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 745 if (ldx != ldy) { 746 PetscScalar *xv; 747 PetscCall(MatDenseGetArray(X, &xv)); 748 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 749 PetscCall(MatDenseRestoreArray(X, &xv)); 750 PetscCall(PetscFree(y)); 751 } else { 752 PetscCall(MatDenseRestoreArray(X, &y)); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 758 { 759 PetscScalar *y; 760 PetscBLASInt m, k, ldy, nrhs; 761 762 PetscFunctionBegin; 763 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 764 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 765 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 766 PetscFunctionReturn(0); 767 } 768 769 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 770 { 771 PetscScalar *y; 772 PetscBLASInt m, k, ldy, nrhs; 773 774 PetscFunctionBegin; 775 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 776 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 777 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 778 PetscFunctionReturn(0); 779 } 780 781 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 782 { 783 PetscScalar *y; 784 PetscBLASInt m, k, ldy, nrhs; 785 786 PetscFunctionBegin; 787 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 788 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 789 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 790 PetscFunctionReturn(0); 791 } 792 793 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 794 { 795 PetscScalar *y; 796 PetscBLASInt m, k, ldy, nrhs; 797 798 PetscFunctionBegin; 799 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 800 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 801 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 802 PetscFunctionReturn(0); 803 } 804 805 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 806 { 807 PetscScalar *y; 808 PetscBLASInt m, k, ldy, nrhs; 809 810 PetscFunctionBegin; 811 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 812 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 813 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 814 PetscFunctionReturn(0); 815 } 816 817 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 818 { 819 PetscScalar *y; 820 PetscBLASInt m, k, ldy, nrhs; 821 822 PetscFunctionBegin; 823 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 824 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 825 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 826 PetscFunctionReturn(0); 827 } 828 829 /* ---------------------------------------------------------------*/ 830 /* COMMENT: I have chosen to hide row permutation in the pivots, 831 rather than put it in the Mat->row slot.*/ 832 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 833 { 834 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 835 PetscBLASInt n, m, info; 836 837 PetscFunctionBegin; 838 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 839 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 840 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 841 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 842 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 843 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 844 PetscCall(PetscFPTrapPop()); 845 846 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 847 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 848 849 A->ops->solve = MatSolve_SeqDense_LU; 850 A->ops->matsolve = MatMatSolve_SeqDense_LU; 851 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 852 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 853 A->factortype = MAT_FACTOR_LU; 854 855 PetscCall(PetscFree(A->solvertype)); 856 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 857 858 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 859 PetscFunctionReturn(0); 860 } 861 862 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 863 { 864 MatFactorInfo info; 865 866 PetscFunctionBegin; 867 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 868 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 873 { 874 PetscFunctionBegin; 875 fact->preallocated = PETSC_TRUE; 876 fact->assembled = PETSC_TRUE; 877 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 878 PetscFunctionReturn(0); 879 } 880 881 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 882 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 883 { 884 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 885 PetscBLASInt info, n; 886 887 PetscFunctionBegin; 888 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 889 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 890 if (A->spd == PETSC_BOOL3_TRUE) { 891 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 892 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 893 PetscCall(PetscFPTrapPop()); 894 #if defined(PETSC_USE_COMPLEX) 895 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 896 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 897 if (!mat->fwork) { 898 PetscScalar dummy; 899 900 mat->lfwork = -1; 901 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 902 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 903 PetscCall(PetscFPTrapPop()); 904 mat->lfwork = (PetscInt)PetscRealPart(dummy); 905 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 906 } 907 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 908 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 909 PetscCall(PetscFPTrapPop()); 910 #endif 911 } else { /* symmetric case */ 912 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 913 if (!mat->fwork) { 914 PetscScalar dummy; 915 916 mat->lfwork = -1; 917 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 918 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 919 PetscCall(PetscFPTrapPop()); 920 mat->lfwork = (PetscInt)PetscRealPart(dummy); 921 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 922 } 923 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 924 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 925 PetscCall(PetscFPTrapPop()); 926 } 927 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 928 929 A->ops->solve = MatSolve_SeqDense_Cholesky; 930 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 931 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 932 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 933 A->factortype = MAT_FACTOR_CHOLESKY; 934 935 PetscCall(PetscFree(A->solvertype)); 936 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 937 938 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 939 PetscFunctionReturn(0); 940 } 941 942 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 943 { 944 MatFactorInfo info; 945 946 PetscFunctionBegin; 947 info.fill = 1.0; 948 949 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 950 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 951 PetscFunctionReturn(0); 952 } 953 954 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 955 { 956 PetscFunctionBegin; 957 fact->assembled = PETSC_TRUE; 958 fact->preallocated = PETSC_TRUE; 959 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 960 PetscFunctionReturn(0); 961 } 962 963 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 964 { 965 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 966 PetscBLASInt n, m, info, min, max; 967 968 PetscFunctionBegin; 969 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 970 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 971 max = PetscMax(m, n); 972 min = PetscMin(m, n); 973 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 974 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 975 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &(mat->qrrhs))); 976 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 977 if (!mat->fwork) { 978 PetscScalar dummy; 979 980 mat->lfwork = -1; 981 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 982 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 983 PetscCall(PetscFPTrapPop()); 984 mat->lfwork = (PetscInt)PetscRealPart(dummy); 985 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 986 } 987 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 988 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 989 PetscCall(PetscFPTrapPop()); 990 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 991 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 992 mat->rank = min; 993 994 A->ops->solve = MatSolve_SeqDense_QR; 995 A->ops->matsolve = MatMatSolve_SeqDense_QR; 996 A->factortype = MAT_FACTOR_QR; 997 if (m == n) { 998 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 999 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 1000 } 1001 1002 PetscCall(PetscFree(A->solvertype)); 1003 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 1004 1005 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 1010 { 1011 MatFactorInfo info; 1012 1013 PetscFunctionBegin; 1014 info.fill = 1.0; 1015 1016 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 1017 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 1022 { 1023 PetscFunctionBegin; 1024 fact->assembled = PETSC_TRUE; 1025 fact->preallocated = PETSC_TRUE; 1026 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* uses LAPACK */ 1031 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 1032 { 1033 PetscFunctionBegin; 1034 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 1035 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 1036 PetscCall(MatSetType(*fact, MATDENSE)); 1037 (*fact)->trivialsymbolic = PETSC_TRUE; 1038 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 1039 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1040 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1041 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1042 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1043 } else if (ftype == MAT_FACTOR_QR) { 1044 PetscCall(PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1045 } 1046 (*fact)->factortype = ftype; 1047 1048 PetscCall(PetscFree((*fact)->solvertype)); 1049 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1050 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1051 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1052 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1053 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 /* ------------------------------------------------------------------*/ 1058 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1059 { 1060 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1061 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1062 const PetscScalar *b; 1063 PetscInt m = A->rmap->n, i; 1064 PetscBLASInt o = 1, bm = 0; 1065 1066 PetscFunctionBegin; 1067 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1068 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1069 #endif 1070 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1071 PetscCall(PetscBLASIntCast(m, &bm)); 1072 if (flag & SOR_ZERO_INITIAL_GUESS) { 1073 /* this is a hack fix, should have another version without the second BLASdotu */ 1074 PetscCall(VecSet(xx, zero)); 1075 } 1076 PetscCall(VecGetArray(xx, &x)); 1077 PetscCall(VecGetArrayRead(bb, &b)); 1078 its = its * lits; 1079 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1080 while (its--) { 1081 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1082 for (i = 0; i < m; i++) { 1083 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1084 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1085 } 1086 } 1087 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1088 for (i = m - 1; i >= 0; i--) { 1089 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1090 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1091 } 1092 } 1093 } 1094 PetscCall(VecRestoreArrayRead(bb, &b)); 1095 PetscCall(VecRestoreArray(xx, &x)); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* -----------------------------------------------------------------*/ 1100 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1101 { 1102 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1103 const PetscScalar *v = mat->v, *x; 1104 PetscScalar *y; 1105 PetscBLASInt m, n, _One = 1; 1106 PetscScalar _DOne = 1.0, _DZero = 0.0; 1107 1108 PetscFunctionBegin; 1109 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1110 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1111 PetscCall(VecGetArrayRead(xx, &x)); 1112 PetscCall(VecGetArrayWrite(yy, &y)); 1113 if (!A->rmap->n || !A->cmap->n) { 1114 PetscBLASInt i; 1115 for (i = 0; i < n; i++) y[i] = 0.0; 1116 } else { 1117 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One)); 1118 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n)); 1119 } 1120 PetscCall(VecRestoreArrayRead(xx, &x)); 1121 PetscCall(VecRestoreArrayWrite(yy, &y)); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1126 { 1127 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1128 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1129 PetscBLASInt m, n, _One = 1; 1130 const PetscScalar *v = mat->v, *x; 1131 1132 PetscFunctionBegin; 1133 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1134 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1135 PetscCall(VecGetArrayRead(xx, &x)); 1136 PetscCall(VecGetArrayWrite(yy, &y)); 1137 if (!A->rmap->n || !A->cmap->n) { 1138 PetscBLASInt i; 1139 for (i = 0; i < m; i++) y[i] = 0.0; 1140 } else { 1141 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One)); 1142 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n)); 1143 } 1144 PetscCall(VecRestoreArrayRead(xx, &x)); 1145 PetscCall(VecRestoreArrayWrite(yy, &y)); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1150 { 1151 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1152 const PetscScalar *v = mat->v, *x; 1153 PetscScalar *y, _DOne = 1.0; 1154 PetscBLASInt m, n, _One = 1; 1155 1156 PetscFunctionBegin; 1157 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1158 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1159 PetscCall(VecCopy(zz, yy)); 1160 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1161 PetscCall(VecGetArrayRead(xx, &x)); 1162 PetscCall(VecGetArray(yy, &y)); 1163 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1164 PetscCall(VecRestoreArrayRead(xx, &x)); 1165 PetscCall(VecRestoreArray(yy, &y)); 1166 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1171 { 1172 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1173 const PetscScalar *v = mat->v, *x; 1174 PetscScalar *y; 1175 PetscBLASInt m, n, _One = 1; 1176 PetscScalar _DOne = 1.0; 1177 1178 PetscFunctionBegin; 1179 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1180 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1181 PetscCall(VecCopy(zz, yy)); 1182 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1183 PetscCall(VecGetArrayRead(xx, &x)); 1184 PetscCall(VecGetArray(yy, &y)); 1185 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1186 PetscCall(VecRestoreArrayRead(xx, &x)); 1187 PetscCall(VecRestoreArray(yy, &y)); 1188 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 /* -----------------------------------------------------------------*/ 1193 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1194 { 1195 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1196 PetscInt i; 1197 1198 PetscFunctionBegin; 1199 *ncols = A->cmap->n; 1200 if (cols) { 1201 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1202 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1203 } 1204 if (vals) { 1205 const PetscScalar *v; 1206 1207 PetscCall(MatDenseGetArrayRead(A, &v)); 1208 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1209 v += row; 1210 for (i = 0; i < A->cmap->n; i++) { 1211 (*vals)[i] = *v; 1212 v += mat->lda; 1213 } 1214 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1220 { 1221 PetscFunctionBegin; 1222 if (ncols) *ncols = 0; 1223 if (cols) PetscCall(PetscFree(*cols)); 1224 if (vals) PetscCall(PetscFree(*vals)); 1225 PetscFunctionReturn(0); 1226 } 1227 /* ----------------------------------------------------------------*/ 1228 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1229 { 1230 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1231 PetscScalar *av; 1232 PetscInt i, j, idx = 0; 1233 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1234 PetscOffloadMask oldf; 1235 #endif 1236 1237 PetscFunctionBegin; 1238 PetscCall(MatDenseGetArray(A, &av)); 1239 if (!mat->roworiented) { 1240 if (addv == INSERT_VALUES) { 1241 for (j = 0; j < n; j++) { 1242 if (indexn[j] < 0) { 1243 idx += m; 1244 continue; 1245 } 1246 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1247 for (i = 0; i < m; i++) { 1248 if (indexm[i] < 0) { 1249 idx++; 1250 continue; 1251 } 1252 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1253 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1254 } 1255 } 1256 } else { 1257 for (j = 0; j < n; j++) { 1258 if (indexn[j] < 0) { 1259 idx += m; 1260 continue; 1261 } 1262 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1263 for (i = 0; i < m; i++) { 1264 if (indexm[i] < 0) { 1265 idx++; 1266 continue; 1267 } 1268 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1269 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1270 } 1271 } 1272 } 1273 } else { 1274 if (addv == INSERT_VALUES) { 1275 for (i = 0; i < m; i++) { 1276 if (indexm[i] < 0) { 1277 idx += n; 1278 continue; 1279 } 1280 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1281 for (j = 0; j < n; j++) { 1282 if (indexn[j] < 0) { 1283 idx++; 1284 continue; 1285 } 1286 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1287 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1288 } 1289 } 1290 } else { 1291 for (i = 0; i < m; i++) { 1292 if (indexm[i] < 0) { 1293 idx += n; 1294 continue; 1295 } 1296 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1297 for (j = 0; j < n; j++) { 1298 if (indexn[j] < 0) { 1299 idx++; 1300 continue; 1301 } 1302 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1303 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1304 } 1305 } 1306 } 1307 } 1308 /* hack to prevent unneeded copy to the GPU while returning the array */ 1309 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1310 oldf = A->offloadmask; 1311 A->offloadmask = PETSC_OFFLOAD_GPU; 1312 #endif 1313 PetscCall(MatDenseRestoreArray(A, &av)); 1314 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1315 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1316 #endif 1317 PetscFunctionReturn(0); 1318 } 1319 1320 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1321 { 1322 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1323 const PetscScalar *vv; 1324 PetscInt i, j; 1325 1326 PetscFunctionBegin; 1327 PetscCall(MatDenseGetArrayRead(A, &vv)); 1328 /* row-oriented output */ 1329 for (i = 0; i < m; i++) { 1330 if (indexm[i] < 0) { 1331 v += n; 1332 continue; 1333 } 1334 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1335 for (j = 0; j < n; j++) { 1336 if (indexn[j] < 0) { 1337 v++; 1338 continue; 1339 } 1340 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1341 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1342 } 1343 } 1344 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 /* -----------------------------------------------------------------*/ 1349 1350 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1351 { 1352 PetscBool skipHeader; 1353 PetscViewerFormat format; 1354 PetscInt header[4], M, N, m, lda, i, j, k; 1355 const PetscScalar *v; 1356 PetscScalar *vwork; 1357 1358 PetscFunctionBegin; 1359 PetscCall(PetscViewerSetUp(viewer)); 1360 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1361 PetscCall(PetscViewerGetFormat(viewer, &format)); 1362 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1363 1364 PetscCall(MatGetSize(mat, &M, &N)); 1365 1366 /* write matrix header */ 1367 header[0] = MAT_FILE_CLASSID; 1368 header[1] = M; 1369 header[2] = N; 1370 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1371 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1372 1373 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1374 if (format != PETSC_VIEWER_NATIVE) { 1375 PetscInt nnz = m * N, *iwork; 1376 /* store row lengths for each row */ 1377 PetscCall(PetscMalloc1(nnz, &iwork)); 1378 for (i = 0; i < m; i++) iwork[i] = N; 1379 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1380 /* store column indices (zero start index) */ 1381 for (k = 0, i = 0; i < m; i++) 1382 for (j = 0; j < N; j++, k++) iwork[k] = j; 1383 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1384 PetscCall(PetscFree(iwork)); 1385 } 1386 /* store matrix values as a dense matrix in row major order */ 1387 PetscCall(PetscMalloc1(m * N, &vwork)); 1388 PetscCall(MatDenseGetArrayRead(mat, &v)); 1389 PetscCall(MatDenseGetLDA(mat, &lda)); 1390 for (k = 0, i = 0; i < m; i++) 1391 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1392 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1393 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1394 PetscCall(PetscFree(vwork)); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1399 { 1400 PetscBool skipHeader; 1401 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1402 PetscInt rows, cols; 1403 PetscScalar *v, *vwork; 1404 1405 PetscFunctionBegin; 1406 PetscCall(PetscViewerSetUp(viewer)); 1407 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1408 1409 if (!skipHeader) { 1410 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1411 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1412 M = header[1]; 1413 N = header[2]; 1414 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1415 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1416 nz = header[3]; 1417 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1418 } else { 1419 PetscCall(MatGetSize(mat, &M, &N)); 1420 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1421 nz = MATRIX_BINARY_FORMAT_DENSE; 1422 } 1423 1424 /* setup global sizes if not set */ 1425 if (mat->rmap->N < 0) mat->rmap->N = M; 1426 if (mat->cmap->N < 0) mat->cmap->N = N; 1427 PetscCall(MatSetUp(mat)); 1428 /* check if global sizes are correct */ 1429 PetscCall(MatGetSize(mat, &rows, &cols)); 1430 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1431 1432 PetscCall(MatGetSize(mat, NULL, &N)); 1433 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1434 PetscCall(MatDenseGetArray(mat, &v)); 1435 PetscCall(MatDenseGetLDA(mat, &lda)); 1436 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1437 PetscInt nnz = m * N; 1438 /* read in matrix values */ 1439 PetscCall(PetscMalloc1(nnz, &vwork)); 1440 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1441 /* store values in column major order */ 1442 for (j = 0; j < N; j++) 1443 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1444 PetscCall(PetscFree(vwork)); 1445 } else { /* matrix in file is sparse format */ 1446 PetscInt nnz = 0, *rlens, *icols; 1447 /* read in row lengths */ 1448 PetscCall(PetscMalloc1(m, &rlens)); 1449 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1450 for (i = 0; i < m; i++) nnz += rlens[i]; 1451 /* read in column indices and values */ 1452 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1453 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1454 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1455 /* store values in column major order */ 1456 for (k = 0, i = 0; i < m; i++) 1457 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1458 PetscCall(PetscFree(rlens)); 1459 PetscCall(PetscFree2(icols, vwork)); 1460 } 1461 PetscCall(MatDenseRestoreArray(mat, &v)); 1462 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1463 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1468 { 1469 PetscBool isbinary, ishdf5; 1470 1471 PetscFunctionBegin; 1472 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1473 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1474 /* force binary viewer to load .info file if it has not yet done so */ 1475 PetscCall(PetscViewerSetUp(viewer)); 1476 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1477 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1478 if (isbinary) { 1479 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1480 } else if (ishdf5) { 1481 #if defined(PETSC_HAVE_HDF5) 1482 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1483 #else 1484 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1485 #endif 1486 } else { 1487 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1488 } 1489 PetscFunctionReturn(0); 1490 } 1491 1492 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1493 { 1494 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1495 PetscInt i, j; 1496 const char *name; 1497 PetscScalar *v, *av; 1498 PetscViewerFormat format; 1499 #if defined(PETSC_USE_COMPLEX) 1500 PetscBool allreal = PETSC_TRUE; 1501 #endif 1502 1503 PetscFunctionBegin; 1504 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1505 PetscCall(PetscViewerGetFormat(viewer, &format)); 1506 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1507 PetscFunctionReturn(0); /* do nothing for now */ 1508 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1509 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1510 for (i = 0; i < A->rmap->n; i++) { 1511 v = av + i; 1512 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1513 for (j = 0; j < A->cmap->n; j++) { 1514 #if defined(PETSC_USE_COMPLEX) 1515 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1516 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1517 } else if (PetscRealPart(*v)) { 1518 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1519 } 1520 #else 1521 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1522 #endif 1523 v += a->lda; 1524 } 1525 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1526 } 1527 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1528 } else { 1529 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1530 #if defined(PETSC_USE_COMPLEX) 1531 /* determine if matrix has all real values */ 1532 for (j = 0; j < A->cmap->n; j++) { 1533 v = av + j * a->lda; 1534 for (i = 0; i < A->rmap->n; i++) { 1535 if (PetscImaginaryPart(v[i])) { 1536 allreal = PETSC_FALSE; 1537 break; 1538 } 1539 } 1540 } 1541 #endif 1542 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1543 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1544 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1545 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1546 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1547 } 1548 1549 for (i = 0; i < A->rmap->n; i++) { 1550 v = av + i; 1551 for (j = 0; j < A->cmap->n; j++) { 1552 #if defined(PETSC_USE_COMPLEX) 1553 if (allreal) { 1554 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1555 } else { 1556 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1557 } 1558 #else 1559 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1560 #endif 1561 v += a->lda; 1562 } 1563 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1564 } 1565 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1566 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1567 } 1568 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1569 PetscCall(PetscViewerFlush(viewer)); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #include <petscdraw.h> 1574 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1575 { 1576 Mat A = (Mat)Aa; 1577 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1578 int color = PETSC_DRAW_WHITE; 1579 const PetscScalar *v; 1580 PetscViewer viewer; 1581 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1582 PetscViewerFormat format; 1583 1584 PetscFunctionBegin; 1585 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1586 PetscCall(PetscViewerGetFormat(viewer, &format)); 1587 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1588 1589 /* Loop over matrix elements drawing boxes */ 1590 PetscCall(MatDenseGetArrayRead(A, &v)); 1591 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1592 PetscDrawCollectiveBegin(draw); 1593 /* Blue for negative and Red for positive */ 1594 for (j = 0; j < n; j++) { 1595 x_l = j; 1596 x_r = x_l + 1.0; 1597 for (i = 0; i < m; i++) { 1598 y_l = m - i - 1.0; 1599 y_r = y_l + 1.0; 1600 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1601 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1602 else continue; 1603 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1604 } 1605 } 1606 PetscDrawCollectiveEnd(draw); 1607 } else { 1608 /* use contour shading to indicate magnitude of values */ 1609 /* first determine max of all nonzero values */ 1610 PetscReal minv = 0.0, maxv = 0.0; 1611 PetscDraw popup; 1612 1613 for (i = 0; i < m * n; i++) { 1614 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1615 } 1616 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1617 PetscCall(PetscDrawGetPopup(draw, &popup)); 1618 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1619 1620 PetscDrawCollectiveBegin(draw); 1621 for (j = 0; j < n; j++) { 1622 x_l = j; 1623 x_r = x_l + 1.0; 1624 for (i = 0; i < m; i++) { 1625 y_l = m - i - 1.0; 1626 y_r = y_l + 1.0; 1627 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1628 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1629 } 1630 } 1631 PetscDrawCollectiveEnd(draw); 1632 } 1633 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1634 PetscFunctionReturn(0); 1635 } 1636 1637 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1638 { 1639 PetscDraw draw; 1640 PetscBool isnull; 1641 PetscReal xr, yr, xl, yl, h, w; 1642 1643 PetscFunctionBegin; 1644 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1645 PetscCall(PetscDrawIsNull(draw, &isnull)); 1646 if (isnull) PetscFunctionReturn(0); 1647 1648 xr = A->cmap->n; 1649 yr = A->rmap->n; 1650 h = yr / 10.0; 1651 w = xr / 10.0; 1652 xr += w; 1653 yr += h; 1654 xl = -w; 1655 yl = -h; 1656 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1657 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1658 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1659 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1660 PetscCall(PetscDrawSave(draw)); 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1665 { 1666 PetscBool iascii, isbinary, isdraw; 1667 1668 PetscFunctionBegin; 1669 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1670 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1671 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1672 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1673 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1674 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1679 { 1680 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1681 1682 PetscFunctionBegin; 1683 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1684 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1685 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreArray() first"); 1686 a->unplacedarray = a->v; 1687 a->unplaced_user_alloc = a->user_alloc; 1688 a->v = (PetscScalar *)array; 1689 a->user_alloc = PETSC_TRUE; 1690 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1691 A->offloadmask = PETSC_OFFLOAD_CPU; 1692 #endif 1693 PetscFunctionReturn(0); 1694 } 1695 1696 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1697 { 1698 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1699 1700 PetscFunctionBegin; 1701 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1702 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1703 a->v = a->unplacedarray; 1704 a->user_alloc = a->unplaced_user_alloc; 1705 a->unplacedarray = NULL; 1706 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1707 A->offloadmask = PETSC_OFFLOAD_CPU; 1708 #endif 1709 PetscFunctionReturn(0); 1710 } 1711 1712 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1713 { 1714 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1715 1716 PetscFunctionBegin; 1717 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1718 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1719 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1720 a->v = (PetscScalar *)array; 1721 a->user_alloc = PETSC_FALSE; 1722 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1723 A->offloadmask = PETSC_OFFLOAD_CPU; 1724 #endif 1725 PetscFunctionReturn(0); 1726 } 1727 1728 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1729 { 1730 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1731 1732 PetscFunctionBegin; 1733 #if defined(PETSC_USE_LOG) 1734 PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n); 1735 #endif 1736 PetscCall(VecDestroy(&(l->qrrhs))); 1737 PetscCall(PetscFree(l->tau)); 1738 PetscCall(PetscFree(l->pivots)); 1739 PetscCall(PetscFree(l->fwork)); 1740 PetscCall(MatDestroy(&l->ptapwork)); 1741 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1742 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1743 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1744 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1745 PetscCall(VecDestroy(&l->cvec)); 1746 PetscCall(MatDestroy(&l->cmat)); 1747 PetscCall(PetscFree(mat->data)); 1748 1749 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1765 #if defined(PETSC_HAVE_ELEMENTAL) 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1767 #endif 1768 #if defined(PETSC_HAVE_SCALAPACK) 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1770 #endif 1771 #if defined(PETSC_HAVE_CUDA) 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1776 #endif 1777 #if defined(PETSC_HAVE_HIP) 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1782 #endif 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1788 1789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1791 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1792 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1793 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1794 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1795 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1796 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1797 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1798 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1799 PetscFunctionReturn(0); 1800 } 1801 1802 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1803 { 1804 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1805 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1806 PetscScalar *v, tmp; 1807 1808 PetscFunctionBegin; 1809 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1810 if (reuse == MAT_INPLACE_MATRIX) { 1811 if (m == n) { /* in place transpose */ 1812 PetscCall(MatDenseGetArray(A, &v)); 1813 for (j = 0; j < m; j++) { 1814 for (k = 0; k < j; k++) { 1815 tmp = v[j + k * M]; 1816 v[j + k * M] = v[k + j * M]; 1817 v[k + j * M] = tmp; 1818 } 1819 } 1820 PetscCall(MatDenseRestoreArray(A, &v)); 1821 } else { /* reuse memory, temporary allocates new memory */ 1822 PetscScalar *v2; 1823 PetscLayout tmplayout; 1824 1825 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1826 PetscCall(MatDenseGetArray(A, &v)); 1827 for (j = 0; j < n; j++) { 1828 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1829 } 1830 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1831 PetscCall(PetscFree(v2)); 1832 PetscCall(MatDenseRestoreArray(A, &v)); 1833 /* cleanup size dependent quantities */ 1834 PetscCall(VecDestroy(&mat->cvec)); 1835 PetscCall(MatDestroy(&mat->cmat)); 1836 PetscCall(PetscFree(mat->pivots)); 1837 PetscCall(PetscFree(mat->fwork)); 1838 PetscCall(MatDestroy(&mat->ptapwork)); 1839 /* swap row/col layouts */ 1840 mat->lda = n; 1841 tmplayout = A->rmap; 1842 A->rmap = A->cmap; 1843 A->cmap = tmplayout; 1844 } 1845 } else { /* out-of-place transpose */ 1846 Mat tmat; 1847 Mat_SeqDense *tmatd; 1848 PetscScalar *v2; 1849 PetscInt M2; 1850 1851 if (reuse == MAT_INITIAL_MATRIX) { 1852 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1853 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1854 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1855 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1856 } else tmat = *matout; 1857 1858 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1859 PetscCall(MatDenseGetArray(tmat, &v2)); 1860 tmatd = (Mat_SeqDense *)tmat->data; 1861 M2 = tmatd->lda; 1862 for (j = 0; j < n; j++) { 1863 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1864 } 1865 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1866 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1867 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1868 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1869 *matout = tmat; 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1875 { 1876 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1877 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1878 PetscInt i; 1879 const PetscScalar *v1, *v2; 1880 1881 PetscFunctionBegin; 1882 if (A1->rmap->n != A2->rmap->n) { 1883 *flg = PETSC_FALSE; 1884 PetscFunctionReturn(0); 1885 } 1886 if (A1->cmap->n != A2->cmap->n) { 1887 *flg = PETSC_FALSE; 1888 PetscFunctionReturn(0); 1889 } 1890 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1891 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1892 for (i = 0; i < A1->cmap->n; i++) { 1893 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1894 if (*flg == PETSC_FALSE) PetscFunctionReturn(0); 1895 v1 += mat1->lda; 1896 v2 += mat2->lda; 1897 } 1898 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1899 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1900 *flg = PETSC_TRUE; 1901 PetscFunctionReturn(0); 1902 } 1903 1904 static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1905 { 1906 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1907 PetscInt i, n, len; 1908 PetscScalar *x; 1909 const PetscScalar *vv; 1910 1911 PetscFunctionBegin; 1912 PetscCall(VecGetSize(v, &n)); 1913 PetscCall(VecGetArray(v, &x)); 1914 len = PetscMin(A->rmap->n, A->cmap->n); 1915 PetscCall(MatDenseGetArrayRead(A, &vv)); 1916 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1917 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1918 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1919 PetscCall(VecRestoreArray(v, &x)); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1924 { 1925 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1926 const PetscScalar *l, *r; 1927 PetscScalar x, *v, *vv; 1928 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1929 1930 PetscFunctionBegin; 1931 PetscCall(MatDenseGetArray(A, &vv)); 1932 if (ll) { 1933 PetscCall(VecGetSize(ll, &m)); 1934 PetscCall(VecGetArrayRead(ll, &l)); 1935 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1936 for (i = 0; i < m; i++) { 1937 x = l[i]; 1938 v = vv + i; 1939 for (j = 0; j < n; j++) { 1940 (*v) *= x; 1941 v += mat->lda; 1942 } 1943 } 1944 PetscCall(VecRestoreArrayRead(ll, &l)); 1945 PetscCall(PetscLogFlops(1.0 * n * m)); 1946 } 1947 if (rr) { 1948 PetscCall(VecGetSize(rr, &n)); 1949 PetscCall(VecGetArrayRead(rr, &r)); 1950 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1951 for (i = 0; i < n; i++) { 1952 x = r[i]; 1953 v = vv + i * mat->lda; 1954 for (j = 0; j < m; j++) (*v++) *= x; 1955 } 1956 PetscCall(VecRestoreArrayRead(rr, &r)); 1957 PetscCall(PetscLogFlops(1.0 * n * m)); 1958 } 1959 PetscCall(MatDenseRestoreArray(A, &vv)); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1964 { 1965 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1966 PetscScalar *v, *vv; 1967 PetscReal sum = 0.0; 1968 PetscInt lda, m = A->rmap->n, i, j; 1969 1970 PetscFunctionBegin; 1971 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1972 PetscCall(MatDenseGetLDA(A, &lda)); 1973 v = vv; 1974 if (type == NORM_FROBENIUS) { 1975 if (lda > m) { 1976 for (j = 0; j < A->cmap->n; j++) { 1977 v = vv + j * lda; 1978 for (i = 0; i < m; i++) { 1979 sum += PetscRealPart(PetscConj(*v) * (*v)); 1980 v++; 1981 } 1982 } 1983 } else { 1984 #if defined(PETSC_USE_REAL___FP16) 1985 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1986 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1987 } 1988 #else 1989 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1990 sum += PetscRealPart(PetscConj(*v) * (*v)); 1991 v++; 1992 } 1993 } 1994 *nrm = PetscSqrtReal(sum); 1995 #endif 1996 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1997 } else if (type == NORM_1) { 1998 *nrm = 0.0; 1999 for (j = 0; j < A->cmap->n; j++) { 2000 v = vv + j * mat->lda; 2001 sum = 0.0; 2002 for (i = 0; i < A->rmap->n; i++) { 2003 sum += PetscAbsScalar(*v); 2004 v++; 2005 } 2006 if (sum > *nrm) *nrm = sum; 2007 } 2008 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2009 } else if (type == NORM_INFINITY) { 2010 *nrm = 0.0; 2011 for (j = 0; j < A->rmap->n; j++) { 2012 v = vv + j; 2013 sum = 0.0; 2014 for (i = 0; i < A->cmap->n; i++) { 2015 sum += PetscAbsScalar(*v); 2016 v += mat->lda; 2017 } 2018 if (sum > *nrm) *nrm = sum; 2019 } 2020 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2021 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2022 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2027 { 2028 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2029 2030 PetscFunctionBegin; 2031 switch (op) { 2032 case MAT_ROW_ORIENTED: 2033 aij->roworiented = flg; 2034 break; 2035 case MAT_NEW_NONZERO_LOCATIONS: 2036 case MAT_NEW_NONZERO_LOCATION_ERR: 2037 case MAT_NEW_NONZERO_ALLOCATION_ERR: 2038 case MAT_FORCE_DIAGONAL_ENTRIES: 2039 case MAT_KEEP_NONZERO_PATTERN: 2040 case MAT_IGNORE_OFF_PROC_ENTRIES: 2041 case MAT_USE_HASH_TABLE: 2042 case MAT_IGNORE_ZERO_ENTRIES: 2043 case MAT_IGNORE_LOWER_TRIANGULAR: 2044 case MAT_SORTED_FULL: 2045 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 2046 break; 2047 case MAT_SPD: 2048 case MAT_SYMMETRIC: 2049 case MAT_STRUCTURALLY_SYMMETRIC: 2050 case MAT_HERMITIAN: 2051 case MAT_SYMMETRY_ETERNAL: 2052 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 2053 case MAT_SPD_ETERNAL: 2054 break; 2055 default: 2056 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2062 { 2063 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2064 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2065 PetscScalar *v; 2066 2067 PetscFunctionBegin; 2068 PetscCall(MatDenseGetArrayWrite(A, &v)); 2069 if (lda > m) { 2070 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2071 } else { 2072 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2073 } 2074 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2079 { 2080 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2081 PetscInt m = l->lda, n = A->cmap->n, i, j; 2082 PetscScalar *slot, *bb, *v; 2083 const PetscScalar *xx; 2084 2085 PetscFunctionBegin; 2086 if (PetscDefined(USE_DEBUG)) { 2087 for (i = 0; i < N; i++) { 2088 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2089 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2090 } 2091 } 2092 if (!N) PetscFunctionReturn(0); 2093 2094 /* fix right hand side if needed */ 2095 if (x && b) { 2096 PetscCall(VecGetArrayRead(x, &xx)); 2097 PetscCall(VecGetArray(b, &bb)); 2098 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2099 PetscCall(VecRestoreArrayRead(x, &xx)); 2100 PetscCall(VecRestoreArray(b, &bb)); 2101 } 2102 2103 PetscCall(MatDenseGetArray(A, &v)); 2104 for (i = 0; i < N; i++) { 2105 slot = v + rows[i]; 2106 for (j = 0; j < n; j++) { 2107 *slot = 0.0; 2108 slot += m; 2109 } 2110 } 2111 if (diag != 0.0) { 2112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2113 for (i = 0; i < N; i++) { 2114 slot = v + (m + 1) * rows[i]; 2115 *slot = diag; 2116 } 2117 } 2118 PetscCall(MatDenseRestoreArray(A, &v)); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2123 { 2124 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2125 2126 PetscFunctionBegin; 2127 *lda = mat->lda; 2128 PetscFunctionReturn(0); 2129 } 2130 2131 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2132 { 2133 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2134 2135 PetscFunctionBegin; 2136 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2137 *array = mat->v; 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2142 { 2143 PetscFunctionBegin; 2144 if (array) *array = NULL; 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /*@ 2149 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2150 2151 Not collective 2152 2153 Input Parameter: 2154 . mat - a `MATDENSE` or `MATDENSECUDA` matrix 2155 2156 Output Parameter: 2157 . lda - the leading dimension 2158 2159 Level: intermediate 2160 2161 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2162 @*/ 2163 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2164 { 2165 PetscFunctionBegin; 2166 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2167 PetscValidIntPointer(lda, 2); 2168 MatCheckPreallocated(A, 1); 2169 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 /*@ 2174 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2175 2176 Not collective 2177 2178 Input Parameters: 2179 + mat - a `MATDENSE` or `MATDENSECUDA` matrix 2180 - lda - the leading dimension 2181 2182 Level: intermediate 2183 2184 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2185 @*/ 2186 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2187 { 2188 PetscFunctionBegin; 2189 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2190 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 /*@C 2195 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2196 2197 Logically Collective 2198 2199 Input Parameter: 2200 . mat - a dense matrix 2201 2202 Output Parameter: 2203 . array - pointer to the data 2204 2205 Level: intermediate 2206 2207 Fortran Note: 2208 `MatDenseGetArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseGetArrayF90()` 2209 2210 .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2211 @*/ 2212 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array) 2213 { 2214 PetscFunctionBegin; 2215 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2216 PetscValidPointer(array, 2); 2217 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 /*@C 2222 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2223 2224 Logically Collective 2225 2226 Input Parameters: 2227 + mat - a dense matrix 2228 - array - pointer to the data (may be NULL) 2229 2230 Level: intermediate 2231 2232 Fortran Note: 2233 `MatDenseRestoreArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseRestoreArrayF90()` 2234 2235 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2236 @*/ 2237 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array) 2238 { 2239 PetscFunctionBegin; 2240 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2241 PetscValidPointer(array, 2); 2242 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2243 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2244 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2245 A->offloadmask = PETSC_OFFLOAD_CPU; 2246 #endif 2247 PetscFunctionReturn(0); 2248 } 2249 2250 /*@C 2251 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2252 2253 Not Collective; No Fortran Support 2254 2255 Input Parameter: 2256 . mat - a dense matrix 2257 2258 Output Parameter: 2259 . array - pointer to the data 2260 2261 Level: intermediate 2262 2263 .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2264 @*/ 2265 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array) 2266 { 2267 PetscFunctionBegin; 2268 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2269 PetscValidPointer(array, 2); 2270 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 /*@C 2275 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2276 2277 Not Collective; No Fortran Support 2278 2279 Input Parameters: 2280 + mat - a dense matrix 2281 - array - pointer to the data (may be NULL) 2282 2283 Level: intermediate 2284 2285 .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2286 @*/ 2287 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array) 2288 { 2289 PetscFunctionBegin; 2290 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2291 PetscValidPointer(array, 2); 2292 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2293 PetscFunctionReturn(0); 2294 } 2295 2296 /*@C 2297 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2298 2299 Not Collective; No Fortran Support 2300 2301 Input Parameter: 2302 . mat - a dense matrix 2303 2304 Output Parameter: 2305 . array - pointer to the data 2306 2307 Level: intermediate 2308 2309 .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2310 @*/ 2311 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array) 2312 { 2313 PetscFunctionBegin; 2314 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2315 PetscValidPointer(array, 2); 2316 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 /*@C 2321 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2322 2323 Not Collective; No Fortran Support 2324 2325 Input Parameters: 2326 + mat - a dense matrix 2327 - array - pointer to the data (may be NULL) 2328 2329 Level: intermediate 2330 2331 .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2332 @*/ 2333 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array) 2334 { 2335 PetscFunctionBegin; 2336 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2337 PetscValidPointer(array, 2); 2338 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2339 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2340 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2341 A->offloadmask = PETSC_OFFLOAD_CPU; 2342 #endif 2343 PetscFunctionReturn(0); 2344 } 2345 2346 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2347 { 2348 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2349 PetscInt i, j, nrows, ncols, ldb; 2350 const PetscInt *irow, *icol; 2351 PetscScalar *av, *bv, *v = mat->v; 2352 Mat newmat; 2353 2354 PetscFunctionBegin; 2355 PetscCall(ISGetIndices(isrow, &irow)); 2356 PetscCall(ISGetIndices(iscol, &icol)); 2357 PetscCall(ISGetLocalSize(isrow, &nrows)); 2358 PetscCall(ISGetLocalSize(iscol, &ncols)); 2359 2360 /* Check submatrixcall */ 2361 if (scall == MAT_REUSE_MATRIX) { 2362 PetscInt n_cols, n_rows; 2363 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2364 if (n_rows != nrows || n_cols != ncols) { 2365 /* resize the result matrix to match number of requested rows/columns */ 2366 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2367 } 2368 newmat = *B; 2369 } else { 2370 /* Create and fill new matrix */ 2371 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2372 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2373 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2374 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2375 } 2376 2377 /* Now extract the data pointers and do the copy,column at a time */ 2378 PetscCall(MatDenseGetArray(newmat, &bv)); 2379 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2380 for (i = 0; i < ncols; i++) { 2381 av = v + mat->lda * icol[i]; 2382 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2383 bv += ldb; 2384 } 2385 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2386 2387 /* Assemble the matrices so that the correct flags are set */ 2388 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2389 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2390 2391 /* Free work space */ 2392 PetscCall(ISRestoreIndices(isrow, &irow)); 2393 PetscCall(ISRestoreIndices(iscol, &icol)); 2394 *B = newmat; 2395 PetscFunctionReturn(0); 2396 } 2397 2398 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2399 { 2400 PetscInt i; 2401 2402 PetscFunctionBegin; 2403 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2404 2405 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) 2410 { 2411 PetscFunctionBegin; 2412 PetscFunctionReturn(0); 2413 } 2414 2415 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) 2416 { 2417 PetscFunctionBegin; 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2422 { 2423 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2424 const PetscScalar *va; 2425 PetscScalar *vb; 2426 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2427 2428 PetscFunctionBegin; 2429 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2430 if (A->ops->copy != B->ops->copy) { 2431 PetscCall(MatCopy_Basic(A, B, str)); 2432 PetscFunctionReturn(0); 2433 } 2434 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2435 PetscCall(MatDenseGetArrayRead(A, &va)); 2436 PetscCall(MatDenseGetArray(B, &vb)); 2437 if (lda1 > m || lda2 > m) { 2438 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2439 } else { 2440 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2441 } 2442 PetscCall(MatDenseRestoreArray(B, &vb)); 2443 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2444 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2445 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2446 PetscFunctionReturn(0); 2447 } 2448 2449 PetscErrorCode MatSetUp_SeqDense(Mat A) 2450 { 2451 PetscFunctionBegin; 2452 PetscCall(PetscLayoutSetUp(A->rmap)); 2453 PetscCall(PetscLayoutSetUp(A->cmap)); 2454 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2455 PetscFunctionReturn(0); 2456 } 2457 2458 static PetscErrorCode MatConjugate_SeqDense(Mat A) 2459 { 2460 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2461 PetscInt i, j; 2462 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2463 PetscScalar *aa; 2464 2465 PetscFunctionBegin; 2466 PetscCall(MatDenseGetArray(A, &aa)); 2467 for (j = 0; j < A->cmap->n; j++) { 2468 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2469 } 2470 PetscCall(MatDenseRestoreArray(A, &aa)); 2471 if (mat->tau) 2472 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2477 { 2478 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2479 PetscInt i, j; 2480 PetscScalar *aa; 2481 2482 PetscFunctionBegin; 2483 PetscCall(MatDenseGetArray(A, &aa)); 2484 for (j = 0; j < A->cmap->n; j++) { 2485 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2486 } 2487 PetscCall(MatDenseRestoreArray(A, &aa)); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2492 { 2493 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2494 PetscInt i, j; 2495 PetscScalar *aa; 2496 2497 PetscFunctionBegin; 2498 PetscCall(MatDenseGetArray(A, &aa)); 2499 for (j = 0; j < A->cmap->n; j++) { 2500 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2501 } 2502 PetscCall(MatDenseRestoreArray(A, &aa)); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 /* ----------------------------------------------------------------*/ 2507 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2508 { 2509 PetscInt m = A->rmap->n, n = B->cmap->n; 2510 PetscBool cisdense = PETSC_FALSE; 2511 2512 PetscFunctionBegin; 2513 PetscCall(MatSetSizes(C, m, n, m, n)); 2514 #if defined(PETSC_HAVE_CUDA) 2515 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2516 #endif 2517 #if defined(PETSC_HAVE_HIP) 2518 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2519 #endif 2520 if (!cisdense) { 2521 PetscBool flg; 2522 2523 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2524 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2525 } 2526 PetscCall(MatSetUp(C)); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2531 { 2532 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2533 PetscBLASInt m, n, k; 2534 const PetscScalar *av, *bv; 2535 PetscScalar *cv; 2536 PetscScalar _DOne = 1.0, _DZero = 0.0; 2537 2538 PetscFunctionBegin; 2539 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2540 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2541 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2542 if (!m || !n || !k) PetscFunctionReturn(0); 2543 PetscCall(MatDenseGetArrayRead(A, &av)); 2544 PetscCall(MatDenseGetArrayRead(B, &bv)); 2545 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2546 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2547 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2548 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2549 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2550 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2555 { 2556 PetscInt m = A->rmap->n, n = B->rmap->n; 2557 PetscBool cisdense = PETSC_FALSE; 2558 2559 PetscFunctionBegin; 2560 PetscCall(MatSetSizes(C, m, n, m, n)); 2561 #if defined(PETSC_HAVE_CUDA) 2562 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2563 #endif 2564 #if defined(PETSC_HAVE_HIP) 2565 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2566 #endif 2567 if (!cisdense) { 2568 PetscBool flg; 2569 2570 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2571 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2572 } 2573 PetscCall(MatSetUp(C)); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2578 { 2579 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2580 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2581 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2582 const PetscScalar *av, *bv; 2583 PetscScalar *cv; 2584 PetscBLASInt m, n, k; 2585 PetscScalar _DOne = 1.0, _DZero = 0.0; 2586 2587 PetscFunctionBegin; 2588 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2589 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2590 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2591 if (!m || !n || !k) PetscFunctionReturn(0); 2592 PetscCall(MatDenseGetArrayRead(A, &av)); 2593 PetscCall(MatDenseGetArrayRead(B, &bv)); 2594 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2595 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2596 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2597 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2598 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2599 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2604 { 2605 PetscInt m = A->cmap->n, n = B->cmap->n; 2606 PetscBool cisdense = PETSC_FALSE; 2607 2608 PetscFunctionBegin; 2609 PetscCall(MatSetSizes(C, m, n, m, n)); 2610 #if defined(PETSC_HAVE_CUDA) 2611 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2612 #endif 2613 #if defined(PETSC_HAVE_HIP) 2614 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2615 #endif 2616 if (!cisdense) { 2617 PetscBool flg; 2618 2619 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2620 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2621 } 2622 PetscCall(MatSetUp(C)); 2623 PetscFunctionReturn(0); 2624 } 2625 2626 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2627 { 2628 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2629 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2630 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2631 const PetscScalar *av, *bv; 2632 PetscScalar *cv; 2633 PetscBLASInt m, n, k; 2634 PetscScalar _DOne = 1.0, _DZero = 0.0; 2635 2636 PetscFunctionBegin; 2637 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2638 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2639 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2640 if (!m || !n || !k) PetscFunctionReturn(0); 2641 PetscCall(MatDenseGetArrayRead(A, &av)); 2642 PetscCall(MatDenseGetArrayRead(B, &bv)); 2643 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2644 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2645 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2646 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2647 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2648 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2649 PetscFunctionReturn(0); 2650 } 2651 2652 /* ----------------------------------------------- */ 2653 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2654 { 2655 PetscFunctionBegin; 2656 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2657 C->ops->productsymbolic = MatProductSymbolic_AB; 2658 PetscFunctionReturn(0); 2659 } 2660 2661 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2662 { 2663 PetscFunctionBegin; 2664 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2665 C->ops->productsymbolic = MatProductSymbolic_AtB; 2666 PetscFunctionReturn(0); 2667 } 2668 2669 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2670 { 2671 PetscFunctionBegin; 2672 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2673 C->ops->productsymbolic = MatProductSymbolic_ABt; 2674 PetscFunctionReturn(0); 2675 } 2676 2677 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2678 { 2679 Mat_Product *product = C->product; 2680 2681 PetscFunctionBegin; 2682 switch (product->type) { 2683 case MATPRODUCT_AB: 2684 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2685 break; 2686 case MATPRODUCT_AtB: 2687 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2688 break; 2689 case MATPRODUCT_ABt: 2690 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2691 break; 2692 default: 2693 break; 2694 } 2695 PetscFunctionReturn(0); 2696 } 2697 /* ----------------------------------------------- */ 2698 2699 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2700 { 2701 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2702 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2703 PetscScalar *x; 2704 const PetscScalar *aa; 2705 2706 PetscFunctionBegin; 2707 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2708 PetscCall(VecGetArray(v, &x)); 2709 PetscCall(VecGetLocalSize(v, &p)); 2710 PetscCall(MatDenseGetArrayRead(A, &aa)); 2711 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2712 for (i = 0; i < m; i++) { 2713 x[i] = aa[i]; 2714 if (idx) idx[i] = 0; 2715 for (j = 1; j < n; j++) { 2716 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2717 x[i] = aa[i + a->lda * j]; 2718 if (idx) idx[i] = j; 2719 } 2720 } 2721 } 2722 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2723 PetscCall(VecRestoreArray(v, &x)); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2728 { 2729 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2730 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2731 PetscScalar *x; 2732 PetscReal atmp; 2733 const PetscScalar *aa; 2734 2735 PetscFunctionBegin; 2736 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2737 PetscCall(VecGetArray(v, &x)); 2738 PetscCall(VecGetLocalSize(v, &p)); 2739 PetscCall(MatDenseGetArrayRead(A, &aa)); 2740 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2741 for (i = 0; i < m; i++) { 2742 x[i] = PetscAbsScalar(aa[i]); 2743 for (j = 1; j < n; j++) { 2744 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2745 if (PetscAbsScalar(x[i]) < atmp) { 2746 x[i] = atmp; 2747 if (idx) idx[i] = j; 2748 } 2749 } 2750 } 2751 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2752 PetscCall(VecRestoreArray(v, &x)); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2757 { 2758 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2759 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2760 PetscScalar *x; 2761 const PetscScalar *aa; 2762 2763 PetscFunctionBegin; 2764 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2765 PetscCall(MatDenseGetArrayRead(A, &aa)); 2766 PetscCall(VecGetArray(v, &x)); 2767 PetscCall(VecGetLocalSize(v, &p)); 2768 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2769 for (i = 0; i < m; i++) { 2770 x[i] = aa[i]; 2771 if (idx) idx[i] = 0; 2772 for (j = 1; j < n; j++) { 2773 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2774 x[i] = aa[i + a->lda * j]; 2775 if (idx) idx[i] = j; 2776 } 2777 } 2778 } 2779 PetscCall(VecRestoreArray(v, &x)); 2780 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2781 PetscFunctionReturn(0); 2782 } 2783 2784 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 2785 { 2786 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2787 PetscScalar *x; 2788 const PetscScalar *aa; 2789 2790 PetscFunctionBegin; 2791 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2792 PetscCall(MatDenseGetArrayRead(A, &aa)); 2793 PetscCall(VecGetArray(v, &x)); 2794 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2795 PetscCall(VecRestoreArray(v, &x)); 2796 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2797 PetscFunctionReturn(0); 2798 } 2799 2800 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 2801 { 2802 PetscInt i, j, m, n; 2803 const PetscScalar *a; 2804 2805 PetscFunctionBegin; 2806 PetscCall(MatGetSize(A, &m, &n)); 2807 PetscCall(PetscArrayzero(reductions, n)); 2808 PetscCall(MatDenseGetArrayRead(A, &a)); 2809 if (type == NORM_2) { 2810 for (i = 0; i < n; i++) { 2811 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 2812 a += m; 2813 } 2814 } else if (type == NORM_1) { 2815 for (i = 0; i < n; i++) { 2816 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 2817 a += m; 2818 } 2819 } else if (type == NORM_INFINITY) { 2820 for (i = 0; i < n; i++) { 2821 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 2822 a += m; 2823 } 2824 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2825 for (i = 0; i < n; i++) { 2826 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 2827 a += m; 2828 } 2829 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2830 for (i = 0; i < n; i++) { 2831 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 2832 a += m; 2833 } 2834 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2835 PetscCall(MatDenseRestoreArrayRead(A, &a)); 2836 if (type == NORM_2) { 2837 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2838 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2839 for (i = 0; i < n; i++) reductions[i] /= m; 2840 } 2841 PetscFunctionReturn(0); 2842 } 2843 2844 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 2845 { 2846 PetscScalar *a; 2847 PetscInt lda, m, n, i, j; 2848 2849 PetscFunctionBegin; 2850 PetscCall(MatGetSize(x, &m, &n)); 2851 PetscCall(MatDenseGetLDA(x, &lda)); 2852 PetscCall(MatDenseGetArrayWrite(x, &a)); 2853 for (j = 0; j < n; j++) { 2854 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 2855 } 2856 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 2857 PetscFunctionReturn(0); 2858 } 2859 2860 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 2861 { 2862 PetscFunctionBegin; 2863 *missing = PETSC_FALSE; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 /* vals is not const */ 2868 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 2869 { 2870 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2871 PetscScalar *v; 2872 2873 PetscFunctionBegin; 2874 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2875 PetscCall(MatDenseGetArray(A, &v)); 2876 *vals = v + col * a->lda; 2877 PetscCall(MatDenseRestoreArray(A, &v)); 2878 PetscFunctionReturn(0); 2879 } 2880 2881 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 2882 { 2883 PetscFunctionBegin; 2884 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 2885 PetscFunctionReturn(0); 2886 } 2887 2888 /* -------------------------------------------------------------------*/ 2889 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 2890 MatGetRow_SeqDense, 2891 MatRestoreRow_SeqDense, 2892 MatMult_SeqDense, 2893 /* 4*/ MatMultAdd_SeqDense, 2894 MatMultTranspose_SeqDense, 2895 MatMultTransposeAdd_SeqDense, 2896 NULL, 2897 NULL, 2898 NULL, 2899 /* 10*/ NULL, 2900 MatLUFactor_SeqDense, 2901 MatCholeskyFactor_SeqDense, 2902 MatSOR_SeqDense, 2903 MatTranspose_SeqDense, 2904 /* 15*/ MatGetInfo_SeqDense, 2905 MatEqual_SeqDense, 2906 MatGetDiagonal_SeqDense, 2907 MatDiagonalScale_SeqDense, 2908 MatNorm_SeqDense, 2909 /* 20*/ MatAssemblyBegin_SeqDense, 2910 MatAssemblyEnd_SeqDense, 2911 MatSetOption_SeqDense, 2912 MatZeroEntries_SeqDense, 2913 /* 24*/ MatZeroRows_SeqDense, 2914 NULL, 2915 NULL, 2916 NULL, 2917 NULL, 2918 /* 29*/ MatSetUp_SeqDense, 2919 NULL, 2920 NULL, 2921 NULL, 2922 NULL, 2923 /* 34*/ MatDuplicate_SeqDense, 2924 NULL, 2925 NULL, 2926 NULL, 2927 NULL, 2928 /* 39*/ MatAXPY_SeqDense, 2929 MatCreateSubMatrices_SeqDense, 2930 NULL, 2931 MatGetValues_SeqDense, 2932 MatCopy_SeqDense, 2933 /* 44*/ MatGetRowMax_SeqDense, 2934 MatScale_SeqDense, 2935 MatShift_SeqDense, 2936 NULL, 2937 MatZeroRowsColumns_SeqDense, 2938 /* 49*/ MatSetRandom_SeqDense, 2939 NULL, 2940 NULL, 2941 NULL, 2942 NULL, 2943 /* 54*/ NULL, 2944 NULL, 2945 NULL, 2946 NULL, 2947 NULL, 2948 /* 59*/ MatCreateSubMatrix_SeqDense, 2949 MatDestroy_SeqDense, 2950 MatView_SeqDense, 2951 NULL, 2952 NULL, 2953 /* 64*/ NULL, 2954 NULL, 2955 NULL, 2956 NULL, 2957 NULL, 2958 /* 69*/ MatGetRowMaxAbs_SeqDense, 2959 NULL, 2960 NULL, 2961 NULL, 2962 NULL, 2963 /* 74*/ NULL, 2964 NULL, 2965 NULL, 2966 NULL, 2967 NULL, 2968 /* 79*/ NULL, 2969 NULL, 2970 NULL, 2971 NULL, 2972 /* 83*/ MatLoad_SeqDense, 2973 MatIsSymmetric_SeqDense, 2974 MatIsHermitian_SeqDense, 2975 NULL, 2976 NULL, 2977 NULL, 2978 /* 89*/ NULL, 2979 NULL, 2980 MatMatMultNumeric_SeqDense_SeqDense, 2981 NULL, 2982 NULL, 2983 /* 94*/ NULL, 2984 NULL, 2985 NULL, 2986 MatMatTransposeMultNumeric_SeqDense_SeqDense, 2987 NULL, 2988 /* 99*/ MatProductSetFromOptions_SeqDense, 2989 NULL, 2990 NULL, 2991 MatConjugate_SeqDense, 2992 NULL, 2993 /*104*/ NULL, 2994 MatRealPart_SeqDense, 2995 MatImaginaryPart_SeqDense, 2996 NULL, 2997 NULL, 2998 /*109*/ NULL, 2999 NULL, 3000 MatGetRowMin_SeqDense, 3001 MatGetColumnVector_SeqDense, 3002 MatMissingDiagonal_SeqDense, 3003 /*114*/ NULL, 3004 NULL, 3005 NULL, 3006 NULL, 3007 NULL, 3008 /*119*/ NULL, 3009 NULL, 3010 NULL, 3011 NULL, 3012 NULL, 3013 /*124*/ NULL, 3014 MatGetColumnReductions_SeqDense, 3015 NULL, 3016 NULL, 3017 NULL, 3018 /*129*/ NULL, 3019 NULL, 3020 NULL, 3021 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3022 NULL, 3023 /*134*/ NULL, 3024 NULL, 3025 NULL, 3026 NULL, 3027 NULL, 3028 /*139*/ NULL, 3029 NULL, 3030 NULL, 3031 NULL, 3032 NULL, 3033 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3034 /*145*/ NULL, 3035 NULL, 3036 NULL, 3037 NULL, 3038 NULL, 3039 /*150*/ NULL, 3040 NULL}; 3041 3042 /*@C 3043 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3044 is stored in column major order (the usual Fortran 77 manner). Many 3045 of the matrix operations use the BLAS and LAPACK routines. 3046 3047 Collective 3048 3049 Input Parameters: 3050 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3051 . m - number of rows 3052 . n - number of columns 3053 - data - optional location of matrix data in column major order. Set data=NULL for PETSc 3054 to control all matrix memory allocation. 3055 3056 Output Parameter: 3057 . A - the matrix 3058 3059 Note: 3060 The data input variable is intended primarily for Fortran programmers 3061 who wish to allocate their own matrix memory space. Most users should 3062 set data=NULL. 3063 3064 Level: intermediate 3065 3066 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3067 @*/ 3068 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A) 3069 { 3070 PetscFunctionBegin; 3071 PetscCall(MatCreate(comm, A)); 3072 PetscCall(MatSetSizes(*A, m, n, m, n)); 3073 PetscCall(MatSetType(*A, MATSEQDENSE)); 3074 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3075 PetscFunctionReturn(0); 3076 } 3077 3078 /*@C 3079 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3080 3081 Collective 3082 3083 Input Parameters: 3084 + B - the matrix 3085 - data - the array (or NULL) 3086 3087 Note: 3088 The data input variable is intended primarily for Fortran programmers 3089 who wish to allocate their own matrix memory space. Most users should 3090 need not call this routine. 3091 3092 Level: intermediate 3093 3094 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3095 @*/ 3096 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3097 { 3098 PetscFunctionBegin; 3099 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3100 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3101 PetscFunctionReturn(0); 3102 } 3103 3104 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3105 { 3106 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3107 3108 PetscFunctionBegin; 3109 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3110 B->preallocated = PETSC_TRUE; 3111 3112 PetscCall(PetscLayoutSetUp(B->rmap)); 3113 PetscCall(PetscLayoutSetUp(B->cmap)); 3114 3115 if (b->lda <= 0) b->lda = B->rmap->n; 3116 3117 if (!data) { /* petsc-allocated storage */ 3118 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3119 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3120 3121 b->user_alloc = PETSC_FALSE; 3122 } else { /* user-allocated storage */ 3123 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3124 b->v = data; 3125 b->user_alloc = PETSC_TRUE; 3126 } 3127 B->assembled = PETSC_TRUE; 3128 PetscFunctionReturn(0); 3129 } 3130 3131 #if defined(PETSC_HAVE_ELEMENTAL) 3132 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3133 { 3134 Mat mat_elemental; 3135 const PetscScalar *array; 3136 PetscScalar *v_colwise; 3137 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3138 3139 PetscFunctionBegin; 3140 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3141 PetscCall(MatDenseGetArrayRead(A, &array)); 3142 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3143 k = 0; 3144 for (j = 0; j < N; j++) { 3145 cols[j] = j; 3146 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3147 } 3148 for (i = 0; i < M; i++) rows[i] = i; 3149 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3150 3151 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3152 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3153 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3154 PetscCall(MatSetUp(mat_elemental)); 3155 3156 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3157 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3158 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3159 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3160 PetscCall(PetscFree3(v_colwise, rows, cols)); 3161 3162 if (reuse == MAT_INPLACE_MATRIX) { 3163 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3164 } else { 3165 *newmat = mat_elemental; 3166 } 3167 PetscFunctionReturn(0); 3168 } 3169 #endif 3170 3171 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3172 { 3173 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3174 PetscBool data; 3175 3176 PetscFunctionBegin; 3177 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3178 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3179 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3180 b->lda = lda; 3181 PetscFunctionReturn(0); 3182 } 3183 3184 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3185 { 3186 PetscFunctionBegin; 3187 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3188 PetscFunctionReturn(0); 3189 } 3190 3191 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3192 { 3193 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3194 3195 PetscFunctionBegin; 3196 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3197 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3198 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3199 a->vecinuse = col + 1; 3200 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3201 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3202 *v = a->cvec; 3203 PetscFunctionReturn(0); 3204 } 3205 3206 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3207 { 3208 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3209 3210 PetscFunctionBegin; 3211 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3212 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3213 a->vecinuse = 0; 3214 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3215 PetscCall(VecResetArray(a->cvec)); 3216 if (v) *v = NULL; 3217 PetscFunctionReturn(0); 3218 } 3219 3220 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3221 { 3222 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3223 3224 PetscFunctionBegin; 3225 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3226 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3227 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3228 a->vecinuse = col + 1; 3229 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3230 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3231 PetscCall(VecLockReadPush(a->cvec)); 3232 *v = a->cvec; 3233 PetscFunctionReturn(0); 3234 } 3235 3236 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3237 { 3238 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3239 3240 PetscFunctionBegin; 3241 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3242 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3243 a->vecinuse = 0; 3244 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3245 PetscCall(VecLockReadPop(a->cvec)); 3246 PetscCall(VecResetArray(a->cvec)); 3247 if (v) *v = NULL; 3248 PetscFunctionReturn(0); 3249 } 3250 3251 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3252 { 3253 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3254 3255 PetscFunctionBegin; 3256 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3257 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3258 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3259 a->vecinuse = col + 1; 3260 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3261 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3262 *v = a->cvec; 3263 PetscFunctionReturn(0); 3264 } 3265 3266 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3267 { 3268 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3269 3270 PetscFunctionBegin; 3271 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3272 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3273 a->vecinuse = 0; 3274 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3275 PetscCall(VecResetArray(a->cvec)); 3276 if (v) *v = NULL; 3277 PetscFunctionReturn(0); 3278 } 3279 3280 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3281 { 3282 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3283 3284 PetscFunctionBegin; 3285 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3286 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3287 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3288 if (!a->cmat) { 3289 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat)); 3290 } else { 3291 PetscCall(MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda)); 3292 } 3293 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3294 a->matinuse = cbegin + 1; 3295 *v = a->cmat; 3296 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3297 A->offloadmask = PETSC_OFFLOAD_CPU; 3298 #endif 3299 PetscFunctionReturn(0); 3300 } 3301 3302 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3303 { 3304 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3305 3306 PetscFunctionBegin; 3307 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3308 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3309 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3310 a->matinuse = 0; 3311 PetscCall(MatDenseResetArray(a->cmat)); 3312 if (v) *v = NULL; 3313 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3314 A->offloadmask = PETSC_OFFLOAD_CPU; 3315 #endif 3316 PetscFunctionReturn(0); 3317 } 3318 3319 /*MC 3320 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3321 3322 Options Database Keys: 3323 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3324 3325 Level: beginner 3326 3327 .seealso: `MATSEQDENSE`, `MatCreateSeqDense()` 3328 M*/ 3329 PetscErrorCode MatCreate_SeqDense(Mat B) 3330 { 3331 Mat_SeqDense *b; 3332 PetscMPIInt size; 3333 3334 PetscFunctionBegin; 3335 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3336 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3337 3338 PetscCall(PetscNew(&b)); 3339 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3340 B->data = (void *)b; 3341 3342 b->roworiented = PETSC_TRUE; 3343 3344 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3345 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3346 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3347 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3348 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3349 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3350 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3351 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3352 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3353 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3354 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3355 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3356 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3357 #if defined(PETSC_HAVE_ELEMENTAL) 3358 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3359 #endif 3360 #if defined(PETSC_HAVE_SCALAPACK) 3361 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3362 #endif 3363 #if defined(PETSC_HAVE_CUDA) 3364 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3365 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3366 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3367 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3368 #endif 3369 #if defined(PETSC_HAVE_HIP) 3370 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3371 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3372 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3373 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3374 #endif 3375 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3376 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3377 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3378 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3379 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3380 3381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3391 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3392 PetscFunctionReturn(0); 3393 } 3394 3395 /*@C 3396 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3397 3398 Not Collective 3399 3400 Input Parameters: 3401 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3402 - col - column index 3403 3404 Output Parameter: 3405 . vals - pointer to the data 3406 3407 Level: intermediate 3408 3409 Note: 3410 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3411 3412 .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3413 @*/ 3414 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals) 3415 { 3416 PetscFunctionBegin; 3417 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3418 PetscValidLogicalCollectiveInt(A, col, 2); 3419 PetscValidPointer(vals, 3); 3420 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3421 PetscFunctionReturn(0); 3422 } 3423 3424 /*@C 3425 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3426 3427 Not Collective 3428 3429 Input Parameters: 3430 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3431 - vals - pointer to the data (may be NULL) 3432 3433 Level: intermediate 3434 3435 .seealso: `MATDENSE`, `MatDenseGetColumn()` 3436 @*/ 3437 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals) 3438 { 3439 PetscFunctionBegin; 3440 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3441 PetscValidPointer(vals, 2); 3442 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3443 PetscFunctionReturn(0); 3444 } 3445 3446 /*@ 3447 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3448 3449 Collective 3450 3451 Input Parameters: 3452 + mat - the `Mat` object 3453 - col - the column index 3454 3455 Output Parameter: 3456 . v - the vector 3457 3458 Notes: 3459 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3460 3461 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3462 3463 Level: intermediate 3464 3465 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3466 @*/ 3467 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3468 { 3469 PetscFunctionBegin; 3470 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3471 PetscValidType(A, 1); 3472 PetscValidLogicalCollectiveInt(A, col, 2); 3473 PetscValidPointer(v, 3); 3474 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3475 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3476 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3477 PetscFunctionReturn(0); 3478 } 3479 3480 /*@ 3481 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec(). 3482 3483 Collective 3484 3485 Input Parameters: 3486 + mat - the Mat object 3487 . col - the column index 3488 - v - the Vec object (may be NULL) 3489 3490 Level: intermediate 3491 3492 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3493 @*/ 3494 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3495 { 3496 PetscFunctionBegin; 3497 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3498 PetscValidType(A, 1); 3499 PetscValidLogicalCollectiveInt(A, col, 2); 3500 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3501 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3502 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3503 PetscFunctionReturn(0); 3504 } 3505 3506 /*@ 3507 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec. 3508 3509 Collective 3510 3511 Input Parameters: 3512 + mat - the Mat object 3513 - col - the column index 3514 3515 Output Parameter: 3516 . v - the vector 3517 3518 Notes: 3519 The vector is owned by PETSc and users cannot modify it. 3520 3521 Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed. 3522 3523 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access. 3524 3525 Level: intermediate 3526 3527 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3528 @*/ 3529 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3530 { 3531 PetscFunctionBegin; 3532 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3533 PetscValidType(A, 1); 3534 PetscValidLogicalCollectiveInt(A, col, 2); 3535 PetscValidPointer(v, 3); 3536 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3537 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3538 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3539 PetscFunctionReturn(0); 3540 } 3541 3542 /*@ 3543 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead(). 3544 3545 Collective 3546 3547 Input Parameters: 3548 + mat - the Mat object 3549 . col - the column index 3550 - v - the Vec object (may be NULL) 3551 3552 Level: intermediate 3553 3554 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3555 @*/ 3556 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3557 { 3558 PetscFunctionBegin; 3559 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3560 PetscValidType(A, 1); 3561 PetscValidLogicalCollectiveInt(A, col, 2); 3562 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3563 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3564 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3565 PetscFunctionReturn(0); 3566 } 3567 3568 /*@ 3569 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec. 3570 3571 Collective 3572 3573 Input Parameters: 3574 + mat - the Mat object 3575 - col - the column index 3576 3577 Output Parameter: 3578 . v - the vector 3579 3580 Notes: 3581 The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed. 3582 3583 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access. 3584 3585 Level: intermediate 3586 3587 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3588 @*/ 3589 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3590 { 3591 PetscFunctionBegin; 3592 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3593 PetscValidType(A, 1); 3594 PetscValidLogicalCollectiveInt(A, col, 2); 3595 PetscValidPointer(v, 3); 3596 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3597 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3598 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3599 PetscFunctionReturn(0); 3600 } 3601 3602 /*@ 3603 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite(). 3604 3605 Collective 3606 3607 Input Parameters: 3608 + mat - the Mat object 3609 . col - the column index 3610 - v - the Vec object (may be NULL) 3611 3612 Level: intermediate 3613 3614 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3615 @*/ 3616 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3617 { 3618 PetscFunctionBegin; 3619 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3620 PetscValidType(A, 1); 3621 PetscValidLogicalCollectiveInt(A, col, 2); 3622 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3623 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3624 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3625 PetscFunctionReturn(0); 3626 } 3627 3628 /*@ 3629 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat. 3630 3631 Collective 3632 3633 Input Parameters: 3634 + mat - the Mat object 3635 . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0) 3636 . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M) 3637 . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0) 3638 - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N) 3639 3640 Output Parameter: 3641 . v - the matrix 3642 3643 Notes: 3644 The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed. 3645 3646 The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows. 3647 3648 Level: intermediate 3649 3650 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3651 @*/ 3652 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3653 { 3654 PetscFunctionBegin; 3655 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3656 PetscValidType(A, 1); 3657 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3658 PetscValidLogicalCollectiveInt(A, rend, 3); 3659 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3660 PetscValidLogicalCollectiveInt(A, cend, 5); 3661 PetscValidPointer(v, 6); 3662 if (rbegin == PETSC_DECIDE) rbegin = 0; 3663 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3664 if (cbegin == PETSC_DECIDE) cbegin = 0; 3665 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3666 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3667 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3668 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3669 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3670 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3671 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3672 PetscFunctionReturn(0); 3673 } 3674 3675 /*@ 3676 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix(). 3677 3678 Collective 3679 3680 Input Parameters: 3681 + mat - the Mat object 3682 - v - the Mat object (may be NULL) 3683 3684 Level: intermediate 3685 3686 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3687 @*/ 3688 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3689 { 3690 PetscFunctionBegin; 3691 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3692 PetscValidType(A, 1); 3693 PetscValidPointer(v, 2); 3694 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3695 PetscFunctionReturn(0); 3696 } 3697 3698 #include <petscblaslapack.h> 3699 #include <petsc/private/kernels/blockinvert.h> 3700 3701 PetscErrorCode MatSeqDenseInvert(Mat A) 3702 { 3703 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3704 PetscInt bs = A->rmap->n; 3705 MatScalar *values = a->v; 3706 const PetscReal shift = 0.0; 3707 PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE; 3708 3709 PetscFunctionBegin; 3710 /* factor and invert each block */ 3711 switch (bs) { 3712 case 1: 3713 values[0] = (PetscScalar)1.0 / (values[0] + shift); 3714 break; 3715 case 2: 3716 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3717 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3718 break; 3719 case 3: 3720 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3721 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3722 break; 3723 case 4: 3724 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3725 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3726 break; 3727 case 5: { 3728 PetscScalar work[25]; 3729 PetscInt ipvt[5]; 3730 3731 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3732 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3733 } break; 3734 case 6: 3735 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3736 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3737 break; 3738 case 7: 3739 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3740 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3741 break; 3742 default: { 3743 PetscInt *v_pivots, *IJ, j; 3744 PetscScalar *v_work; 3745 3746 PetscCall(PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ)); 3747 for (j = 0; j < bs; j++) IJ[j] = j; 3748 PetscCall(PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 3749 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3750 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 3751 } 3752 } 3753 PetscFunctionReturn(0); 3754 } 3755