1 /* 2 Defines the basic matrix operations for sequential dense. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 8 #include <petscblaslapack.h> 9 #include <../src/mat/impls/aij/seq/aij.h> 10 11 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 12 { 13 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 14 PetscInt j, k, n = A->rmap->n; 15 PetscScalar *v; 16 17 PetscFunctionBegin; 18 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 19 PetscCall(MatDenseGetArray(A, &v)); 20 if (!hermitian) { 21 for (k = 0; k < n; k++) { 22 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 23 } 24 } else { 25 for (k = 0; k < n; k++) { 26 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 27 } 28 } 29 PetscCall(MatDenseRestoreArray(A, &v)); 30 PetscFunctionReturn(0); 31 } 32 33 PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 34 { 35 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 36 PetscBLASInt info, n; 37 38 PetscFunctionBegin; 39 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 40 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 41 if (A->factortype == MAT_FACTOR_LU) { 42 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 43 if (!mat->fwork) { 44 mat->lfwork = n; 45 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 46 } 47 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 48 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 49 PetscCall(PetscFPTrapPop()); 50 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 51 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 52 if (A->spd == PETSC_BOOL3_TRUE) { 53 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 54 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 55 PetscCall(PetscFPTrapPop()); 56 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 57 #if defined(PETSC_USE_COMPLEX) 58 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 59 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 60 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 61 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 62 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 63 PetscCall(PetscFPTrapPop()); 64 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 65 #endif 66 } else { /* symmetric case */ 67 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 68 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 69 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 70 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 71 PetscCall(PetscFPTrapPop()); 72 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 73 } 74 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 75 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 76 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 77 78 A->ops->solve = NULL; 79 A->ops->matsolve = NULL; 80 A->ops->solvetranspose = NULL; 81 A->ops->matsolvetranspose = NULL; 82 A->ops->solveadd = NULL; 83 A->ops->solvetransposeadd = NULL; 84 A->factortype = MAT_FACTOR_NONE; 85 PetscCall(PetscFree(A->solvertype)); 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 90 { 91 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 92 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 93 PetscScalar *slot, *bb, *v; 94 const PetscScalar *xx; 95 96 PetscFunctionBegin; 97 if (PetscDefined(USE_DEBUG)) { 98 for (i = 0; i < N; i++) { 99 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 100 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 101 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 102 } 103 } 104 if (!N) PetscFunctionReturn(0); 105 106 /* fix right hand side if needed */ 107 if (x && b) { 108 Vec xt; 109 110 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 111 PetscCall(VecDuplicate(x, &xt)); 112 PetscCall(VecCopy(x, xt)); 113 PetscCall(VecScale(xt, -1.0)); 114 PetscCall(MatMultAdd(A, xt, b, b)); 115 PetscCall(VecDestroy(&xt)); 116 PetscCall(VecGetArrayRead(x, &xx)); 117 PetscCall(VecGetArray(b, &bb)); 118 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 119 PetscCall(VecRestoreArrayRead(x, &xx)); 120 PetscCall(VecRestoreArray(b, &bb)); 121 } 122 123 PetscCall(MatDenseGetArray(A, &v)); 124 for (i = 0; i < N; i++) { 125 slot = v + rows[i] * m; 126 PetscCall(PetscArrayzero(slot, r)); 127 } 128 for (i = 0; i < N; i++) { 129 slot = v + rows[i]; 130 for (j = 0; j < n; j++) { 131 *slot = 0.0; 132 slot += m; 133 } 134 } 135 if (diag != 0.0) { 136 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 137 for (i = 0; i < N; i++) { 138 slot = v + (m + 1) * rows[i]; 139 *slot = diag; 140 } 141 } 142 PetscCall(MatDenseRestoreArray(A, &v)); 143 PetscFunctionReturn(0); 144 } 145 146 PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C) 147 { 148 Mat_SeqDense *c = (Mat_SeqDense *)(C->data); 149 150 PetscFunctionBegin; 151 if (c->ptapwork) { 152 PetscCall((*C->ops->matmultnumeric)(A, P, c->ptapwork)); 153 PetscCall((*C->ops->transposematmultnumeric)(P, c->ptapwork, C)); 154 } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first"); 155 PetscFunctionReturn(0); 156 } 157 158 PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C) 159 { 160 Mat_SeqDense *c; 161 PetscBool cisdense = PETSC_FALSE; 162 163 PetscFunctionBegin; 164 PetscCall(MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N)); 165 #if defined(PETSC_HAVE_CUDA) 166 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 167 #elif (PETSC_HAVE_HIP) 168 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 169 #endif 170 171 if (!cisdense) { 172 PetscBool flg; 173 174 PetscCall(PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg)); 175 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 176 } 177 PetscCall(MatSetUp(C)); 178 c = (Mat_SeqDense *)C->data; 179 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork)); 180 PetscCall(MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N)); 181 PetscCall(MatSetType(c->ptapwork, ((PetscObject)C)->type_name)); 182 PetscCall(MatSetUp(c->ptapwork)); 183 PetscFunctionReturn(0); 184 } 185 186 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 187 { 188 Mat B = NULL; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 190 Mat_SeqDense *b; 191 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 192 const MatScalar *av; 193 PetscBool isseqdense; 194 195 PetscFunctionBegin; 196 if (reuse == MAT_REUSE_MATRIX) { 197 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 198 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)(*newmat))->type_name); 199 } 200 if (reuse != MAT_REUSE_MATRIX) { 201 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 202 PetscCall(MatSetSizes(B, m, n, m, n)); 203 PetscCall(MatSetType(B, MATSEQDENSE)); 204 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 205 b = (Mat_SeqDense *)(B->data); 206 } else { 207 b = (Mat_SeqDense *)((*newmat)->data); 208 PetscCall(PetscArrayzero(b->v, m * n)); 209 } 210 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 211 for (i = 0; i < m; i++) { 212 PetscInt j; 213 for (j = 0; j < ai[1] - ai[0]; j++) { 214 b->v[*aj * m + i] = *av; 215 aj++; 216 av++; 217 } 218 ai++; 219 } 220 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 221 222 if (reuse == MAT_INPLACE_MATRIX) { 223 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 224 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 225 PetscCall(MatHeaderReplace(A, &B)); 226 } else { 227 if (B) *newmat = B; 228 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 229 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 230 } 231 PetscFunctionReturn(0); 232 } 233 234 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 235 { 236 Mat B = NULL; 237 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 238 PetscInt i, j; 239 PetscInt *rows, *nnz; 240 MatScalar *aa = a->v, *vals; 241 242 PetscFunctionBegin; 243 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 244 if (reuse != MAT_REUSE_MATRIX) { 245 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 246 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 247 PetscCall(MatSetType(B, MATSEQAIJ)); 248 for (j = 0; j < A->cmap->n; j++) { 249 for (i = 0; i < A->rmap->n; i++) 250 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 251 aa += a->lda; 252 } 253 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 254 } else B = *newmat; 255 aa = a->v; 256 for (j = 0; j < A->cmap->n; j++) { 257 PetscInt numRows = 0; 258 for (i = 0; i < A->rmap->n; i++) 259 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 260 rows[numRows] = i; 261 vals[numRows++] = aa[i]; 262 } 263 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 264 aa += a->lda; 265 } 266 PetscCall(PetscFree3(rows, nnz, vals)); 267 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 268 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 269 270 if (reuse == MAT_INPLACE_MATRIX) { 271 PetscCall(MatHeaderReplace(A, &B)); 272 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 273 PetscFunctionReturn(0); 274 } 275 276 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 277 { 278 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 279 const PetscScalar *xv; 280 PetscScalar *yv; 281 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 282 283 PetscFunctionBegin; 284 PetscCall(MatDenseGetArrayRead(X, &xv)); 285 PetscCall(MatDenseGetArray(Y, &yv)); 286 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 287 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 288 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 289 PetscCall(PetscBLASIntCast(y->lda, &lday)); 290 if (ldax > m || lday > m) { 291 PetscInt j; 292 293 for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one)); 294 } else { 295 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 296 } 297 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 298 PetscCall(MatDenseRestoreArray(Y, &yv)); 299 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 300 PetscFunctionReturn(0); 301 } 302 303 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 304 { 305 PetscLogDouble N = A->rmap->n * A->cmap->n; 306 307 PetscFunctionBegin; 308 info->block_size = 1.0; 309 info->nz_allocated = N; 310 info->nz_used = N; 311 info->nz_unneeded = 0; 312 info->assemblies = A->num_ass; 313 info->mallocs = 0; 314 info->memory = 0; /* REVIEW ME */ 315 info->fill_ratio_given = 0; 316 info->fill_ratio_needed = 0; 317 info->factor_mallocs = 0; 318 PetscFunctionReturn(0); 319 } 320 321 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 322 { 323 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 324 PetscScalar *v; 325 PetscBLASInt one = 1, j, nz, lda = 0; 326 327 PetscFunctionBegin; 328 PetscCall(MatDenseGetArray(A, &v)); 329 PetscCall(PetscBLASIntCast(a->lda, &lda)); 330 if (lda > A->rmap->n) { 331 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 332 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 333 } else { 334 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 335 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 336 } 337 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 338 PetscCall(MatDenseRestoreArray(A, &v)); 339 PetscFunctionReturn(0); 340 } 341 342 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 343 { 344 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 345 PetscScalar *v; 346 PetscInt j, k; 347 348 PetscFunctionBegin; 349 PetscCall(MatDenseGetArray(A, &v)); 350 k = PetscMin(A->rmap->n, A->cmap->n); 351 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 352 PetscCall(PetscLogFlops(k)); 353 PetscCall(MatDenseRestoreArray(A, &v)); 354 PetscFunctionReturn(0); 355 } 356 357 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 358 { 359 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 360 PetscInt i, j, m = A->rmap->n, N = a->lda; 361 const PetscScalar *v; 362 363 PetscFunctionBegin; 364 *fl = PETSC_FALSE; 365 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 366 PetscCall(MatDenseGetArrayRead(A, &v)); 367 for (i = 0; i < m; i++) { 368 for (j = i; j < m; j++) { 369 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 370 } 371 } 372 *fl = PETSC_TRUE; 373 restore: 374 PetscCall(MatDenseRestoreArrayRead(A, &v)); 375 PetscFunctionReturn(0); 376 } 377 378 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 379 { 380 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 381 PetscInt i, j, m = A->rmap->n, N = a->lda; 382 const PetscScalar *v; 383 384 PetscFunctionBegin; 385 *fl = PETSC_FALSE; 386 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 387 PetscCall(MatDenseGetArrayRead(A, &v)); 388 for (i = 0; i < m; i++) { 389 for (j = i; j < m; j++) { 390 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 391 } 392 } 393 *fl = PETSC_TRUE; 394 restore: 395 PetscCall(MatDenseRestoreArrayRead(A, &v)); 396 PetscFunctionReturn(0); 397 } 398 399 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 400 { 401 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 402 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 403 PetscBool isdensecpu; 404 405 PetscFunctionBegin; 406 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 407 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 408 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 409 PetscCall(MatDenseSetLDA(newi, lda)); 410 } 411 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 412 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 413 if (cpvalues == MAT_COPY_VALUES) { 414 const PetscScalar *av; 415 PetscScalar *v; 416 417 PetscCall(MatDenseGetArrayRead(A, &av)); 418 PetscCall(MatDenseGetArrayWrite(newi, &v)); 419 PetscCall(MatDenseGetLDA(newi, &nlda)); 420 m = A->rmap->n; 421 if (lda > m || nlda > m) { 422 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(v + j * nlda, av + j * lda, m)); 423 } else { 424 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 425 } 426 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 427 PetscCall(MatDenseRestoreArrayRead(A, &av)); 428 } 429 PetscFunctionReturn(0); 430 } 431 432 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 433 { 434 PetscFunctionBegin; 435 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 436 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 437 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 438 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 439 PetscFunctionReturn(0); 440 } 441 442 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 443 { 444 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 445 PetscBLASInt info; 446 447 PetscFunctionBegin; 448 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 449 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 450 PetscCall(PetscFPTrapPop()); 451 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 452 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 453 PetscFunctionReturn(0); 454 } 455 456 static PetscErrorCode MatConjugate_SeqDense(Mat); 457 458 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 459 { 460 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 461 PetscBLASInt info; 462 463 PetscFunctionBegin; 464 if (A->spd == PETSC_BOOL3_TRUE) { 465 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 466 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 467 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 468 PetscCall(PetscFPTrapPop()); 469 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 470 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 471 #if defined(PETSC_USE_COMPLEX) 472 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 473 if (T) PetscCall(MatConjugate_SeqDense(A)); 474 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 475 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 476 PetscCall(PetscFPTrapPop()); 477 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 478 if (T) PetscCall(MatConjugate_SeqDense(A)); 479 #endif 480 } else { /* symmetric case */ 481 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 482 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 483 PetscCall(PetscFPTrapPop()); 484 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 485 } 486 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 487 PetscFunctionReturn(0); 488 } 489 490 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 491 { 492 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 493 PetscBLASInt info; 494 char trans; 495 496 PetscFunctionBegin; 497 if (PetscDefined(USE_COMPLEX)) { 498 trans = 'C'; 499 } else { 500 trans = 'T'; 501 } 502 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 503 { /* lwork depends on the number of right-hand sides */ 504 PetscBLASInt nlfwork, lfwork = -1; 505 PetscScalar fwork; 506 507 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 508 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 509 if (nlfwork > mat->lfwork) { 510 mat->lfwork = nlfwork; 511 PetscCall(PetscFree(mat->fwork)); 512 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 513 } 514 } 515 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 516 PetscCall(PetscFPTrapPop()); 517 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 518 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 519 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 520 PetscCall(PetscFPTrapPop()); 521 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 522 for (PetscInt j = 0; j < nrhs; j++) { 523 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 524 } 525 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 526 PetscFunctionReturn(0); 527 } 528 529 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 530 { 531 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 532 PetscBLASInt info; 533 534 PetscFunctionBegin; 535 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 536 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 537 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 538 PetscCall(PetscFPTrapPop()); 539 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 540 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 541 { /* lwork depends on the number of right-hand sides */ 542 PetscBLASInt nlfwork, lfwork = -1; 543 PetscScalar fwork; 544 545 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 546 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 547 if (nlfwork > mat->lfwork) { 548 mat->lfwork = nlfwork; 549 PetscCall(PetscFree(mat->fwork)); 550 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 551 } 552 } 553 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 554 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 555 PetscCall(PetscFPTrapPop()); 556 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 557 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 558 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 559 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 560 PetscFunctionReturn(0); 561 } 562 563 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 564 { 565 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 566 PetscScalar *y; 567 PetscBLASInt m = 0, k = 0; 568 569 PetscFunctionBegin; 570 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 571 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 572 if (k < m) { 573 PetscCall(VecCopy(xx, mat->qrrhs)); 574 PetscCall(VecGetArray(mat->qrrhs, &y)); 575 } else { 576 PetscCall(VecCopy(xx, yy)); 577 PetscCall(VecGetArray(yy, &y)); 578 } 579 *_y = y; 580 *_k = k; 581 *_m = m; 582 PetscFunctionReturn(0); 583 } 584 585 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 586 { 587 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 588 PetscScalar *y = NULL; 589 PetscBLASInt m, k; 590 591 PetscFunctionBegin; 592 y = *_y; 593 *_y = NULL; 594 k = *_k; 595 m = *_m; 596 if (k < m) { 597 PetscScalar *yv; 598 PetscCall(VecGetArray(yy, &yv)); 599 PetscCall(PetscArraycpy(yv, y, k)); 600 PetscCall(VecRestoreArray(yy, &yv)); 601 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 602 } else { 603 PetscCall(VecRestoreArray(yy, &y)); 604 } 605 PetscFunctionReturn(0); 606 } 607 608 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 609 { 610 PetscScalar *y = NULL; 611 PetscBLASInt m = 0, k = 0; 612 613 PetscFunctionBegin; 614 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 615 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 616 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 617 PetscFunctionReturn(0); 618 } 619 620 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 621 { 622 PetscScalar *y = NULL; 623 PetscBLASInt m = 0, k = 0; 624 625 PetscFunctionBegin; 626 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 627 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 628 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 629 PetscFunctionReturn(0); 630 } 631 632 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 633 { 634 PetscScalar *y = NULL; 635 PetscBLASInt m = 0, k = 0; 636 637 PetscFunctionBegin; 638 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 639 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 640 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 641 PetscFunctionReturn(0); 642 } 643 644 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 645 { 646 PetscScalar *y = NULL; 647 PetscBLASInt m = 0, k = 0; 648 649 PetscFunctionBegin; 650 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 651 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 652 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 653 PetscFunctionReturn(0); 654 } 655 656 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 657 { 658 PetscScalar *y = NULL; 659 PetscBLASInt m = 0, k = 0; 660 661 PetscFunctionBegin; 662 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 663 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 664 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 665 PetscFunctionReturn(0); 666 } 667 668 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 669 { 670 PetscScalar *y = NULL; 671 PetscBLASInt m = 0, k = 0; 672 673 PetscFunctionBegin; 674 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 675 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 676 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 677 PetscFunctionReturn(0); 678 } 679 680 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 681 { 682 const PetscScalar *b; 683 PetscScalar *y; 684 PetscInt n, _ldb, _ldx; 685 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 686 687 PetscFunctionBegin; 688 *_ldy = 0; 689 *_m = 0; 690 *_nrhs = 0; 691 *_k = 0; 692 *_y = NULL; 693 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 694 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 695 PetscCall(MatGetSize(B, NULL, &n)); 696 PetscCall(PetscBLASIntCast(n, &nrhs)); 697 PetscCall(MatDenseGetLDA(B, &_ldb)); 698 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 699 PetscCall(MatDenseGetLDA(X, &_ldx)); 700 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 701 if (ldx < m) { 702 PetscCall(MatDenseGetArrayRead(B, &b)); 703 PetscCall(PetscMalloc1(nrhs * m, &y)); 704 if (ldb == m) { 705 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 706 } else { 707 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 708 } 709 ldy = m; 710 PetscCall(MatDenseRestoreArrayRead(B, &b)); 711 } else { 712 if (ldb == ldx) { 713 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 714 PetscCall(MatDenseGetArray(X, &y)); 715 } else { 716 PetscCall(MatDenseGetArray(X, &y)); 717 PetscCall(MatDenseGetArrayRead(B, &b)); 718 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 719 PetscCall(MatDenseRestoreArrayRead(B, &b)); 720 } 721 ldy = ldx; 722 } 723 *_y = y; 724 *_ldy = ldy; 725 *_k = k; 726 *_m = m; 727 *_nrhs = nrhs; 728 PetscFunctionReturn(0); 729 } 730 731 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 732 { 733 PetscScalar *y; 734 PetscInt _ldx; 735 PetscBLASInt k, ldy, nrhs, ldx = 0; 736 737 PetscFunctionBegin; 738 y = *_y; 739 *_y = NULL; 740 k = *_k; 741 ldy = *_ldy; 742 nrhs = *_nrhs; 743 PetscCall(MatDenseGetLDA(X, &_ldx)); 744 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 745 if (ldx != ldy) { 746 PetscScalar *xv; 747 PetscCall(MatDenseGetArray(X, &xv)); 748 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 749 PetscCall(MatDenseRestoreArray(X, &xv)); 750 PetscCall(PetscFree(y)); 751 } else { 752 PetscCall(MatDenseRestoreArray(X, &y)); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 758 { 759 PetscScalar *y; 760 PetscBLASInt m, k, ldy, nrhs; 761 762 PetscFunctionBegin; 763 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 764 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 765 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 766 PetscFunctionReturn(0); 767 } 768 769 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 770 { 771 PetscScalar *y; 772 PetscBLASInt m, k, ldy, nrhs; 773 774 PetscFunctionBegin; 775 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 776 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 777 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 778 PetscFunctionReturn(0); 779 } 780 781 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 782 { 783 PetscScalar *y; 784 PetscBLASInt m, k, ldy, nrhs; 785 786 PetscFunctionBegin; 787 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 788 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 789 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 790 PetscFunctionReturn(0); 791 } 792 793 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 794 { 795 PetscScalar *y; 796 PetscBLASInt m, k, ldy, nrhs; 797 798 PetscFunctionBegin; 799 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 800 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 801 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 802 PetscFunctionReturn(0); 803 } 804 805 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 806 { 807 PetscScalar *y; 808 PetscBLASInt m, k, ldy, nrhs; 809 810 PetscFunctionBegin; 811 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 812 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 813 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 814 PetscFunctionReturn(0); 815 } 816 817 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 818 { 819 PetscScalar *y; 820 PetscBLASInt m, k, ldy, nrhs; 821 822 PetscFunctionBegin; 823 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 824 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 825 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 826 PetscFunctionReturn(0); 827 } 828 829 /* ---------------------------------------------------------------*/ 830 /* COMMENT: I have chosen to hide row permutation in the pivots, 831 rather than put it in the Mat->row slot.*/ 832 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 833 { 834 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 835 PetscBLASInt n, m, info; 836 837 PetscFunctionBegin; 838 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 839 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 840 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 841 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 842 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 843 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 844 PetscCall(PetscFPTrapPop()); 845 846 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 847 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 848 849 A->ops->solve = MatSolve_SeqDense_LU; 850 A->ops->matsolve = MatMatSolve_SeqDense_LU; 851 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 852 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 853 A->factortype = MAT_FACTOR_LU; 854 855 PetscCall(PetscFree(A->solvertype)); 856 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 857 858 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 859 PetscFunctionReturn(0); 860 } 861 862 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 863 { 864 MatFactorInfo info; 865 866 PetscFunctionBegin; 867 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 868 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 873 { 874 PetscFunctionBegin; 875 fact->preallocated = PETSC_TRUE; 876 fact->assembled = PETSC_TRUE; 877 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 878 PetscFunctionReturn(0); 879 } 880 881 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 882 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 883 { 884 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 885 PetscBLASInt info, n; 886 887 PetscFunctionBegin; 888 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 889 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 890 if (A->spd == PETSC_BOOL3_TRUE) { 891 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 892 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 893 PetscCall(PetscFPTrapPop()); 894 #if defined(PETSC_USE_COMPLEX) 895 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 896 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 897 if (!mat->fwork) { 898 PetscScalar dummy; 899 900 mat->lfwork = -1; 901 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 902 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 903 PetscCall(PetscFPTrapPop()); 904 mat->lfwork = (PetscInt)PetscRealPart(dummy); 905 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 906 } 907 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 908 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 909 PetscCall(PetscFPTrapPop()); 910 #endif 911 } else { /* symmetric case */ 912 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 913 if (!mat->fwork) { 914 PetscScalar dummy; 915 916 mat->lfwork = -1; 917 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 918 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 919 PetscCall(PetscFPTrapPop()); 920 mat->lfwork = (PetscInt)PetscRealPart(dummy); 921 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 922 } 923 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 924 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 925 PetscCall(PetscFPTrapPop()); 926 } 927 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 928 929 A->ops->solve = MatSolve_SeqDense_Cholesky; 930 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 931 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 932 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 933 A->factortype = MAT_FACTOR_CHOLESKY; 934 935 PetscCall(PetscFree(A->solvertype)); 936 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 937 938 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 939 PetscFunctionReturn(0); 940 } 941 942 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 943 { 944 MatFactorInfo info; 945 946 PetscFunctionBegin; 947 info.fill = 1.0; 948 949 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 950 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 951 PetscFunctionReturn(0); 952 } 953 954 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 955 { 956 PetscFunctionBegin; 957 fact->assembled = PETSC_TRUE; 958 fact->preallocated = PETSC_TRUE; 959 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 960 PetscFunctionReturn(0); 961 } 962 963 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 964 { 965 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 966 PetscBLASInt n, m, info, min, max; 967 968 PetscFunctionBegin; 969 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 970 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 971 max = PetscMax(m, n); 972 min = PetscMin(m, n); 973 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 974 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 975 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &(mat->qrrhs))); 976 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 977 if (!mat->fwork) { 978 PetscScalar dummy; 979 980 mat->lfwork = -1; 981 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 982 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 983 PetscCall(PetscFPTrapPop()); 984 mat->lfwork = (PetscInt)PetscRealPart(dummy); 985 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 986 } 987 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 988 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 989 PetscCall(PetscFPTrapPop()); 990 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 991 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 992 mat->rank = min; 993 994 A->ops->solve = MatSolve_SeqDense_QR; 995 A->ops->matsolve = MatMatSolve_SeqDense_QR; 996 A->factortype = MAT_FACTOR_QR; 997 if (m == n) { 998 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 999 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 1000 } 1001 1002 PetscCall(PetscFree(A->solvertype)); 1003 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 1004 1005 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 1010 { 1011 MatFactorInfo info; 1012 1013 PetscFunctionBegin; 1014 info.fill = 1.0; 1015 1016 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 1017 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 1022 { 1023 PetscFunctionBegin; 1024 fact->assembled = PETSC_TRUE; 1025 fact->preallocated = PETSC_TRUE; 1026 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* uses LAPACK */ 1031 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 1032 { 1033 PetscFunctionBegin; 1034 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 1035 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 1036 PetscCall(MatSetType(*fact, MATDENSE)); 1037 (*fact)->trivialsymbolic = PETSC_TRUE; 1038 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 1039 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1040 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1041 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1042 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1043 } else if (ftype == MAT_FACTOR_QR) { 1044 PetscCall(PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1045 } 1046 (*fact)->factortype = ftype; 1047 1048 PetscCall(PetscFree((*fact)->solvertype)); 1049 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1050 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1051 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1052 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1053 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 /* ------------------------------------------------------------------*/ 1058 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1059 { 1060 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1061 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1062 const PetscScalar *b; 1063 PetscInt m = A->rmap->n, i; 1064 PetscBLASInt o = 1, bm = 0; 1065 1066 PetscFunctionBegin; 1067 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1068 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1069 #endif 1070 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1071 PetscCall(PetscBLASIntCast(m, &bm)); 1072 if (flag & SOR_ZERO_INITIAL_GUESS) { 1073 /* this is a hack fix, should have another version without the second BLASdotu */ 1074 PetscCall(VecSet(xx, zero)); 1075 } 1076 PetscCall(VecGetArray(xx, &x)); 1077 PetscCall(VecGetArrayRead(bb, &b)); 1078 its = its * lits; 1079 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1080 while (its--) { 1081 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1082 for (i = 0; i < m; i++) { 1083 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1084 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1085 } 1086 } 1087 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1088 for (i = m - 1; i >= 0; i--) { 1089 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1090 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1091 } 1092 } 1093 } 1094 PetscCall(VecRestoreArrayRead(bb, &b)); 1095 PetscCall(VecRestoreArray(xx, &x)); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* -----------------------------------------------------------------*/ 1100 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1101 { 1102 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1103 const PetscScalar *v = mat->v, *x; 1104 PetscScalar *y; 1105 PetscBLASInt m, n, _One = 1; 1106 PetscScalar _DOne = 1.0, _DZero = 0.0; 1107 1108 PetscFunctionBegin; 1109 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1110 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1111 PetscCall(VecGetArrayRead(xx, &x)); 1112 PetscCall(VecGetArrayWrite(yy, &y)); 1113 if (!A->rmap->n || !A->cmap->n) { 1114 PetscBLASInt i; 1115 for (i = 0; i < n; i++) y[i] = 0.0; 1116 } else { 1117 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One)); 1118 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n)); 1119 } 1120 PetscCall(VecRestoreArrayRead(xx, &x)); 1121 PetscCall(VecRestoreArrayWrite(yy, &y)); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1126 { 1127 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1128 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1129 PetscBLASInt m, n, _One = 1; 1130 const PetscScalar *v = mat->v, *x; 1131 1132 PetscFunctionBegin; 1133 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1134 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1135 PetscCall(VecGetArrayRead(xx, &x)); 1136 PetscCall(VecGetArrayWrite(yy, &y)); 1137 if (!A->rmap->n || !A->cmap->n) { 1138 PetscBLASInt i; 1139 for (i = 0; i < m; i++) y[i] = 0.0; 1140 } else { 1141 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One)); 1142 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n)); 1143 } 1144 PetscCall(VecRestoreArrayRead(xx, &x)); 1145 PetscCall(VecRestoreArrayWrite(yy, &y)); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1150 { 1151 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1152 const PetscScalar *v = mat->v, *x; 1153 PetscScalar *y, _DOne = 1.0; 1154 PetscBLASInt m, n, _One = 1; 1155 1156 PetscFunctionBegin; 1157 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1158 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1159 PetscCall(VecCopy(zz, yy)); 1160 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1161 PetscCall(VecGetArrayRead(xx, &x)); 1162 PetscCall(VecGetArray(yy, &y)); 1163 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1164 PetscCall(VecRestoreArrayRead(xx, &x)); 1165 PetscCall(VecRestoreArray(yy, &y)); 1166 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1171 { 1172 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1173 const PetscScalar *v = mat->v, *x; 1174 PetscScalar *y; 1175 PetscBLASInt m, n, _One = 1; 1176 PetscScalar _DOne = 1.0; 1177 1178 PetscFunctionBegin; 1179 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1180 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1181 PetscCall(VecCopy(zz, yy)); 1182 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1183 PetscCall(VecGetArrayRead(xx, &x)); 1184 PetscCall(VecGetArray(yy, &y)); 1185 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1186 PetscCall(VecRestoreArrayRead(xx, &x)); 1187 PetscCall(VecRestoreArray(yy, &y)); 1188 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 /* -----------------------------------------------------------------*/ 1193 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1194 { 1195 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1196 PetscInt i; 1197 1198 PetscFunctionBegin; 1199 *ncols = A->cmap->n; 1200 if (cols) { 1201 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1202 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1203 } 1204 if (vals) { 1205 const PetscScalar *v; 1206 1207 PetscCall(MatDenseGetArrayRead(A, &v)); 1208 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1209 v += row; 1210 for (i = 0; i < A->cmap->n; i++) { 1211 (*vals)[i] = *v; 1212 v += mat->lda; 1213 } 1214 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1220 { 1221 PetscFunctionBegin; 1222 if (ncols) *ncols = 0; 1223 if (cols) PetscCall(PetscFree(*cols)); 1224 if (vals) PetscCall(PetscFree(*vals)); 1225 PetscFunctionReturn(0); 1226 } 1227 /* ----------------------------------------------------------------*/ 1228 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1229 { 1230 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1231 PetscScalar *av; 1232 PetscInt i, j, idx = 0; 1233 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1234 PetscOffloadMask oldf; 1235 #endif 1236 1237 PetscFunctionBegin; 1238 PetscCall(MatDenseGetArray(A, &av)); 1239 if (!mat->roworiented) { 1240 if (addv == INSERT_VALUES) { 1241 for (j = 0; j < n; j++) { 1242 if (indexn[j] < 0) { 1243 idx += m; 1244 continue; 1245 } 1246 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1247 for (i = 0; i < m; i++) { 1248 if (indexm[i] < 0) { 1249 idx++; 1250 continue; 1251 } 1252 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1253 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1254 } 1255 } 1256 } else { 1257 for (j = 0; j < n; j++) { 1258 if (indexn[j] < 0) { 1259 idx += m; 1260 continue; 1261 } 1262 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1263 for (i = 0; i < m; i++) { 1264 if (indexm[i] < 0) { 1265 idx++; 1266 continue; 1267 } 1268 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1269 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1270 } 1271 } 1272 } 1273 } else { 1274 if (addv == INSERT_VALUES) { 1275 for (i = 0; i < m; i++) { 1276 if (indexm[i] < 0) { 1277 idx += n; 1278 continue; 1279 } 1280 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1281 for (j = 0; j < n; j++) { 1282 if (indexn[j] < 0) { 1283 idx++; 1284 continue; 1285 } 1286 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1287 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1288 } 1289 } 1290 } else { 1291 for (i = 0; i < m; i++) { 1292 if (indexm[i] < 0) { 1293 idx += n; 1294 continue; 1295 } 1296 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1297 for (j = 0; j < n; j++) { 1298 if (indexn[j] < 0) { 1299 idx++; 1300 continue; 1301 } 1302 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1303 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1304 } 1305 } 1306 } 1307 } 1308 /* hack to prevent unneeded copy to the GPU while returning the array */ 1309 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1310 oldf = A->offloadmask; 1311 A->offloadmask = PETSC_OFFLOAD_GPU; 1312 #endif 1313 PetscCall(MatDenseRestoreArray(A, &av)); 1314 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1315 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1316 #endif 1317 PetscFunctionReturn(0); 1318 } 1319 1320 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1321 { 1322 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1323 const PetscScalar *vv; 1324 PetscInt i, j; 1325 1326 PetscFunctionBegin; 1327 PetscCall(MatDenseGetArrayRead(A, &vv)); 1328 /* row-oriented output */ 1329 for (i = 0; i < m; i++) { 1330 if (indexm[i] < 0) { 1331 v += n; 1332 continue; 1333 } 1334 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1335 for (j = 0; j < n; j++) { 1336 if (indexn[j] < 0) { 1337 v++; 1338 continue; 1339 } 1340 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1341 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1342 } 1343 } 1344 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 /* -----------------------------------------------------------------*/ 1349 1350 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1351 { 1352 PetscBool skipHeader; 1353 PetscViewerFormat format; 1354 PetscInt header[4], M, N, m, lda, i, j, k; 1355 const PetscScalar *v; 1356 PetscScalar *vwork; 1357 1358 PetscFunctionBegin; 1359 PetscCall(PetscViewerSetUp(viewer)); 1360 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1361 PetscCall(PetscViewerGetFormat(viewer, &format)); 1362 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1363 1364 PetscCall(MatGetSize(mat, &M, &N)); 1365 1366 /* write matrix header */ 1367 header[0] = MAT_FILE_CLASSID; 1368 header[1] = M; 1369 header[2] = N; 1370 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1371 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1372 1373 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1374 if (format != PETSC_VIEWER_NATIVE) { 1375 PetscInt nnz = m * N, *iwork; 1376 /* store row lengths for each row */ 1377 PetscCall(PetscMalloc1(nnz, &iwork)); 1378 for (i = 0; i < m; i++) iwork[i] = N; 1379 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1380 /* store column indices (zero start index) */ 1381 for (k = 0, i = 0; i < m; i++) 1382 for (j = 0; j < N; j++, k++) iwork[k] = j; 1383 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1384 PetscCall(PetscFree(iwork)); 1385 } 1386 /* store matrix values as a dense matrix in row major order */ 1387 PetscCall(PetscMalloc1(m * N, &vwork)); 1388 PetscCall(MatDenseGetArrayRead(mat, &v)); 1389 PetscCall(MatDenseGetLDA(mat, &lda)); 1390 for (k = 0, i = 0; i < m; i++) 1391 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1392 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1393 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1394 PetscCall(PetscFree(vwork)); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1399 { 1400 PetscBool skipHeader; 1401 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1402 PetscInt rows, cols; 1403 PetscScalar *v, *vwork; 1404 1405 PetscFunctionBegin; 1406 PetscCall(PetscViewerSetUp(viewer)); 1407 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1408 1409 if (!skipHeader) { 1410 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1411 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1412 M = header[1]; 1413 N = header[2]; 1414 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1415 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1416 nz = header[3]; 1417 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1418 } else { 1419 PetscCall(MatGetSize(mat, &M, &N)); 1420 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1421 nz = MATRIX_BINARY_FORMAT_DENSE; 1422 } 1423 1424 /* setup global sizes if not set */ 1425 if (mat->rmap->N < 0) mat->rmap->N = M; 1426 if (mat->cmap->N < 0) mat->cmap->N = N; 1427 PetscCall(MatSetUp(mat)); 1428 /* check if global sizes are correct */ 1429 PetscCall(MatGetSize(mat, &rows, &cols)); 1430 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1431 1432 PetscCall(MatGetSize(mat, NULL, &N)); 1433 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1434 PetscCall(MatDenseGetArray(mat, &v)); 1435 PetscCall(MatDenseGetLDA(mat, &lda)); 1436 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1437 PetscInt nnz = m * N; 1438 /* read in matrix values */ 1439 PetscCall(PetscMalloc1(nnz, &vwork)); 1440 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1441 /* store values in column major order */ 1442 for (j = 0; j < N; j++) 1443 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1444 PetscCall(PetscFree(vwork)); 1445 } else { /* matrix in file is sparse format */ 1446 PetscInt nnz = 0, *rlens, *icols; 1447 /* read in row lengths */ 1448 PetscCall(PetscMalloc1(m, &rlens)); 1449 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1450 for (i = 0; i < m; i++) nnz += rlens[i]; 1451 /* read in column indices and values */ 1452 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1453 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1454 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1455 /* store values in column major order */ 1456 for (k = 0, i = 0; i < m; i++) 1457 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1458 PetscCall(PetscFree(rlens)); 1459 PetscCall(PetscFree2(icols, vwork)); 1460 } 1461 PetscCall(MatDenseRestoreArray(mat, &v)); 1462 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1463 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1468 { 1469 PetscBool isbinary, ishdf5; 1470 1471 PetscFunctionBegin; 1472 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1473 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1474 /* force binary viewer to load .info file if it has not yet done so */ 1475 PetscCall(PetscViewerSetUp(viewer)); 1476 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1477 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1478 if (isbinary) { 1479 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1480 } else if (ishdf5) { 1481 #if defined(PETSC_HAVE_HDF5) 1482 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1483 #else 1484 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1485 #endif 1486 } else { 1487 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1488 } 1489 PetscFunctionReturn(0); 1490 } 1491 1492 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1493 { 1494 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1495 PetscInt i, j; 1496 const char *name; 1497 PetscScalar *v, *av; 1498 PetscViewerFormat format; 1499 #if defined(PETSC_USE_COMPLEX) 1500 PetscBool allreal = PETSC_TRUE; 1501 #endif 1502 1503 PetscFunctionBegin; 1504 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1505 PetscCall(PetscViewerGetFormat(viewer, &format)); 1506 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1507 PetscFunctionReturn(0); /* do nothing for now */ 1508 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1509 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1510 for (i = 0; i < A->rmap->n; i++) { 1511 v = av + i; 1512 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1513 for (j = 0; j < A->cmap->n; j++) { 1514 #if defined(PETSC_USE_COMPLEX) 1515 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1516 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1517 } else if (PetscRealPart(*v)) { 1518 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1519 } 1520 #else 1521 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1522 #endif 1523 v += a->lda; 1524 } 1525 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1526 } 1527 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1528 } else { 1529 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1530 #if defined(PETSC_USE_COMPLEX) 1531 /* determine if matrix has all real values */ 1532 for (j = 0; j < A->cmap->n; j++) { 1533 v = av + j * a->lda; 1534 for (i = 0; i < A->rmap->n; i++) { 1535 if (PetscImaginaryPart(v[i])) { 1536 allreal = PETSC_FALSE; 1537 break; 1538 } 1539 } 1540 } 1541 #endif 1542 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1543 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1544 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1545 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1546 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1547 } 1548 1549 for (i = 0; i < A->rmap->n; i++) { 1550 v = av + i; 1551 for (j = 0; j < A->cmap->n; j++) { 1552 #if defined(PETSC_USE_COMPLEX) 1553 if (allreal) { 1554 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1555 } else { 1556 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1557 } 1558 #else 1559 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1560 #endif 1561 v += a->lda; 1562 } 1563 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1564 } 1565 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1566 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1567 } 1568 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1569 PetscCall(PetscViewerFlush(viewer)); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #include <petscdraw.h> 1574 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1575 { 1576 Mat A = (Mat)Aa; 1577 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1578 int color = PETSC_DRAW_WHITE; 1579 const PetscScalar *v; 1580 PetscViewer viewer; 1581 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1582 PetscViewerFormat format; 1583 1584 PetscFunctionBegin; 1585 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1586 PetscCall(PetscViewerGetFormat(viewer, &format)); 1587 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1588 1589 /* Loop over matrix elements drawing boxes */ 1590 PetscCall(MatDenseGetArrayRead(A, &v)); 1591 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1592 PetscDrawCollectiveBegin(draw); 1593 /* Blue for negative and Red for positive */ 1594 for (j = 0; j < n; j++) { 1595 x_l = j; 1596 x_r = x_l + 1.0; 1597 for (i = 0; i < m; i++) { 1598 y_l = m - i - 1.0; 1599 y_r = y_l + 1.0; 1600 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1601 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1602 else continue; 1603 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1604 } 1605 } 1606 PetscDrawCollectiveEnd(draw); 1607 } else { 1608 /* use contour shading to indicate magnitude of values */ 1609 /* first determine max of all nonzero values */ 1610 PetscReal minv = 0.0, maxv = 0.0; 1611 PetscDraw popup; 1612 1613 for (i = 0; i < m * n; i++) { 1614 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1615 } 1616 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1617 PetscCall(PetscDrawGetPopup(draw, &popup)); 1618 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1619 1620 PetscDrawCollectiveBegin(draw); 1621 for (j = 0; j < n; j++) { 1622 x_l = j; 1623 x_r = x_l + 1.0; 1624 for (i = 0; i < m; i++) { 1625 y_l = m - i - 1.0; 1626 y_r = y_l + 1.0; 1627 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1628 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1629 } 1630 } 1631 PetscDrawCollectiveEnd(draw); 1632 } 1633 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1634 PetscFunctionReturn(0); 1635 } 1636 1637 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1638 { 1639 PetscDraw draw; 1640 PetscBool isnull; 1641 PetscReal xr, yr, xl, yl, h, w; 1642 1643 PetscFunctionBegin; 1644 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1645 PetscCall(PetscDrawIsNull(draw, &isnull)); 1646 if (isnull) PetscFunctionReturn(0); 1647 1648 xr = A->cmap->n; 1649 yr = A->rmap->n; 1650 h = yr / 10.0; 1651 w = xr / 10.0; 1652 xr += w; 1653 yr += h; 1654 xl = -w; 1655 yl = -h; 1656 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1657 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1658 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1659 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1660 PetscCall(PetscDrawSave(draw)); 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1665 { 1666 PetscBool iascii, isbinary, isdraw; 1667 1668 PetscFunctionBegin; 1669 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1670 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1671 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1672 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1673 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1674 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1679 { 1680 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1681 1682 PetscFunctionBegin; 1683 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1684 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1685 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreArray() first"); 1686 a->unplacedarray = a->v; 1687 a->unplaced_user_alloc = a->user_alloc; 1688 a->v = (PetscScalar *)array; 1689 a->user_alloc = PETSC_TRUE; 1690 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1691 A->offloadmask = PETSC_OFFLOAD_CPU; 1692 #endif 1693 PetscFunctionReturn(0); 1694 } 1695 1696 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1697 { 1698 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1699 1700 PetscFunctionBegin; 1701 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1702 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1703 a->v = a->unplacedarray; 1704 a->user_alloc = a->unplaced_user_alloc; 1705 a->unplacedarray = NULL; 1706 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1707 A->offloadmask = PETSC_OFFLOAD_CPU; 1708 #endif 1709 PetscFunctionReturn(0); 1710 } 1711 1712 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1713 { 1714 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1715 1716 PetscFunctionBegin; 1717 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1718 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1719 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1720 a->v = (PetscScalar *)array; 1721 a->user_alloc = PETSC_FALSE; 1722 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1723 A->offloadmask = PETSC_OFFLOAD_CPU; 1724 #endif 1725 PetscFunctionReturn(0); 1726 } 1727 1728 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1729 { 1730 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1731 1732 PetscFunctionBegin; 1733 #if defined(PETSC_USE_LOG) 1734 PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n); 1735 #endif 1736 PetscCall(VecDestroy(&(l->qrrhs))); 1737 PetscCall(PetscFree(l->tau)); 1738 PetscCall(PetscFree(l->pivots)); 1739 PetscCall(PetscFree(l->fwork)); 1740 PetscCall(MatDestroy(&l->ptapwork)); 1741 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1742 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1743 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1744 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1745 PetscCall(VecDestroy(&l->cvec)); 1746 PetscCall(MatDestroy(&l->cmat)); 1747 PetscCall(PetscFree(mat->data)); 1748 1749 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1765 #if defined(PETSC_HAVE_ELEMENTAL) 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1767 #endif 1768 #if defined(PETSC_HAVE_SCALAPACK) 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1770 #endif 1771 #if defined(PETSC_HAVE_CUDA) 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1776 #endif 1777 #if defined(PETSC_HAVE_HIP) 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL)); 1782 #endif 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1788 1789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1791 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1792 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1793 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1794 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1795 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1796 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1797 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1798 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1799 PetscFunctionReturn(0); 1800 } 1801 1802 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1803 { 1804 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1805 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1806 PetscScalar *v, tmp; 1807 1808 PetscFunctionBegin; 1809 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1810 if (reuse == MAT_INPLACE_MATRIX) { 1811 if (m == n) { /* in place transpose */ 1812 PetscCall(MatDenseGetArray(A, &v)); 1813 for (j = 0; j < m; j++) { 1814 for (k = 0; k < j; k++) { 1815 tmp = v[j + k * M]; 1816 v[j + k * M] = v[k + j * M]; 1817 v[k + j * M] = tmp; 1818 } 1819 } 1820 PetscCall(MatDenseRestoreArray(A, &v)); 1821 } else { /* reuse memory, temporary allocates new memory */ 1822 PetscScalar *v2; 1823 PetscLayout tmplayout; 1824 1825 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1826 PetscCall(MatDenseGetArray(A, &v)); 1827 for (j = 0; j < n; j++) { 1828 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1829 } 1830 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1831 PetscCall(PetscFree(v2)); 1832 PetscCall(MatDenseRestoreArray(A, &v)); 1833 /* cleanup size dependent quantities */ 1834 PetscCall(VecDestroy(&mat->cvec)); 1835 PetscCall(MatDestroy(&mat->cmat)); 1836 PetscCall(PetscFree(mat->pivots)); 1837 PetscCall(PetscFree(mat->fwork)); 1838 PetscCall(MatDestroy(&mat->ptapwork)); 1839 /* swap row/col layouts */ 1840 mat->lda = n; 1841 tmplayout = A->rmap; 1842 A->rmap = A->cmap; 1843 A->cmap = tmplayout; 1844 } 1845 } else { /* out-of-place transpose */ 1846 Mat tmat; 1847 Mat_SeqDense *tmatd; 1848 PetscScalar *v2; 1849 PetscInt M2; 1850 1851 if (reuse == MAT_INITIAL_MATRIX) { 1852 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1853 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1854 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1855 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1856 } else tmat = *matout; 1857 1858 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1859 PetscCall(MatDenseGetArray(tmat, &v2)); 1860 tmatd = (Mat_SeqDense *)tmat->data; 1861 M2 = tmatd->lda; 1862 for (j = 0; j < n; j++) { 1863 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1864 } 1865 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1866 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1867 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1868 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1869 *matout = tmat; 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1875 { 1876 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1877 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1878 PetscInt i; 1879 const PetscScalar *v1, *v2; 1880 1881 PetscFunctionBegin; 1882 if (A1->rmap->n != A2->rmap->n) { 1883 *flg = PETSC_FALSE; 1884 PetscFunctionReturn(0); 1885 } 1886 if (A1->cmap->n != A2->cmap->n) { 1887 *flg = PETSC_FALSE; 1888 PetscFunctionReturn(0); 1889 } 1890 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1891 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1892 for (i = 0; i < A1->cmap->n; i++) { 1893 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1894 if (*flg == PETSC_FALSE) PetscFunctionReturn(0); 1895 v1 += mat1->lda; 1896 v2 += mat2->lda; 1897 } 1898 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1899 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1900 *flg = PETSC_TRUE; 1901 PetscFunctionReturn(0); 1902 } 1903 1904 static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1905 { 1906 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1907 PetscInt i, n, len; 1908 PetscScalar *x; 1909 const PetscScalar *vv; 1910 1911 PetscFunctionBegin; 1912 PetscCall(VecGetSize(v, &n)); 1913 PetscCall(VecGetArray(v, &x)); 1914 len = PetscMin(A->rmap->n, A->cmap->n); 1915 PetscCall(MatDenseGetArrayRead(A, &vv)); 1916 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1917 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1918 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1919 PetscCall(VecRestoreArray(v, &x)); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1924 { 1925 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1926 const PetscScalar *l, *r; 1927 PetscScalar x, *v, *vv; 1928 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1929 1930 PetscFunctionBegin; 1931 PetscCall(MatDenseGetArray(A, &vv)); 1932 if (ll) { 1933 PetscCall(VecGetSize(ll, &m)); 1934 PetscCall(VecGetArrayRead(ll, &l)); 1935 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1936 for (i = 0; i < m; i++) { 1937 x = l[i]; 1938 v = vv + i; 1939 for (j = 0; j < n; j++) { 1940 (*v) *= x; 1941 v += mat->lda; 1942 } 1943 } 1944 PetscCall(VecRestoreArrayRead(ll, &l)); 1945 PetscCall(PetscLogFlops(1.0 * n * m)); 1946 } 1947 if (rr) { 1948 PetscCall(VecGetSize(rr, &n)); 1949 PetscCall(VecGetArrayRead(rr, &r)); 1950 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1951 for (i = 0; i < n; i++) { 1952 x = r[i]; 1953 v = vv + i * mat->lda; 1954 for (j = 0; j < m; j++) (*v++) *= x; 1955 } 1956 PetscCall(VecRestoreArrayRead(rr, &r)); 1957 PetscCall(PetscLogFlops(1.0 * n * m)); 1958 } 1959 PetscCall(MatDenseRestoreArray(A, &vv)); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1964 { 1965 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1966 PetscScalar *v, *vv; 1967 PetscReal sum = 0.0; 1968 PetscInt lda, m = A->rmap->n, i, j; 1969 1970 PetscFunctionBegin; 1971 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1972 PetscCall(MatDenseGetLDA(A, &lda)); 1973 v = vv; 1974 if (type == NORM_FROBENIUS) { 1975 if (lda > m) { 1976 for (j = 0; j < A->cmap->n; j++) { 1977 v = vv + j * lda; 1978 for (i = 0; i < m; i++) { 1979 sum += PetscRealPart(PetscConj(*v) * (*v)); 1980 v++; 1981 } 1982 } 1983 } else { 1984 #if defined(PETSC_USE_REAL___FP16) 1985 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1986 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1987 } 1988 #else 1989 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1990 sum += PetscRealPart(PetscConj(*v) * (*v)); 1991 v++; 1992 } 1993 } 1994 *nrm = PetscSqrtReal(sum); 1995 #endif 1996 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1997 } else if (type == NORM_1) { 1998 *nrm = 0.0; 1999 for (j = 0; j < A->cmap->n; j++) { 2000 v = vv + j * mat->lda; 2001 sum = 0.0; 2002 for (i = 0; i < A->rmap->n; i++) { 2003 sum += PetscAbsScalar(*v); 2004 v++; 2005 } 2006 if (sum > *nrm) *nrm = sum; 2007 } 2008 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2009 } else if (type == NORM_INFINITY) { 2010 *nrm = 0.0; 2011 for (j = 0; j < A->rmap->n; j++) { 2012 v = vv + j; 2013 sum = 0.0; 2014 for (i = 0; i < A->cmap->n; i++) { 2015 sum += PetscAbsScalar(*v); 2016 v += mat->lda; 2017 } 2018 if (sum > *nrm) *nrm = sum; 2019 } 2020 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2021 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2022 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2027 { 2028 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2029 2030 PetscFunctionBegin; 2031 switch (op) { 2032 case MAT_ROW_ORIENTED: 2033 aij->roworiented = flg; 2034 break; 2035 case MAT_NEW_NONZERO_LOCATIONS: 2036 case MAT_NEW_NONZERO_LOCATION_ERR: 2037 case MAT_NEW_NONZERO_ALLOCATION_ERR: 2038 case MAT_FORCE_DIAGONAL_ENTRIES: 2039 case MAT_KEEP_NONZERO_PATTERN: 2040 case MAT_IGNORE_OFF_PROC_ENTRIES: 2041 case MAT_USE_HASH_TABLE: 2042 case MAT_IGNORE_ZERO_ENTRIES: 2043 case MAT_IGNORE_LOWER_TRIANGULAR: 2044 case MAT_SORTED_FULL: 2045 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 2046 break; 2047 case MAT_SPD: 2048 case MAT_SYMMETRIC: 2049 case MAT_STRUCTURALLY_SYMMETRIC: 2050 case MAT_HERMITIAN: 2051 case MAT_SYMMETRY_ETERNAL: 2052 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 2053 case MAT_SPD_ETERNAL: 2054 break; 2055 default: 2056 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2062 { 2063 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2064 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2065 PetscScalar *v; 2066 2067 PetscFunctionBegin; 2068 PetscCall(MatDenseGetArrayWrite(A, &v)); 2069 if (lda > m) { 2070 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2071 } else { 2072 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2073 } 2074 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2079 { 2080 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2081 PetscInt m = l->lda, n = A->cmap->n, i, j; 2082 PetscScalar *slot, *bb, *v; 2083 const PetscScalar *xx; 2084 2085 PetscFunctionBegin; 2086 if (PetscDefined(USE_DEBUG)) { 2087 for (i = 0; i < N; i++) { 2088 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2089 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2090 } 2091 } 2092 if (!N) PetscFunctionReturn(0); 2093 2094 /* fix right hand side if needed */ 2095 if (x && b) { 2096 PetscCall(VecGetArrayRead(x, &xx)); 2097 PetscCall(VecGetArray(b, &bb)); 2098 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2099 PetscCall(VecRestoreArrayRead(x, &xx)); 2100 PetscCall(VecRestoreArray(b, &bb)); 2101 } 2102 2103 PetscCall(MatDenseGetArray(A, &v)); 2104 for (i = 0; i < N; i++) { 2105 slot = v + rows[i]; 2106 for (j = 0; j < n; j++) { 2107 *slot = 0.0; 2108 slot += m; 2109 } 2110 } 2111 if (diag != 0.0) { 2112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2113 for (i = 0; i < N; i++) { 2114 slot = v + (m + 1) * rows[i]; 2115 *slot = diag; 2116 } 2117 } 2118 PetscCall(MatDenseRestoreArray(A, &v)); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2123 { 2124 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2125 2126 PetscFunctionBegin; 2127 *lda = mat->lda; 2128 PetscFunctionReturn(0); 2129 } 2130 2131 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2132 { 2133 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2134 2135 PetscFunctionBegin; 2136 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2137 *array = mat->v; 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2142 { 2143 PetscFunctionBegin; 2144 if (array) *array = NULL; 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /*@ 2149 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2150 2151 Not collective 2152 2153 Input Parameter: 2154 . mat - a `MATDENSE` or `MATDENSECUDA` matrix 2155 2156 Output Parameter: 2157 . lda - the leading dimension 2158 2159 Level: intermediate 2160 2161 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2162 @*/ 2163 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2164 { 2165 PetscFunctionBegin; 2166 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2167 PetscValidIntPointer(lda, 2); 2168 MatCheckPreallocated(A, 1); 2169 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 /*@ 2174 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2175 2176 Not collective 2177 2178 Input Parameters: 2179 + mat - a `MATDENSE` or `MATDENSECUDA` matrix 2180 - lda - the leading dimension 2181 2182 Level: intermediate 2183 2184 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2185 @*/ 2186 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2187 { 2188 PetscFunctionBegin; 2189 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2190 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 /*@C 2195 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2196 2197 Logically Collective on A 2198 2199 Input Parameter: 2200 . mat - a dense matrix 2201 2202 Output Parameter: 2203 . array - pointer to the data 2204 2205 Level: intermediate 2206 2207 .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2208 @*/ 2209 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array) 2210 { 2211 PetscFunctionBegin; 2212 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2213 PetscValidPointer(array, 2); 2214 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 /*@C 2219 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2220 2221 Logically Collective on A 2222 2223 Input Parameters: 2224 + mat - a dense matrix 2225 - array - pointer to the data (may be NULL) 2226 2227 Level: intermediate 2228 2229 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2230 @*/ 2231 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array) 2232 { 2233 PetscFunctionBegin; 2234 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2235 PetscValidPointer(array, 2); 2236 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2237 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2238 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2239 A->offloadmask = PETSC_OFFLOAD_CPU; 2240 #endif 2241 PetscFunctionReturn(0); 2242 } 2243 2244 /*@C 2245 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2246 2247 Not Collective 2248 2249 Input Parameter: 2250 . mat - a dense matrix 2251 2252 Output Parameter: 2253 . array - pointer to the data 2254 2255 Level: intermediate 2256 2257 .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2258 @*/ 2259 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array) 2260 { 2261 PetscFunctionBegin; 2262 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2263 PetscValidPointer(array, 2); 2264 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 /*@C 2269 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2270 2271 Not Collective 2272 2273 Input Parameters: 2274 + mat - a dense matrix 2275 - array - pointer to the data (may be NULL) 2276 2277 Level: intermediate 2278 2279 .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2280 @*/ 2281 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array) 2282 { 2283 PetscFunctionBegin; 2284 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2285 PetscValidPointer(array, 2); 2286 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2287 PetscFunctionReturn(0); 2288 } 2289 2290 /*@C 2291 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2292 2293 Not Collective 2294 2295 Input Parameter: 2296 . mat - a dense matrix 2297 2298 Output Parameter: 2299 . array - pointer to the data 2300 2301 Level: intermediate 2302 2303 .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2304 @*/ 2305 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array) 2306 { 2307 PetscFunctionBegin; 2308 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2309 PetscValidPointer(array, 2); 2310 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2311 PetscFunctionReturn(0); 2312 } 2313 2314 /*@C 2315 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2316 2317 Not Collective 2318 2319 Input Parameters: 2320 + mat - a dense matrix 2321 - array - pointer to the data (may be NULL) 2322 2323 Level: intermediate 2324 2325 .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2326 @*/ 2327 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array) 2328 { 2329 PetscFunctionBegin; 2330 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2331 PetscValidPointer(array, 2); 2332 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2333 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2334 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2335 A->offloadmask = PETSC_OFFLOAD_CPU; 2336 #endif 2337 PetscFunctionReturn(0); 2338 } 2339 2340 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2341 { 2342 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2343 PetscInt i, j, nrows, ncols, ldb; 2344 const PetscInt *irow, *icol; 2345 PetscScalar *av, *bv, *v = mat->v; 2346 Mat newmat; 2347 2348 PetscFunctionBegin; 2349 PetscCall(ISGetIndices(isrow, &irow)); 2350 PetscCall(ISGetIndices(iscol, &icol)); 2351 PetscCall(ISGetLocalSize(isrow, &nrows)); 2352 PetscCall(ISGetLocalSize(iscol, &ncols)); 2353 2354 /* Check submatrixcall */ 2355 if (scall == MAT_REUSE_MATRIX) { 2356 PetscInt n_cols, n_rows; 2357 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2358 if (n_rows != nrows || n_cols != ncols) { 2359 /* resize the result matrix to match number of requested rows/columns */ 2360 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2361 } 2362 newmat = *B; 2363 } else { 2364 /* Create and fill new matrix */ 2365 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2366 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2367 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2368 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2369 } 2370 2371 /* Now extract the data pointers and do the copy,column at a time */ 2372 PetscCall(MatDenseGetArray(newmat, &bv)); 2373 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2374 for (i = 0; i < ncols; i++) { 2375 av = v + mat->lda * icol[i]; 2376 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2377 bv += ldb; 2378 } 2379 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2380 2381 /* Assemble the matrices so that the correct flags are set */ 2382 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2383 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2384 2385 /* Free work space */ 2386 PetscCall(ISRestoreIndices(isrow, &irow)); 2387 PetscCall(ISRestoreIndices(iscol, &icol)); 2388 *B = newmat; 2389 PetscFunctionReturn(0); 2390 } 2391 2392 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2393 { 2394 PetscInt i; 2395 2396 PetscFunctionBegin; 2397 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2398 2399 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2400 PetscFunctionReturn(0); 2401 } 2402 2403 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) 2404 { 2405 PetscFunctionBegin; 2406 PetscFunctionReturn(0); 2407 } 2408 2409 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) 2410 { 2411 PetscFunctionBegin; 2412 PetscFunctionReturn(0); 2413 } 2414 2415 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2416 { 2417 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2418 const PetscScalar *va; 2419 PetscScalar *vb; 2420 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2421 2422 PetscFunctionBegin; 2423 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2424 if (A->ops->copy != B->ops->copy) { 2425 PetscCall(MatCopy_Basic(A, B, str)); 2426 PetscFunctionReturn(0); 2427 } 2428 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2429 PetscCall(MatDenseGetArrayRead(A, &va)); 2430 PetscCall(MatDenseGetArray(B, &vb)); 2431 if (lda1 > m || lda2 > m) { 2432 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2433 } else { 2434 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2435 } 2436 PetscCall(MatDenseRestoreArray(B, &vb)); 2437 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2438 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2439 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatSetUp_SeqDense(Mat A) 2444 { 2445 PetscFunctionBegin; 2446 PetscCall(PetscLayoutSetUp(A->rmap)); 2447 PetscCall(PetscLayoutSetUp(A->cmap)); 2448 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 static PetscErrorCode MatConjugate_SeqDense(Mat A) 2453 { 2454 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2455 PetscInt i, j; 2456 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2457 PetscScalar *aa; 2458 2459 PetscFunctionBegin; 2460 PetscCall(MatDenseGetArray(A, &aa)); 2461 for (j = 0; j < A->cmap->n; j++) { 2462 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2463 } 2464 PetscCall(MatDenseRestoreArray(A, &aa)); 2465 if (mat->tau) 2466 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2471 { 2472 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2473 PetscInt i, j; 2474 PetscScalar *aa; 2475 2476 PetscFunctionBegin; 2477 PetscCall(MatDenseGetArray(A, &aa)); 2478 for (j = 0; j < A->cmap->n; j++) { 2479 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2480 } 2481 PetscCall(MatDenseRestoreArray(A, &aa)); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2486 { 2487 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2488 PetscInt i, j; 2489 PetscScalar *aa; 2490 2491 PetscFunctionBegin; 2492 PetscCall(MatDenseGetArray(A, &aa)); 2493 for (j = 0; j < A->cmap->n; j++) { 2494 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2495 } 2496 PetscCall(MatDenseRestoreArray(A, &aa)); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 /* ----------------------------------------------------------------*/ 2501 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2502 { 2503 PetscInt m = A->rmap->n, n = B->cmap->n; 2504 PetscBool cisdense = PETSC_FALSE; 2505 2506 PetscFunctionBegin; 2507 PetscCall(MatSetSizes(C, m, n, m, n)); 2508 #if defined(PETSC_HAVE_CUDA) 2509 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2510 #endif 2511 #if defined(PETSC_HAVE_HIP) 2512 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2513 #endif 2514 if (!cisdense) { 2515 PetscBool flg; 2516 2517 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2518 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2519 } 2520 PetscCall(MatSetUp(C)); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2525 { 2526 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2527 PetscBLASInt m, n, k; 2528 const PetscScalar *av, *bv; 2529 PetscScalar *cv; 2530 PetscScalar _DOne = 1.0, _DZero = 0.0; 2531 2532 PetscFunctionBegin; 2533 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2534 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2535 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2536 if (!m || !n || !k) PetscFunctionReturn(0); 2537 PetscCall(MatDenseGetArrayRead(A, &av)); 2538 PetscCall(MatDenseGetArrayRead(B, &bv)); 2539 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2540 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2541 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2542 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2543 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2544 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2549 { 2550 PetscInt m = A->rmap->n, n = B->rmap->n; 2551 PetscBool cisdense = PETSC_FALSE; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatSetSizes(C, m, n, m, n)); 2555 #if defined(PETSC_HAVE_CUDA) 2556 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2557 #endif 2558 #if defined(PETSC_HAVE_HIP) 2559 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2560 #endif 2561 if (!cisdense) { 2562 PetscBool flg; 2563 2564 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2565 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2566 } 2567 PetscCall(MatSetUp(C)); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2572 { 2573 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2574 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2575 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2576 const PetscScalar *av, *bv; 2577 PetscScalar *cv; 2578 PetscBLASInt m, n, k; 2579 PetscScalar _DOne = 1.0, _DZero = 0.0; 2580 2581 PetscFunctionBegin; 2582 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2583 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2584 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2585 if (!m || !n || !k) PetscFunctionReturn(0); 2586 PetscCall(MatDenseGetArrayRead(A, &av)); 2587 PetscCall(MatDenseGetArrayRead(B, &bv)); 2588 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2589 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2590 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2591 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2592 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2593 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2598 { 2599 PetscInt m = A->cmap->n, n = B->cmap->n; 2600 PetscBool cisdense = PETSC_FALSE; 2601 2602 PetscFunctionBegin; 2603 PetscCall(MatSetSizes(C, m, n, m, n)); 2604 #if defined(PETSC_HAVE_CUDA) 2605 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2606 #endif 2607 #if defined(PETSC_HAVE_HIP) 2608 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "")); 2609 #endif 2610 if (!cisdense) { 2611 PetscBool flg; 2612 2613 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2614 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2615 } 2616 PetscCall(MatSetUp(C)); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2621 { 2622 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2623 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2624 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2625 const PetscScalar *av, *bv; 2626 PetscScalar *cv; 2627 PetscBLASInt m, n, k; 2628 PetscScalar _DOne = 1.0, _DZero = 0.0; 2629 2630 PetscFunctionBegin; 2631 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2632 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2633 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2634 if (!m || !n || !k) PetscFunctionReturn(0); 2635 PetscCall(MatDenseGetArrayRead(A, &av)); 2636 PetscCall(MatDenseGetArrayRead(B, &bv)); 2637 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2638 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2639 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2640 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2641 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2642 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 /* ----------------------------------------------- */ 2647 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2648 { 2649 PetscFunctionBegin; 2650 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2651 C->ops->productsymbolic = MatProductSymbolic_AB; 2652 PetscFunctionReturn(0); 2653 } 2654 2655 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2656 { 2657 PetscFunctionBegin; 2658 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2659 C->ops->productsymbolic = MatProductSymbolic_AtB; 2660 PetscFunctionReturn(0); 2661 } 2662 2663 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2664 { 2665 PetscFunctionBegin; 2666 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2667 C->ops->productsymbolic = MatProductSymbolic_ABt; 2668 PetscFunctionReturn(0); 2669 } 2670 2671 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2672 { 2673 Mat_Product *product = C->product; 2674 2675 PetscFunctionBegin; 2676 switch (product->type) { 2677 case MATPRODUCT_AB: 2678 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2679 break; 2680 case MATPRODUCT_AtB: 2681 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2682 break; 2683 case MATPRODUCT_ABt: 2684 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2685 break; 2686 default: 2687 break; 2688 } 2689 PetscFunctionReturn(0); 2690 } 2691 /* ----------------------------------------------- */ 2692 2693 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2694 { 2695 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2696 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2697 PetscScalar *x; 2698 const PetscScalar *aa; 2699 2700 PetscFunctionBegin; 2701 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2702 PetscCall(VecGetArray(v, &x)); 2703 PetscCall(VecGetLocalSize(v, &p)); 2704 PetscCall(MatDenseGetArrayRead(A, &aa)); 2705 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2706 for (i = 0; i < m; i++) { 2707 x[i] = aa[i]; 2708 if (idx) idx[i] = 0; 2709 for (j = 1; j < n; j++) { 2710 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2711 x[i] = aa[i + a->lda * j]; 2712 if (idx) idx[i] = j; 2713 } 2714 } 2715 } 2716 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2717 PetscCall(VecRestoreArray(v, &x)); 2718 PetscFunctionReturn(0); 2719 } 2720 2721 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2722 { 2723 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2724 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2725 PetscScalar *x; 2726 PetscReal atmp; 2727 const PetscScalar *aa; 2728 2729 PetscFunctionBegin; 2730 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2731 PetscCall(VecGetArray(v, &x)); 2732 PetscCall(VecGetLocalSize(v, &p)); 2733 PetscCall(MatDenseGetArrayRead(A, &aa)); 2734 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2735 for (i = 0; i < m; i++) { 2736 x[i] = PetscAbsScalar(aa[i]); 2737 for (j = 1; j < n; j++) { 2738 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2739 if (PetscAbsScalar(x[i]) < atmp) { 2740 x[i] = atmp; 2741 if (idx) idx[i] = j; 2742 } 2743 } 2744 } 2745 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2746 PetscCall(VecRestoreArray(v, &x)); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2751 { 2752 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2753 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2754 PetscScalar *x; 2755 const PetscScalar *aa; 2756 2757 PetscFunctionBegin; 2758 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2759 PetscCall(MatDenseGetArrayRead(A, &aa)); 2760 PetscCall(VecGetArray(v, &x)); 2761 PetscCall(VecGetLocalSize(v, &p)); 2762 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2763 for (i = 0; i < m; i++) { 2764 x[i] = aa[i]; 2765 if (idx) idx[i] = 0; 2766 for (j = 1; j < n; j++) { 2767 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2768 x[i] = aa[i + a->lda * j]; 2769 if (idx) idx[i] = j; 2770 } 2771 } 2772 } 2773 PetscCall(VecRestoreArray(v, &x)); 2774 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2775 PetscFunctionReturn(0); 2776 } 2777 2778 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 2779 { 2780 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2781 PetscScalar *x; 2782 const PetscScalar *aa; 2783 2784 PetscFunctionBegin; 2785 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2786 PetscCall(MatDenseGetArrayRead(A, &aa)); 2787 PetscCall(VecGetArray(v, &x)); 2788 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2789 PetscCall(VecRestoreArray(v, &x)); 2790 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2791 PetscFunctionReturn(0); 2792 } 2793 2794 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 2795 { 2796 PetscInt i, j, m, n; 2797 const PetscScalar *a; 2798 2799 PetscFunctionBegin; 2800 PetscCall(MatGetSize(A, &m, &n)); 2801 PetscCall(PetscArrayzero(reductions, n)); 2802 PetscCall(MatDenseGetArrayRead(A, &a)); 2803 if (type == NORM_2) { 2804 for (i = 0; i < n; i++) { 2805 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 2806 a += m; 2807 } 2808 } else if (type == NORM_1) { 2809 for (i = 0; i < n; i++) { 2810 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 2811 a += m; 2812 } 2813 } else if (type == NORM_INFINITY) { 2814 for (i = 0; i < n; i++) { 2815 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 2816 a += m; 2817 } 2818 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2819 for (i = 0; i < n; i++) { 2820 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 2821 a += m; 2822 } 2823 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2824 for (i = 0; i < n; i++) { 2825 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 2826 a += m; 2827 } 2828 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2829 PetscCall(MatDenseRestoreArrayRead(A, &a)); 2830 if (type == NORM_2) { 2831 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2832 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2833 for (i = 0; i < n; i++) reductions[i] /= m; 2834 } 2835 PetscFunctionReturn(0); 2836 } 2837 2838 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 2839 { 2840 PetscScalar *a; 2841 PetscInt lda, m, n, i, j; 2842 2843 PetscFunctionBegin; 2844 PetscCall(MatGetSize(x, &m, &n)); 2845 PetscCall(MatDenseGetLDA(x, &lda)); 2846 PetscCall(MatDenseGetArrayWrite(x, &a)); 2847 for (j = 0; j < n; j++) { 2848 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 2849 } 2850 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 2855 { 2856 PetscFunctionBegin; 2857 *missing = PETSC_FALSE; 2858 PetscFunctionReturn(0); 2859 } 2860 2861 /* vals is not const */ 2862 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 2863 { 2864 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2865 PetscScalar *v; 2866 2867 PetscFunctionBegin; 2868 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2869 PetscCall(MatDenseGetArray(A, &v)); 2870 *vals = v + col * a->lda; 2871 PetscCall(MatDenseRestoreArray(A, &v)); 2872 PetscFunctionReturn(0); 2873 } 2874 2875 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 2876 { 2877 PetscFunctionBegin; 2878 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 2879 PetscFunctionReturn(0); 2880 } 2881 2882 /* -------------------------------------------------------------------*/ 2883 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 2884 MatGetRow_SeqDense, 2885 MatRestoreRow_SeqDense, 2886 MatMult_SeqDense, 2887 /* 4*/ MatMultAdd_SeqDense, 2888 MatMultTranspose_SeqDense, 2889 MatMultTransposeAdd_SeqDense, 2890 NULL, 2891 NULL, 2892 NULL, 2893 /* 10*/ NULL, 2894 MatLUFactor_SeqDense, 2895 MatCholeskyFactor_SeqDense, 2896 MatSOR_SeqDense, 2897 MatTranspose_SeqDense, 2898 /* 15*/ MatGetInfo_SeqDense, 2899 MatEqual_SeqDense, 2900 MatGetDiagonal_SeqDense, 2901 MatDiagonalScale_SeqDense, 2902 MatNorm_SeqDense, 2903 /* 20*/ MatAssemblyBegin_SeqDense, 2904 MatAssemblyEnd_SeqDense, 2905 MatSetOption_SeqDense, 2906 MatZeroEntries_SeqDense, 2907 /* 24*/ MatZeroRows_SeqDense, 2908 NULL, 2909 NULL, 2910 NULL, 2911 NULL, 2912 /* 29*/ MatSetUp_SeqDense, 2913 NULL, 2914 NULL, 2915 NULL, 2916 NULL, 2917 /* 34*/ MatDuplicate_SeqDense, 2918 NULL, 2919 NULL, 2920 NULL, 2921 NULL, 2922 /* 39*/ MatAXPY_SeqDense, 2923 MatCreateSubMatrices_SeqDense, 2924 NULL, 2925 MatGetValues_SeqDense, 2926 MatCopy_SeqDense, 2927 /* 44*/ MatGetRowMax_SeqDense, 2928 MatScale_SeqDense, 2929 MatShift_SeqDense, 2930 NULL, 2931 MatZeroRowsColumns_SeqDense, 2932 /* 49*/ MatSetRandom_SeqDense, 2933 NULL, 2934 NULL, 2935 NULL, 2936 NULL, 2937 /* 54*/ NULL, 2938 NULL, 2939 NULL, 2940 NULL, 2941 NULL, 2942 /* 59*/ MatCreateSubMatrix_SeqDense, 2943 MatDestroy_SeqDense, 2944 MatView_SeqDense, 2945 NULL, 2946 NULL, 2947 /* 64*/ NULL, 2948 NULL, 2949 NULL, 2950 NULL, 2951 NULL, 2952 /* 69*/ MatGetRowMaxAbs_SeqDense, 2953 NULL, 2954 NULL, 2955 NULL, 2956 NULL, 2957 /* 74*/ NULL, 2958 NULL, 2959 NULL, 2960 NULL, 2961 NULL, 2962 /* 79*/ NULL, 2963 NULL, 2964 NULL, 2965 NULL, 2966 /* 83*/ MatLoad_SeqDense, 2967 MatIsSymmetric_SeqDense, 2968 MatIsHermitian_SeqDense, 2969 NULL, 2970 NULL, 2971 NULL, 2972 /* 89*/ NULL, 2973 NULL, 2974 MatMatMultNumeric_SeqDense_SeqDense, 2975 NULL, 2976 NULL, 2977 /* 94*/ NULL, 2978 NULL, 2979 NULL, 2980 MatMatTransposeMultNumeric_SeqDense_SeqDense, 2981 NULL, 2982 /* 99*/ MatProductSetFromOptions_SeqDense, 2983 NULL, 2984 NULL, 2985 MatConjugate_SeqDense, 2986 NULL, 2987 /*104*/ NULL, 2988 MatRealPart_SeqDense, 2989 MatImaginaryPart_SeqDense, 2990 NULL, 2991 NULL, 2992 /*109*/ NULL, 2993 NULL, 2994 MatGetRowMin_SeqDense, 2995 MatGetColumnVector_SeqDense, 2996 MatMissingDiagonal_SeqDense, 2997 /*114*/ NULL, 2998 NULL, 2999 NULL, 3000 NULL, 3001 NULL, 3002 /*119*/ NULL, 3003 NULL, 3004 NULL, 3005 NULL, 3006 NULL, 3007 /*124*/ NULL, 3008 MatGetColumnReductions_SeqDense, 3009 NULL, 3010 NULL, 3011 NULL, 3012 /*129*/ NULL, 3013 NULL, 3014 NULL, 3015 MatTransposeMatMultNumeric_SeqDense_SeqDense, 3016 NULL, 3017 /*134*/ NULL, 3018 NULL, 3019 NULL, 3020 NULL, 3021 NULL, 3022 /*139*/ NULL, 3023 NULL, 3024 NULL, 3025 NULL, 3026 NULL, 3027 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3028 /*145*/ NULL, 3029 NULL, 3030 NULL, 3031 NULL, 3032 NULL, 3033 /*150*/ NULL}; 3034 3035 /*@C 3036 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3037 is stored in column major order (the usual Fortran 77 manner). Many 3038 of the matrix operations use the BLAS and LAPACK routines. 3039 3040 Collective 3041 3042 Input Parameters: 3043 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3044 . m - number of rows 3045 . n - number of columns 3046 - data - optional location of matrix data in column major order. Set data=NULL for PETSc 3047 to control all matrix memory allocation. 3048 3049 Output Parameter: 3050 . A - the matrix 3051 3052 Note: 3053 The data input variable is intended primarily for Fortran programmers 3054 who wish to allocate their own matrix memory space. Most users should 3055 set data=NULL. 3056 3057 Level: intermediate 3058 3059 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3060 @*/ 3061 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A) 3062 { 3063 PetscFunctionBegin; 3064 PetscCall(MatCreate(comm, A)); 3065 PetscCall(MatSetSizes(*A, m, n, m, n)); 3066 PetscCall(MatSetType(*A, MATSEQDENSE)); 3067 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 /*@C 3072 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3073 3074 Collective 3075 3076 Input Parameters: 3077 + B - the matrix 3078 - data - the array (or NULL) 3079 3080 Note: 3081 The data input variable is intended primarily for Fortran programmers 3082 who wish to allocate their own matrix memory space. Most users should 3083 need not call this routine. 3084 3085 Level: intermediate 3086 3087 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3088 @*/ 3089 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3090 { 3091 PetscFunctionBegin; 3092 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3093 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3098 { 3099 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3100 3101 PetscFunctionBegin; 3102 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3103 B->preallocated = PETSC_TRUE; 3104 3105 PetscCall(PetscLayoutSetUp(B->rmap)); 3106 PetscCall(PetscLayoutSetUp(B->cmap)); 3107 3108 if (b->lda <= 0) b->lda = B->rmap->n; 3109 3110 if (!data) { /* petsc-allocated storage */ 3111 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3112 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3113 3114 b->user_alloc = PETSC_FALSE; 3115 } else { /* user-allocated storage */ 3116 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3117 b->v = data; 3118 b->user_alloc = PETSC_TRUE; 3119 } 3120 B->assembled = PETSC_TRUE; 3121 PetscFunctionReturn(0); 3122 } 3123 3124 #if defined(PETSC_HAVE_ELEMENTAL) 3125 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3126 { 3127 Mat mat_elemental; 3128 const PetscScalar *array; 3129 PetscScalar *v_colwise; 3130 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3131 3132 PetscFunctionBegin; 3133 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3134 PetscCall(MatDenseGetArrayRead(A, &array)); 3135 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3136 k = 0; 3137 for (j = 0; j < N; j++) { 3138 cols[j] = j; 3139 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3140 } 3141 for (i = 0; i < M; i++) rows[i] = i; 3142 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3143 3144 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3145 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3146 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3147 PetscCall(MatSetUp(mat_elemental)); 3148 3149 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3150 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3151 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3152 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3153 PetscCall(PetscFree3(v_colwise, rows, cols)); 3154 3155 if (reuse == MAT_INPLACE_MATRIX) { 3156 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3157 } else { 3158 *newmat = mat_elemental; 3159 } 3160 PetscFunctionReturn(0); 3161 } 3162 #endif 3163 3164 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3165 { 3166 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3167 PetscBool data; 3168 3169 PetscFunctionBegin; 3170 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3171 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3172 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3173 b->lda = lda; 3174 PetscFunctionReturn(0); 3175 } 3176 3177 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3178 { 3179 PetscFunctionBegin; 3180 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3185 { 3186 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3187 3188 PetscFunctionBegin; 3189 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3190 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3191 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3192 a->vecinuse = col + 1; 3193 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3194 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3195 *v = a->cvec; 3196 PetscFunctionReturn(0); 3197 } 3198 3199 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3200 { 3201 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3202 3203 PetscFunctionBegin; 3204 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3205 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3206 a->vecinuse = 0; 3207 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3208 PetscCall(VecResetArray(a->cvec)); 3209 if (v) *v = NULL; 3210 PetscFunctionReturn(0); 3211 } 3212 3213 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3214 { 3215 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3216 3217 PetscFunctionBegin; 3218 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3219 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3220 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3221 a->vecinuse = col + 1; 3222 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3223 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3224 PetscCall(VecLockReadPush(a->cvec)); 3225 *v = a->cvec; 3226 PetscFunctionReturn(0); 3227 } 3228 3229 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3230 { 3231 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3232 3233 PetscFunctionBegin; 3234 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3235 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3236 a->vecinuse = 0; 3237 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3238 PetscCall(VecLockReadPop(a->cvec)); 3239 PetscCall(VecResetArray(a->cvec)); 3240 if (v) *v = NULL; 3241 PetscFunctionReturn(0); 3242 } 3243 3244 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3245 { 3246 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3247 3248 PetscFunctionBegin; 3249 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3250 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3251 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3252 a->vecinuse = col + 1; 3253 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3254 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3255 *v = a->cvec; 3256 PetscFunctionReturn(0); 3257 } 3258 3259 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3260 { 3261 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3262 3263 PetscFunctionBegin; 3264 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3265 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3266 a->vecinuse = 0; 3267 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3268 PetscCall(VecResetArray(a->cvec)); 3269 if (v) *v = NULL; 3270 PetscFunctionReturn(0); 3271 } 3272 3273 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3274 { 3275 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3276 3277 PetscFunctionBegin; 3278 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3279 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3280 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3281 if (!a->cmat) { 3282 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat)); 3283 } else { 3284 PetscCall(MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda)); 3285 } 3286 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3287 a->matinuse = cbegin + 1; 3288 *v = a->cmat; 3289 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3290 A->offloadmask = PETSC_OFFLOAD_CPU; 3291 #endif 3292 PetscFunctionReturn(0); 3293 } 3294 3295 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3296 { 3297 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3298 3299 PetscFunctionBegin; 3300 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3301 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3302 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3303 a->matinuse = 0; 3304 PetscCall(MatDenseResetArray(a->cmat)); 3305 if (v) *v = NULL; 3306 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 3307 A->offloadmask = PETSC_OFFLOAD_CPU; 3308 #endif 3309 PetscFunctionReturn(0); 3310 } 3311 3312 /*MC 3313 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3314 3315 Options Database Keys: 3316 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3317 3318 Level: beginner 3319 3320 .seealso: `MATSEQDENSE`, `MatCreateSeqDense()` 3321 M*/ 3322 PetscErrorCode MatCreate_SeqDense(Mat B) 3323 { 3324 Mat_SeqDense *b; 3325 PetscMPIInt size; 3326 3327 PetscFunctionBegin; 3328 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3329 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3330 3331 PetscCall(PetscNew(&b)); 3332 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3333 B->data = (void *)b; 3334 3335 b->roworiented = PETSC_TRUE; 3336 3337 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3338 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3339 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3340 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3341 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3342 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3343 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3344 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3345 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3346 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3347 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3348 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3349 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3350 #if defined(PETSC_HAVE_ELEMENTAL) 3351 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3352 #endif 3353 #if defined(PETSC_HAVE_SCALAPACK) 3354 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3355 #endif 3356 #if defined(PETSC_HAVE_CUDA) 3357 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3358 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3359 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3360 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3361 #endif 3362 #if defined(PETSC_HAVE_HIP) 3363 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP)); 3364 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3365 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense)); 3366 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense)); 3367 #endif 3368 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3369 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3370 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3371 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3372 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3373 3374 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3375 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3376 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3377 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3378 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3379 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3380 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3384 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3385 PetscFunctionReturn(0); 3386 } 3387 3388 /*@C 3389 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3390 3391 Not Collective 3392 3393 Input Parameters: 3394 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3395 - col - column index 3396 3397 Output Parameter: 3398 . vals - pointer to the data 3399 3400 Level: intermediate 3401 3402 Note: 3403 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3404 3405 .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3406 @*/ 3407 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals) 3408 { 3409 PetscFunctionBegin; 3410 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3411 PetscValidLogicalCollectiveInt(A, col, 2); 3412 PetscValidPointer(vals, 3); 3413 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3414 PetscFunctionReturn(0); 3415 } 3416 3417 /*@C 3418 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3419 3420 Not Collective 3421 3422 Input Parameters: 3423 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3424 - vals - pointer to the data (may be NULL) 3425 3426 Level: intermediate 3427 3428 .seealso: `MATDENSE`, `MatDenseGetColumn()` 3429 @*/ 3430 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals) 3431 { 3432 PetscFunctionBegin; 3433 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3434 PetscValidPointer(vals, 2); 3435 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 /*@ 3440 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3441 3442 Collective 3443 3444 Input Parameters: 3445 + mat - the `Mat` object 3446 - col - the column index 3447 3448 Output Parameter: 3449 . v - the vector 3450 3451 Notes: 3452 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3453 3454 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3455 3456 Level: intermediate 3457 3458 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3459 @*/ 3460 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3461 { 3462 PetscFunctionBegin; 3463 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3464 PetscValidType(A, 1); 3465 PetscValidLogicalCollectiveInt(A, col, 2); 3466 PetscValidPointer(v, 3); 3467 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3468 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3469 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3470 PetscFunctionReturn(0); 3471 } 3472 3473 /*@ 3474 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec(). 3475 3476 Collective 3477 3478 Input Parameters: 3479 + mat - the Mat object 3480 . col - the column index 3481 - v - the Vec object (may be NULL) 3482 3483 Level: intermediate 3484 3485 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3486 @*/ 3487 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3488 { 3489 PetscFunctionBegin; 3490 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3491 PetscValidType(A, 1); 3492 PetscValidLogicalCollectiveInt(A, col, 2); 3493 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3494 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3495 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3496 PetscFunctionReturn(0); 3497 } 3498 3499 /*@ 3500 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec. 3501 3502 Collective 3503 3504 Input Parameters: 3505 + mat - the Mat object 3506 - col - the column index 3507 3508 Output Parameter: 3509 . v - the vector 3510 3511 Notes: 3512 The vector is owned by PETSc and users cannot modify it. 3513 3514 Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed. 3515 3516 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access. 3517 3518 Level: intermediate 3519 3520 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3521 @*/ 3522 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3523 { 3524 PetscFunctionBegin; 3525 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3526 PetscValidType(A, 1); 3527 PetscValidLogicalCollectiveInt(A, col, 2); 3528 PetscValidPointer(v, 3); 3529 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3530 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3531 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 /*@ 3536 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead(). 3537 3538 Collective 3539 3540 Input Parameters: 3541 + mat - the Mat object 3542 . col - the column index 3543 - v - the Vec object (may be NULL) 3544 3545 Level: intermediate 3546 3547 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3548 @*/ 3549 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3550 { 3551 PetscFunctionBegin; 3552 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3553 PetscValidType(A, 1); 3554 PetscValidLogicalCollectiveInt(A, col, 2); 3555 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3556 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3557 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3558 PetscFunctionReturn(0); 3559 } 3560 3561 /*@ 3562 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec. 3563 3564 Collective 3565 3566 Input Parameters: 3567 + mat - the Mat object 3568 - col - the column index 3569 3570 Output Parameter: 3571 . v - the vector 3572 3573 Notes: 3574 The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed. 3575 3576 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access. 3577 3578 Level: intermediate 3579 3580 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3581 @*/ 3582 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3583 { 3584 PetscFunctionBegin; 3585 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3586 PetscValidType(A, 1); 3587 PetscValidLogicalCollectiveInt(A, col, 2); 3588 PetscValidPointer(v, 3); 3589 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3590 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3591 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3592 PetscFunctionReturn(0); 3593 } 3594 3595 /*@ 3596 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite(). 3597 3598 Collective 3599 3600 Input Parameters: 3601 + mat - the Mat object 3602 . col - the column index 3603 - v - the Vec object (may be NULL) 3604 3605 Level: intermediate 3606 3607 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3608 @*/ 3609 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3610 { 3611 PetscFunctionBegin; 3612 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3613 PetscValidType(A, 1); 3614 PetscValidLogicalCollectiveInt(A, col, 2); 3615 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3616 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3617 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3618 PetscFunctionReturn(0); 3619 } 3620 3621 /*@ 3622 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat. 3623 3624 Collective 3625 3626 Input Parameters: 3627 + mat - the Mat object 3628 . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0) 3629 . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M) 3630 . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0) 3631 - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N) 3632 3633 Output Parameter: 3634 . v - the matrix 3635 3636 Notes: 3637 The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed. 3638 3639 The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows. 3640 3641 Level: intermediate 3642 3643 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3644 @*/ 3645 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3646 { 3647 PetscFunctionBegin; 3648 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3649 PetscValidType(A, 1); 3650 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3651 PetscValidLogicalCollectiveInt(A, rend, 3); 3652 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3653 PetscValidLogicalCollectiveInt(A, cend, 5); 3654 PetscValidPointer(v, 6); 3655 if (rbegin == PETSC_DECIDE) rbegin = 0; 3656 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3657 if (cbegin == PETSC_DECIDE) cbegin = 0; 3658 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3659 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3660 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3661 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3662 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3663 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3664 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3665 PetscFunctionReturn(0); 3666 } 3667 3668 /*@ 3669 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix(). 3670 3671 Collective 3672 3673 Input Parameters: 3674 + mat - the Mat object 3675 - v - the Mat object (may be NULL) 3676 3677 Level: intermediate 3678 3679 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3680 @*/ 3681 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3682 { 3683 PetscFunctionBegin; 3684 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3685 PetscValidType(A, 1); 3686 PetscValidPointer(v, 2); 3687 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3688 PetscFunctionReturn(0); 3689 } 3690 3691 #include <petscblaslapack.h> 3692 #include <petsc/private/kernels/blockinvert.h> 3693 3694 PetscErrorCode MatSeqDenseInvert(Mat A) 3695 { 3696 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3697 PetscInt bs = A->rmap->n; 3698 MatScalar *values = a->v; 3699 const PetscReal shift = 0.0; 3700 PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE; 3701 3702 PetscFunctionBegin; 3703 /* factor and invert each block */ 3704 switch (bs) { 3705 case 1: 3706 values[0] = (PetscScalar)1.0 / (values[0] + shift); 3707 break; 3708 case 2: 3709 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3710 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3711 break; 3712 case 3: 3713 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3714 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3715 break; 3716 case 4: 3717 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3718 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3719 break; 3720 case 5: { 3721 PetscScalar work[25]; 3722 PetscInt ipvt[5]; 3723 3724 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3725 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3726 } break; 3727 case 6: 3728 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3729 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3730 break; 3731 case 7: 3732 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3733 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3734 break; 3735 default: { 3736 PetscInt *v_pivots, *IJ, j; 3737 PetscScalar *v_work; 3738 3739 PetscCall(PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ)); 3740 for (j = 0; j < bs; j++) IJ[j] = j; 3741 PetscCall(PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 3742 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3743 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 3744 } 3745 } 3746 PetscFunctionReturn(0); 3747 } 3748