1 /* 2 Defines the basic matrix operations for sequential dense. 3 */ 4 5 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <../src/mat/impls/aij/seq/aij.h> 8 9 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) 10 { 11 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 12 PetscInt j, k, n = A->rmap->n; 13 PetscScalar *v; 14 15 PetscFunctionBegin; 16 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 17 PetscCall(MatDenseGetArray(A, &v)); 18 if (!hermitian) { 19 for (k = 0; k < n; k++) { 20 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 21 } 22 } else { 23 for (k = 0; k < n; k++) { 24 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 25 } 26 } 27 PetscCall(MatDenseRestoreArray(A, &v)); 28 PetscFunctionReturn(0); 29 } 30 31 PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) 32 { 33 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 34 PetscBLASInt info, n; 35 36 PetscFunctionBegin; 37 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 38 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 39 if (A->factortype == MAT_FACTOR_LU) { 40 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 41 if (!mat->fwork) { 42 mat->lfwork = n; 43 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 44 } 45 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 46 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 47 PetscCall(PetscFPTrapPop()); 48 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 49 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 50 if (A->spd == PETSC_BOOL3_TRUE) { 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 53 PetscCall(PetscFPTrapPop()); 54 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 55 #if defined(PETSC_USE_COMPLEX) 56 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 57 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 58 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 59 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 60 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 61 PetscCall(PetscFPTrapPop()); 62 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 63 #endif 64 } else { /* symmetric case */ 65 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 66 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 67 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 68 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 69 PetscCall(PetscFPTrapPop()); 70 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 71 } 72 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 73 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 74 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 75 76 A->ops->solve = NULL; 77 A->ops->matsolve = NULL; 78 A->ops->solvetranspose = NULL; 79 A->ops->matsolvetranspose = NULL; 80 A->ops->solveadd = NULL; 81 A->ops->solvetransposeadd = NULL; 82 A->factortype = MAT_FACTOR_NONE; 83 PetscCall(PetscFree(A->solvertype)); 84 PetscFunctionReturn(0); 85 } 86 87 PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 88 { 89 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 90 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 91 PetscScalar *slot, *bb, *v; 92 const PetscScalar *xx; 93 94 PetscFunctionBegin; 95 if (PetscDefined(USE_DEBUG)) { 96 for (i = 0; i < N; i++) { 97 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 98 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 99 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 100 } 101 } 102 if (!N) PetscFunctionReturn(0); 103 104 /* fix right hand side if needed */ 105 if (x && b) { 106 Vec xt; 107 108 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 109 PetscCall(VecDuplicate(x, &xt)); 110 PetscCall(VecCopy(x, xt)); 111 PetscCall(VecScale(xt, -1.0)); 112 PetscCall(MatMultAdd(A, xt, b, b)); 113 PetscCall(VecDestroy(&xt)); 114 PetscCall(VecGetArrayRead(x, &xx)); 115 PetscCall(VecGetArray(b, &bb)); 116 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 117 PetscCall(VecRestoreArrayRead(x, &xx)); 118 PetscCall(VecRestoreArray(b, &bb)); 119 } 120 121 PetscCall(MatDenseGetArray(A, &v)); 122 for (i = 0; i < N; i++) { 123 slot = v + rows[i] * m; 124 PetscCall(PetscArrayzero(slot, r)); 125 } 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i]; 128 for (j = 0; j < n; j++) { 129 *slot = 0.0; 130 slot += m; 131 } 132 } 133 if (diag != 0.0) { 134 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 135 for (i = 0; i < N; i++) { 136 slot = v + (m + 1) * rows[i]; 137 *slot = diag; 138 } 139 } 140 PetscCall(MatDenseRestoreArray(A, &v)); 141 PetscFunctionReturn(0); 142 } 143 144 PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C) 145 { 146 Mat_SeqDense *c = (Mat_SeqDense *)(C->data); 147 148 PetscFunctionBegin; 149 if (c->ptapwork) { 150 PetscCall((*C->ops->matmultnumeric)(A, P, c->ptapwork)); 151 PetscCall((*C->ops->transposematmultnumeric)(P, c->ptapwork, C)); 152 } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first"); 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C) 157 { 158 Mat_SeqDense *c; 159 PetscBool cisdense; 160 161 PetscFunctionBegin; 162 PetscCall(MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N)); 163 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 164 if (!cisdense) { 165 PetscBool flg; 166 167 PetscCall(PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg)); 168 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 169 } 170 PetscCall(MatSetUp(C)); 171 c = (Mat_SeqDense *)C->data; 172 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork)); 173 PetscCall(MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N)); 174 PetscCall(MatSetType(c->ptapwork, ((PetscObject)C)->type_name)); 175 PetscCall(MatSetUp(c->ptapwork)); 176 PetscFunctionReturn(0); 177 } 178 179 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 180 { 181 Mat B = NULL; 182 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 183 Mat_SeqDense *b; 184 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 185 const MatScalar *av; 186 PetscBool isseqdense; 187 188 PetscFunctionBegin; 189 if (reuse == MAT_REUSE_MATRIX) { 190 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 191 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)(*newmat))->type_name); 192 } 193 if (reuse != MAT_REUSE_MATRIX) { 194 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 195 PetscCall(MatSetSizes(B, m, n, m, n)); 196 PetscCall(MatSetType(B, MATSEQDENSE)); 197 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 198 b = (Mat_SeqDense *)(B->data); 199 } else { 200 b = (Mat_SeqDense *)((*newmat)->data); 201 PetscCall(PetscArrayzero(b->v, m * n)); 202 } 203 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 204 for (i = 0; i < m; i++) { 205 PetscInt j; 206 for (j = 0; j < ai[1] - ai[0]; j++) { 207 b->v[*aj * m + i] = *av; 208 aj++; 209 av++; 210 } 211 ai++; 212 } 213 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 214 215 if (reuse == MAT_INPLACE_MATRIX) { 216 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 217 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 218 PetscCall(MatHeaderReplace(A, &B)); 219 } else { 220 if (B) *newmat = B; 221 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 222 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 223 } 224 PetscFunctionReturn(0); 225 } 226 227 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 228 { 229 Mat B = NULL; 230 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 231 PetscInt i, j; 232 PetscInt *rows, *nnz; 233 MatScalar *aa = a->v, *vals; 234 235 PetscFunctionBegin; 236 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 237 if (reuse != MAT_REUSE_MATRIX) { 238 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 239 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 240 PetscCall(MatSetType(B, MATSEQAIJ)); 241 for (j = 0; j < A->cmap->n; j++) { 242 for (i = 0; i < A->rmap->n; i++) 243 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 244 aa += a->lda; 245 } 246 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 247 } else B = *newmat; 248 aa = a->v; 249 for (j = 0; j < A->cmap->n; j++) { 250 PetscInt numRows = 0; 251 for (i = 0; i < A->rmap->n; i++) 252 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 253 rows[numRows] = i; 254 vals[numRows++] = aa[i]; 255 } 256 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 257 aa += a->lda; 258 } 259 PetscCall(PetscFree3(rows, nnz, vals)); 260 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 261 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 262 263 if (reuse == MAT_INPLACE_MATRIX) { 264 PetscCall(MatHeaderReplace(A, &B)); 265 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 266 PetscFunctionReturn(0); 267 } 268 269 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 270 { 271 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 272 const PetscScalar *xv; 273 PetscScalar *yv; 274 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 275 276 PetscFunctionBegin; 277 PetscCall(MatDenseGetArrayRead(X, &xv)); 278 PetscCall(MatDenseGetArray(Y, &yv)); 279 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 280 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 281 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 282 PetscCall(PetscBLASIntCast(y->lda, &lday)); 283 if (ldax > m || lday > m) { 284 PetscInt j; 285 286 for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one)); 287 } else { 288 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 289 } 290 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 291 PetscCall(MatDenseRestoreArray(Y, &yv)); 292 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 293 PetscFunctionReturn(0); 294 } 295 296 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) 297 { 298 PetscLogDouble N = A->rmap->n * A->cmap->n; 299 300 PetscFunctionBegin; 301 info->block_size = 1.0; 302 info->nz_allocated = N; 303 info->nz_used = N; 304 info->nz_unneeded = 0; 305 info->assemblies = A->num_ass; 306 info->mallocs = 0; 307 info->memory = 0; /* REVIEW ME */ 308 info->fill_ratio_given = 0; 309 info->fill_ratio_needed = 0; 310 info->factor_mallocs = 0; 311 PetscFunctionReturn(0); 312 } 313 314 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) 315 { 316 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 317 PetscScalar *v; 318 PetscBLASInt one = 1, j, nz, lda = 0; 319 320 PetscFunctionBegin; 321 PetscCall(MatDenseGetArray(A, &v)); 322 PetscCall(PetscBLASIntCast(a->lda, &lda)); 323 if (lda > A->rmap->n) { 324 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 325 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 326 } else { 327 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 328 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 329 } 330 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 331 PetscCall(MatDenseRestoreArray(A, &v)); 332 PetscFunctionReturn(0); 333 } 334 335 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) 336 { 337 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 338 PetscScalar *v; 339 PetscInt j, k; 340 341 PetscFunctionBegin; 342 PetscCall(MatDenseGetArray(A, &v)); 343 k = PetscMin(A->rmap->n, A->cmap->n); 344 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 345 PetscCall(PetscLogFlops(k)); 346 PetscCall(MatDenseRestoreArray(A, &v)); 347 PetscFunctionReturn(0); 348 } 349 350 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 351 { 352 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 353 PetscInt i, j, m = A->rmap->n, N = a->lda; 354 const PetscScalar *v; 355 356 PetscFunctionBegin; 357 *fl = PETSC_FALSE; 358 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 359 PetscCall(MatDenseGetArrayRead(A, &v)); 360 for (i = 0; i < m; i++) { 361 for (j = i; j < m; j++) { 362 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 363 } 364 } 365 *fl = PETSC_TRUE; 366 restore: 367 PetscCall(MatDenseRestoreArrayRead(A, &v)); 368 PetscFunctionReturn(0); 369 } 370 371 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) 372 { 373 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 374 PetscInt i, j, m = A->rmap->n, N = a->lda; 375 const PetscScalar *v; 376 377 PetscFunctionBegin; 378 *fl = PETSC_FALSE; 379 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 380 PetscCall(MatDenseGetArrayRead(A, &v)); 381 for (i = 0; i < m; i++) { 382 for (j = i; j < m; j++) { 383 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 384 } 385 } 386 *fl = PETSC_TRUE; 387 restore: 388 PetscCall(MatDenseRestoreArrayRead(A, &v)); 389 PetscFunctionReturn(0); 390 } 391 392 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) 393 { 394 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 395 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 396 PetscBool isdensecpu; 397 398 PetscFunctionBegin; 399 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 400 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 401 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 402 PetscCall(MatDenseSetLDA(newi, lda)); 403 } 404 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 405 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 406 if (cpvalues == MAT_COPY_VALUES) { 407 const PetscScalar *av; 408 PetscScalar *v; 409 410 PetscCall(MatDenseGetArrayRead(A, &av)); 411 PetscCall(MatDenseGetArrayWrite(newi, &v)); 412 PetscCall(MatDenseGetLDA(newi, &nlda)); 413 m = A->rmap->n; 414 if (lda > m || nlda > m) { 415 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(v + j * nlda, av + j * lda, m)); 416 } else { 417 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 418 } 419 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 420 PetscCall(MatDenseRestoreArrayRead(A, &av)); 421 } 422 PetscFunctionReturn(0); 423 } 424 425 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 426 { 427 PetscFunctionBegin; 428 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 429 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 430 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 431 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 432 PetscFunctionReturn(0); 433 } 434 435 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 436 { 437 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 438 PetscBLASInt info; 439 440 PetscFunctionBegin; 441 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 442 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 443 PetscCall(PetscFPTrapPop()); 444 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 445 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 446 PetscFunctionReturn(0); 447 } 448 449 static PetscErrorCode MatConjugate_SeqDense(Mat); 450 451 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) 452 { 453 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 454 PetscBLASInt info; 455 456 PetscFunctionBegin; 457 if (A->spd == PETSC_BOOL3_TRUE) { 458 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 459 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 460 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 461 PetscCall(PetscFPTrapPop()); 462 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 463 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 464 #if defined(PETSC_USE_COMPLEX) 465 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 466 if (T) PetscCall(MatConjugate_SeqDense(A)); 467 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 468 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 469 PetscCall(PetscFPTrapPop()); 470 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 471 if (T) PetscCall(MatConjugate_SeqDense(A)); 472 #endif 473 } else { /* symmetric case */ 474 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 475 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 476 PetscCall(PetscFPTrapPop()); 477 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 478 } 479 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 480 PetscFunctionReturn(0); 481 } 482 483 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 484 { 485 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 486 PetscBLASInt info; 487 char trans; 488 489 PetscFunctionBegin; 490 if (PetscDefined(USE_COMPLEX)) { 491 trans = 'C'; 492 } else { 493 trans = 'T'; 494 } 495 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 496 { /* lwork depends on the number of right-hand sides */ 497 PetscBLASInt nlfwork, lfwork = -1; 498 PetscScalar fwork; 499 500 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 501 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 502 if (nlfwork > mat->lfwork) { 503 mat->lfwork = nlfwork; 504 PetscCall(PetscFree(mat->fwork)); 505 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 506 } 507 } 508 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 509 PetscCall(PetscFPTrapPop()); 510 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 511 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 512 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 513 PetscCall(PetscFPTrapPop()); 514 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 515 for (PetscInt j = 0; j < nrhs; j++) { 516 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 517 } 518 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 519 PetscFunctionReturn(0); 520 } 521 522 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) 523 { 524 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 525 PetscBLASInt info; 526 527 PetscFunctionBegin; 528 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 529 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 530 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 531 PetscCall(PetscFPTrapPop()); 532 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 533 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 534 { /* lwork depends on the number of right-hand sides */ 535 PetscBLASInt nlfwork, lfwork = -1; 536 PetscScalar fwork; 537 538 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 539 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 540 if (nlfwork > mat->lfwork) { 541 mat->lfwork = nlfwork; 542 PetscCall(PetscFree(mat->fwork)); 543 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 544 } 545 } 546 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 547 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 548 PetscCall(PetscFPTrapPop()); 549 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 550 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 551 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 552 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 553 PetscFunctionReturn(0); 554 } 555 556 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 557 { 558 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 559 PetscScalar *y; 560 PetscBLASInt m = 0, k = 0; 561 562 PetscFunctionBegin; 563 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 564 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 565 if (k < m) { 566 PetscCall(VecCopy(xx, mat->qrrhs)); 567 PetscCall(VecGetArray(mat->qrrhs, &y)); 568 } else { 569 PetscCall(VecCopy(xx, yy)); 570 PetscCall(VecGetArray(yy, &y)); 571 } 572 *_y = y; 573 *_k = k; 574 *_m = m; 575 PetscFunctionReturn(0); 576 } 577 578 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) 579 { 580 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 581 PetscScalar *y = NULL; 582 PetscBLASInt m, k; 583 584 PetscFunctionBegin; 585 y = *_y; 586 *_y = NULL; 587 k = *_k; 588 m = *_m; 589 if (k < m) { 590 PetscScalar *yv; 591 PetscCall(VecGetArray(yy, &yv)); 592 PetscCall(PetscArraycpy(yv, y, k)); 593 PetscCall(VecRestoreArray(yy, &yv)); 594 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 595 } else { 596 PetscCall(VecRestoreArray(yy, &y)); 597 } 598 PetscFunctionReturn(0); 599 } 600 601 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) 602 { 603 PetscScalar *y = NULL; 604 PetscBLASInt m = 0, k = 0; 605 606 PetscFunctionBegin; 607 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 608 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 609 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 610 PetscFunctionReturn(0); 611 } 612 613 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) 614 { 615 PetscScalar *y = NULL; 616 PetscBLASInt m = 0, k = 0; 617 618 PetscFunctionBegin; 619 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 620 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 621 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 622 PetscFunctionReturn(0); 623 } 624 625 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 626 { 627 PetscScalar *y = NULL; 628 PetscBLASInt m = 0, k = 0; 629 630 PetscFunctionBegin; 631 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 632 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 633 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 634 PetscFunctionReturn(0); 635 } 636 637 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) 638 { 639 PetscScalar *y = NULL; 640 PetscBLASInt m = 0, k = 0; 641 642 PetscFunctionBegin; 643 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 644 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 645 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 646 PetscFunctionReturn(0); 647 } 648 649 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) 650 { 651 PetscScalar *y = NULL; 652 PetscBLASInt m = 0, k = 0; 653 654 PetscFunctionBegin; 655 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 656 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 657 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 658 PetscFunctionReturn(0); 659 } 660 661 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) 662 { 663 PetscScalar *y = NULL; 664 PetscBLASInt m = 0, k = 0; 665 666 PetscFunctionBegin; 667 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 668 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 669 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 670 PetscFunctionReturn(0); 671 } 672 673 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 674 { 675 const PetscScalar *b; 676 PetscScalar *y; 677 PetscInt n, _ldb, _ldx; 678 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 679 680 PetscFunctionBegin; 681 *_ldy = 0; 682 *_m = 0; 683 *_nrhs = 0; 684 *_k = 0; 685 *_y = NULL; 686 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 687 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 688 PetscCall(MatGetSize(B, NULL, &n)); 689 PetscCall(PetscBLASIntCast(n, &nrhs)); 690 PetscCall(MatDenseGetLDA(B, &_ldb)); 691 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 692 PetscCall(MatDenseGetLDA(X, &_ldx)); 693 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 694 if (ldx < m) { 695 PetscCall(MatDenseGetArrayRead(B, &b)); 696 PetscCall(PetscMalloc1(nrhs * m, &y)); 697 if (ldb == m) { 698 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 699 } else { 700 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 701 } 702 ldy = m; 703 PetscCall(MatDenseRestoreArrayRead(B, &b)); 704 } else { 705 if (ldb == ldx) { 706 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 707 PetscCall(MatDenseGetArray(X, &y)); 708 } else { 709 PetscCall(MatDenseGetArray(X, &y)); 710 PetscCall(MatDenseGetArrayRead(B, &b)); 711 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 712 PetscCall(MatDenseRestoreArrayRead(B, &b)); 713 } 714 ldy = ldx; 715 } 716 *_y = y; 717 *_ldy = ldy; 718 *_k = k; 719 *_m = m; 720 *_nrhs = nrhs; 721 PetscFunctionReturn(0); 722 } 723 724 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) 725 { 726 PetscScalar *y; 727 PetscInt _ldx; 728 PetscBLASInt k, ldy, nrhs, ldx = 0; 729 730 PetscFunctionBegin; 731 y = *_y; 732 *_y = NULL; 733 k = *_k; 734 ldy = *_ldy; 735 nrhs = *_nrhs; 736 PetscCall(MatDenseGetLDA(X, &_ldx)); 737 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 738 if (ldx != ldy) { 739 PetscScalar *xv; 740 PetscCall(MatDenseGetArray(X, &xv)); 741 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 742 PetscCall(MatDenseRestoreArray(X, &xv)); 743 PetscCall(PetscFree(y)); 744 } else { 745 PetscCall(MatDenseRestoreArray(X, &y)); 746 } 747 PetscFunctionReturn(0); 748 } 749 750 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) 751 { 752 PetscScalar *y; 753 PetscBLASInt m, k, ldy, nrhs; 754 755 PetscFunctionBegin; 756 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 757 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 758 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 759 PetscFunctionReturn(0); 760 } 761 762 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) 763 { 764 PetscScalar *y; 765 PetscBLASInt m, k, ldy, nrhs; 766 767 PetscFunctionBegin; 768 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 769 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 770 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 771 PetscFunctionReturn(0); 772 } 773 774 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) 775 { 776 PetscScalar *y; 777 PetscBLASInt m, k, ldy, nrhs; 778 779 PetscFunctionBegin; 780 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 781 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 782 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 783 PetscFunctionReturn(0); 784 } 785 786 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) 787 { 788 PetscScalar *y; 789 PetscBLASInt m, k, ldy, nrhs; 790 791 PetscFunctionBegin; 792 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 793 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 794 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 795 PetscFunctionReturn(0); 796 } 797 798 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) 799 { 800 PetscScalar *y; 801 PetscBLASInt m, k, ldy, nrhs; 802 803 PetscFunctionBegin; 804 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 805 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 806 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 807 PetscFunctionReturn(0); 808 } 809 810 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) 811 { 812 PetscScalar *y; 813 PetscBLASInt m, k, ldy, nrhs; 814 815 PetscFunctionBegin; 816 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 817 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 818 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 819 PetscFunctionReturn(0); 820 } 821 822 /* ---------------------------------------------------------------*/ 823 /* COMMENT: I have chosen to hide row permutation in the pivots, 824 rather than put it in the Mat->row slot.*/ 825 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) 826 { 827 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 828 PetscBLASInt n, m, info; 829 830 PetscFunctionBegin; 831 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 832 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 833 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 834 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 835 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 836 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 837 PetscCall(PetscFPTrapPop()); 838 839 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 840 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 841 842 A->ops->solve = MatSolve_SeqDense_LU; 843 A->ops->matsolve = MatMatSolve_SeqDense_LU; 844 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 845 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 846 A->factortype = MAT_FACTOR_LU; 847 848 PetscCall(PetscFree(A->solvertype)); 849 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 850 851 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 852 PetscFunctionReturn(0); 853 } 854 855 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 856 { 857 MatFactorInfo info; 858 859 PetscFunctionBegin; 860 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 861 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 862 PetscFunctionReturn(0); 863 } 864 865 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 866 { 867 PetscFunctionBegin; 868 fact->preallocated = PETSC_TRUE; 869 fact->assembled = PETSC_TRUE; 870 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 871 PetscFunctionReturn(0); 872 } 873 874 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 875 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) 876 { 877 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 878 PetscBLASInt info, n; 879 880 PetscFunctionBegin; 881 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 882 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 883 if (A->spd == PETSC_BOOL3_TRUE) { 884 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 885 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 886 PetscCall(PetscFPTrapPop()); 887 #if defined(PETSC_USE_COMPLEX) 888 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 889 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 890 if (!mat->fwork) { 891 PetscScalar dummy; 892 893 mat->lfwork = -1; 894 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 895 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 896 PetscCall(PetscFPTrapPop()); 897 mat->lfwork = (PetscInt)PetscRealPart(dummy); 898 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 899 } 900 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 901 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 902 PetscCall(PetscFPTrapPop()); 903 #endif 904 } else { /* symmetric case */ 905 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 906 if (!mat->fwork) { 907 PetscScalar dummy; 908 909 mat->lfwork = -1; 910 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 911 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 912 PetscCall(PetscFPTrapPop()); 913 mat->lfwork = (PetscInt)PetscRealPart(dummy); 914 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 915 } 916 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 917 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 918 PetscCall(PetscFPTrapPop()); 919 } 920 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 921 922 A->ops->solve = MatSolve_SeqDense_Cholesky; 923 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 924 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 925 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 926 A->factortype = MAT_FACTOR_CHOLESKY; 927 928 PetscCall(PetscFree(A->solvertype)); 929 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 930 931 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 932 PetscFunctionReturn(0); 933 } 934 935 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 936 { 937 MatFactorInfo info; 938 939 PetscFunctionBegin; 940 info.fill = 1.0; 941 942 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 943 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 944 PetscFunctionReturn(0); 945 } 946 947 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 948 { 949 PetscFunctionBegin; 950 fact->assembled = PETSC_TRUE; 951 fact->preallocated = PETSC_TRUE; 952 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) 957 { 958 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 959 PetscBLASInt n, m, info, min, max; 960 961 PetscFunctionBegin; 962 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 963 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 964 max = PetscMax(m, n); 965 min = PetscMin(m, n); 966 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 967 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 968 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &(mat->qrrhs))); 969 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 970 if (!mat->fwork) { 971 PetscScalar dummy; 972 973 mat->lfwork = -1; 974 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 975 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 976 PetscCall(PetscFPTrapPop()); 977 mat->lfwork = (PetscInt)PetscRealPart(dummy); 978 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 979 } 980 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 981 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 982 PetscCall(PetscFPTrapPop()); 983 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 984 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 985 mat->rank = min; 986 987 A->ops->solve = MatSolve_SeqDense_QR; 988 A->ops->matsolve = MatMatSolve_SeqDense_QR; 989 A->factortype = MAT_FACTOR_QR; 990 if (m == n) { 991 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 992 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 993 } 994 995 PetscCall(PetscFree(A->solvertype)); 996 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 997 998 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 999 PetscFunctionReturn(0); 1000 } 1001 1002 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) 1003 { 1004 MatFactorInfo info; 1005 1006 PetscFunctionBegin; 1007 info.fill = 1.0; 1008 1009 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 1010 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) 1015 { 1016 PetscFunctionBegin; 1017 fact->assembled = PETSC_TRUE; 1018 fact->preallocated = PETSC_TRUE; 1019 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 /* uses LAPACK */ 1024 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) 1025 { 1026 PetscFunctionBegin; 1027 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 1028 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 1029 PetscCall(MatSetType(*fact, MATDENSE)); 1030 (*fact)->trivialsymbolic = PETSC_TRUE; 1031 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 1032 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1033 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1034 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1035 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1036 } else if (ftype == MAT_FACTOR_QR) { 1037 PetscCall(PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1038 } 1039 (*fact)->factortype = ftype; 1040 1041 PetscCall(PetscFree((*fact)->solvertype)); 1042 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1043 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1044 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1045 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1046 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 /* ------------------------------------------------------------------*/ 1051 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) 1052 { 1053 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1054 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1055 const PetscScalar *b; 1056 PetscInt m = A->rmap->n, i; 1057 PetscBLASInt o = 1, bm = 0; 1058 1059 PetscFunctionBegin; 1060 #if defined(PETSC_HAVE_CUDA) 1061 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1062 #endif 1063 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1064 PetscCall(PetscBLASIntCast(m, &bm)); 1065 if (flag & SOR_ZERO_INITIAL_GUESS) { 1066 /* this is a hack fix, should have another version without the second BLASdotu */ 1067 PetscCall(VecSet(xx, zero)); 1068 } 1069 PetscCall(VecGetArray(xx, &x)); 1070 PetscCall(VecGetArrayRead(bb, &b)); 1071 its = its * lits; 1072 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1073 while (its--) { 1074 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1075 for (i = 0; i < m; i++) { 1076 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1077 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1078 } 1079 } 1080 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1081 for (i = m - 1; i >= 0; i--) { 1082 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1083 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1084 } 1085 } 1086 } 1087 PetscCall(VecRestoreArrayRead(bb, &b)); 1088 PetscCall(VecRestoreArray(xx, &x)); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 /* -----------------------------------------------------------------*/ 1093 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) 1094 { 1095 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1096 const PetscScalar *v = mat->v, *x; 1097 PetscScalar *y; 1098 PetscBLASInt m, n, _One = 1; 1099 PetscScalar _DOne = 1.0, _DZero = 0.0; 1100 1101 PetscFunctionBegin; 1102 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1103 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1104 PetscCall(VecGetArrayRead(xx, &x)); 1105 PetscCall(VecGetArrayWrite(yy, &y)); 1106 if (!A->rmap->n || !A->cmap->n) { 1107 PetscBLASInt i; 1108 for (i = 0; i < n; i++) y[i] = 0.0; 1109 } else { 1110 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One)); 1111 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n)); 1112 } 1113 PetscCall(VecRestoreArrayRead(xx, &x)); 1114 PetscCall(VecRestoreArrayWrite(yy, &y)); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) 1119 { 1120 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1121 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1122 PetscBLASInt m, n, _One = 1; 1123 const PetscScalar *v = mat->v, *x; 1124 1125 PetscFunctionBegin; 1126 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1127 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1128 PetscCall(VecGetArrayRead(xx, &x)); 1129 PetscCall(VecGetArrayWrite(yy, &y)); 1130 if (!A->rmap->n || !A->cmap->n) { 1131 PetscBLASInt i; 1132 for (i = 0; i < m; i++) y[i] = 0.0; 1133 } else { 1134 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One)); 1135 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n)); 1136 } 1137 PetscCall(VecRestoreArrayRead(xx, &x)); 1138 PetscCall(VecRestoreArrayWrite(yy, &y)); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1143 { 1144 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1145 const PetscScalar *v = mat->v, *x; 1146 PetscScalar *y, _DOne = 1.0; 1147 PetscBLASInt m, n, _One = 1; 1148 1149 PetscFunctionBegin; 1150 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1151 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1152 PetscCall(VecCopy(zz, yy)); 1153 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1154 PetscCall(VecGetArrayRead(xx, &x)); 1155 PetscCall(VecGetArray(yy, &y)); 1156 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1157 PetscCall(VecRestoreArrayRead(xx, &x)); 1158 PetscCall(VecRestoreArray(yy, &y)); 1159 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) 1164 { 1165 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1166 const PetscScalar *v = mat->v, *x; 1167 PetscScalar *y; 1168 PetscBLASInt m, n, _One = 1; 1169 PetscScalar _DOne = 1.0; 1170 1171 PetscFunctionBegin; 1172 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1173 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1174 PetscCall(VecCopy(zz, yy)); 1175 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1176 PetscCall(VecGetArrayRead(xx, &x)); 1177 PetscCall(VecGetArray(yy, &y)); 1178 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1179 PetscCall(VecRestoreArrayRead(xx, &x)); 1180 PetscCall(VecRestoreArray(yy, &y)); 1181 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1182 PetscFunctionReturn(0); 1183 } 1184 1185 /* -----------------------------------------------------------------*/ 1186 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1187 { 1188 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1189 PetscInt i; 1190 1191 PetscFunctionBegin; 1192 *ncols = A->cmap->n; 1193 if (cols) { 1194 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1195 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1196 } 1197 if (vals) { 1198 const PetscScalar *v; 1199 1200 PetscCall(MatDenseGetArrayRead(A, &v)); 1201 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1202 v += row; 1203 for (i = 0; i < A->cmap->n; i++) { 1204 (*vals)[i] = *v; 1205 v += mat->lda; 1206 } 1207 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) 1213 { 1214 PetscFunctionBegin; 1215 if (ncols) *ncols = 0; 1216 if (cols) PetscCall(PetscFree(*cols)); 1217 if (vals) PetscCall(PetscFree(*vals)); 1218 PetscFunctionReturn(0); 1219 } 1220 /* ----------------------------------------------------------------*/ 1221 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) 1222 { 1223 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1224 PetscScalar *av; 1225 PetscInt i, j, idx = 0; 1226 #if defined(PETSC_HAVE_CUDA) 1227 PetscOffloadMask oldf; 1228 #endif 1229 1230 PetscFunctionBegin; 1231 PetscCall(MatDenseGetArray(A, &av)); 1232 if (!mat->roworiented) { 1233 if (addv == INSERT_VALUES) { 1234 for (j = 0; j < n; j++) { 1235 if (indexn[j] < 0) { 1236 idx += m; 1237 continue; 1238 } 1239 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1240 for (i = 0; i < m; i++) { 1241 if (indexm[i] < 0) { 1242 idx++; 1243 continue; 1244 } 1245 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1246 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1247 } 1248 } 1249 } else { 1250 for (j = 0; j < n; j++) { 1251 if (indexn[j] < 0) { 1252 idx += m; 1253 continue; 1254 } 1255 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1256 for (i = 0; i < m; i++) { 1257 if (indexm[i] < 0) { 1258 idx++; 1259 continue; 1260 } 1261 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1262 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1263 } 1264 } 1265 } 1266 } else { 1267 if (addv == INSERT_VALUES) { 1268 for (i = 0; i < m; i++) { 1269 if (indexm[i] < 0) { 1270 idx += n; 1271 continue; 1272 } 1273 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1274 for (j = 0; j < n; j++) { 1275 if (indexn[j] < 0) { 1276 idx++; 1277 continue; 1278 } 1279 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1280 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1281 } 1282 } 1283 } else { 1284 for (i = 0; i < m; i++) { 1285 if (indexm[i] < 0) { 1286 idx += n; 1287 continue; 1288 } 1289 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1290 for (j = 0; j < n; j++) { 1291 if (indexn[j] < 0) { 1292 idx++; 1293 continue; 1294 } 1295 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1296 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1297 } 1298 } 1299 } 1300 } 1301 /* hack to prevent unneeded copy to the GPU while returning the array */ 1302 #if defined(PETSC_HAVE_CUDA) 1303 oldf = A->offloadmask; 1304 A->offloadmask = PETSC_OFFLOAD_GPU; 1305 #endif 1306 PetscCall(MatDenseRestoreArray(A, &av)); 1307 #if defined(PETSC_HAVE_CUDA) 1308 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1309 #endif 1310 PetscFunctionReturn(0); 1311 } 1312 1313 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) 1314 { 1315 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1316 const PetscScalar *vv; 1317 PetscInt i, j; 1318 1319 PetscFunctionBegin; 1320 PetscCall(MatDenseGetArrayRead(A, &vv)); 1321 /* row-oriented output */ 1322 for (i = 0; i < m; i++) { 1323 if (indexm[i] < 0) { 1324 v += n; 1325 continue; 1326 } 1327 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1328 for (j = 0; j < n; j++) { 1329 if (indexn[j] < 0) { 1330 v++; 1331 continue; 1332 } 1333 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1334 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1335 } 1336 } 1337 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /* -----------------------------------------------------------------*/ 1342 1343 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) 1344 { 1345 PetscBool skipHeader; 1346 PetscViewerFormat format; 1347 PetscInt header[4], M, N, m, lda, i, j, k; 1348 const PetscScalar *v; 1349 PetscScalar *vwork; 1350 1351 PetscFunctionBegin; 1352 PetscCall(PetscViewerSetUp(viewer)); 1353 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1354 PetscCall(PetscViewerGetFormat(viewer, &format)); 1355 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1356 1357 PetscCall(MatGetSize(mat, &M, &N)); 1358 1359 /* write matrix header */ 1360 header[0] = MAT_FILE_CLASSID; 1361 header[1] = M; 1362 header[2] = N; 1363 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1364 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1365 1366 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1367 if (format != PETSC_VIEWER_NATIVE) { 1368 PetscInt nnz = m * N, *iwork; 1369 /* store row lengths for each row */ 1370 PetscCall(PetscMalloc1(nnz, &iwork)); 1371 for (i = 0; i < m; i++) iwork[i] = N; 1372 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1373 /* store column indices (zero start index) */ 1374 for (k = 0, i = 0; i < m; i++) 1375 for (j = 0; j < N; j++, k++) iwork[k] = j; 1376 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1377 PetscCall(PetscFree(iwork)); 1378 } 1379 /* store matrix values as a dense matrix in row major order */ 1380 PetscCall(PetscMalloc1(m * N, &vwork)); 1381 PetscCall(MatDenseGetArrayRead(mat, &v)); 1382 PetscCall(MatDenseGetLDA(mat, &lda)); 1383 for (k = 0, i = 0; i < m; i++) 1384 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1385 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1386 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1387 PetscCall(PetscFree(vwork)); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) 1392 { 1393 PetscBool skipHeader; 1394 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1395 PetscInt rows, cols; 1396 PetscScalar *v, *vwork; 1397 1398 PetscFunctionBegin; 1399 PetscCall(PetscViewerSetUp(viewer)); 1400 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1401 1402 if (!skipHeader) { 1403 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1404 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1405 M = header[1]; 1406 N = header[2]; 1407 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1408 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1409 nz = header[3]; 1410 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1411 } else { 1412 PetscCall(MatGetSize(mat, &M, &N)); 1413 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1414 nz = MATRIX_BINARY_FORMAT_DENSE; 1415 } 1416 1417 /* setup global sizes if not set */ 1418 if (mat->rmap->N < 0) mat->rmap->N = M; 1419 if (mat->cmap->N < 0) mat->cmap->N = N; 1420 PetscCall(MatSetUp(mat)); 1421 /* check if global sizes are correct */ 1422 PetscCall(MatGetSize(mat, &rows, &cols)); 1423 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1424 1425 PetscCall(MatGetSize(mat, NULL, &N)); 1426 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1427 PetscCall(MatDenseGetArray(mat, &v)); 1428 PetscCall(MatDenseGetLDA(mat, &lda)); 1429 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1430 PetscInt nnz = m * N; 1431 /* read in matrix values */ 1432 PetscCall(PetscMalloc1(nnz, &vwork)); 1433 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1434 /* store values in column major order */ 1435 for (j = 0; j < N; j++) 1436 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1437 PetscCall(PetscFree(vwork)); 1438 } else { /* matrix in file is sparse format */ 1439 PetscInt nnz = 0, *rlens, *icols; 1440 /* read in row lengths */ 1441 PetscCall(PetscMalloc1(m, &rlens)); 1442 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1443 for (i = 0; i < m; i++) nnz += rlens[i]; 1444 /* read in column indices and values */ 1445 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1446 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1447 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1448 /* store values in column major order */ 1449 for (k = 0, i = 0; i < m; i++) 1450 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1451 PetscCall(PetscFree(rlens)); 1452 PetscCall(PetscFree2(icols, vwork)); 1453 } 1454 PetscCall(MatDenseRestoreArray(mat, &v)); 1455 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1456 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) 1461 { 1462 PetscBool isbinary, ishdf5; 1463 1464 PetscFunctionBegin; 1465 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1466 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1467 /* force binary viewer to load .info file if it has not yet done so */ 1468 PetscCall(PetscViewerSetUp(viewer)); 1469 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1470 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1471 if (isbinary) { 1472 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1473 } else if (ishdf5) { 1474 #if defined(PETSC_HAVE_HDF5) 1475 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1476 #else 1477 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1478 #endif 1479 } else { 1480 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) 1486 { 1487 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1488 PetscInt i, j; 1489 const char *name; 1490 PetscScalar *v, *av; 1491 PetscViewerFormat format; 1492 #if defined(PETSC_USE_COMPLEX) 1493 PetscBool allreal = PETSC_TRUE; 1494 #endif 1495 1496 PetscFunctionBegin; 1497 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1498 PetscCall(PetscViewerGetFormat(viewer, &format)); 1499 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1500 PetscFunctionReturn(0); /* do nothing for now */ 1501 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1502 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1503 for (i = 0; i < A->rmap->n; i++) { 1504 v = av + i; 1505 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1506 for (j = 0; j < A->cmap->n; j++) { 1507 #if defined(PETSC_USE_COMPLEX) 1508 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1509 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1510 } else if (PetscRealPart(*v)) { 1511 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1512 } 1513 #else 1514 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1515 #endif 1516 v += a->lda; 1517 } 1518 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1519 } 1520 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1521 } else { 1522 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1523 #if defined(PETSC_USE_COMPLEX) 1524 /* determine if matrix has all real values */ 1525 for (j = 0; j < A->cmap->n; j++) { 1526 v = av + j * a->lda; 1527 for (i = 0; i < A->rmap->n; i++) { 1528 if (PetscImaginaryPart(v[i])) { 1529 allreal = PETSC_FALSE; 1530 break; 1531 } 1532 } 1533 } 1534 #endif 1535 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1536 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1537 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1538 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1539 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1540 } 1541 1542 for (i = 0; i < A->rmap->n; i++) { 1543 v = av + i; 1544 for (j = 0; j < A->cmap->n; j++) { 1545 #if defined(PETSC_USE_COMPLEX) 1546 if (allreal) { 1547 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1548 } else { 1549 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1550 } 1551 #else 1552 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1553 #endif 1554 v += a->lda; 1555 } 1556 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1557 } 1558 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1559 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1560 } 1561 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1562 PetscCall(PetscViewerFlush(viewer)); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #include <petscdraw.h> 1567 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) 1568 { 1569 Mat A = (Mat)Aa; 1570 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1571 int color = PETSC_DRAW_WHITE; 1572 const PetscScalar *v; 1573 PetscViewer viewer; 1574 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1575 PetscViewerFormat format; 1576 1577 PetscFunctionBegin; 1578 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1579 PetscCall(PetscViewerGetFormat(viewer, &format)); 1580 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1581 1582 /* Loop over matrix elements drawing boxes */ 1583 PetscCall(MatDenseGetArrayRead(A, &v)); 1584 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1585 PetscDrawCollectiveBegin(draw); 1586 /* Blue for negative and Red for positive */ 1587 for (j = 0; j < n; j++) { 1588 x_l = j; 1589 x_r = x_l + 1.0; 1590 for (i = 0; i < m; i++) { 1591 y_l = m - i - 1.0; 1592 y_r = y_l + 1.0; 1593 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1594 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1595 else continue; 1596 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1597 } 1598 } 1599 PetscDrawCollectiveEnd(draw); 1600 } else { 1601 /* use contour shading to indicate magnitude of values */ 1602 /* first determine max of all nonzero values */ 1603 PetscReal minv = 0.0, maxv = 0.0; 1604 PetscDraw popup; 1605 1606 for (i = 0; i < m * n; i++) { 1607 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1608 } 1609 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1610 PetscCall(PetscDrawGetPopup(draw, &popup)); 1611 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1612 1613 PetscDrawCollectiveBegin(draw); 1614 for (j = 0; j < n; j++) { 1615 x_l = j; 1616 x_r = x_l + 1.0; 1617 for (i = 0; i < m; i++) { 1618 y_l = m - i - 1.0; 1619 y_r = y_l + 1.0; 1620 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1621 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1622 } 1623 } 1624 PetscDrawCollectiveEnd(draw); 1625 } 1626 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1627 PetscFunctionReturn(0); 1628 } 1629 1630 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) 1631 { 1632 PetscDraw draw; 1633 PetscBool isnull; 1634 PetscReal xr, yr, xl, yl, h, w; 1635 1636 PetscFunctionBegin; 1637 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1638 PetscCall(PetscDrawIsNull(draw, &isnull)); 1639 if (isnull) PetscFunctionReturn(0); 1640 1641 xr = A->cmap->n; 1642 yr = A->rmap->n; 1643 h = yr / 10.0; 1644 w = xr / 10.0; 1645 xr += w; 1646 yr += h; 1647 xl = -w; 1648 yl = -h; 1649 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1650 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1651 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1652 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1653 PetscCall(PetscDrawSave(draw)); 1654 PetscFunctionReturn(0); 1655 } 1656 1657 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) 1658 { 1659 PetscBool iascii, isbinary, isdraw; 1660 1661 PetscFunctionBegin; 1662 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1663 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1664 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1665 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1666 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1667 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) 1672 { 1673 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1674 1675 PetscFunctionBegin; 1676 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1677 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1678 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreArray() first"); 1679 a->unplacedarray = a->v; 1680 a->unplaced_user_alloc = a->user_alloc; 1681 a->v = (PetscScalar *)array; 1682 a->user_alloc = PETSC_TRUE; 1683 #if defined(PETSC_HAVE_CUDA) 1684 A->offloadmask = PETSC_OFFLOAD_CPU; 1685 #endif 1686 PetscFunctionReturn(0); 1687 } 1688 1689 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) 1690 { 1691 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1692 1693 PetscFunctionBegin; 1694 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1695 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1696 a->v = a->unplacedarray; 1697 a->user_alloc = a->unplaced_user_alloc; 1698 a->unplacedarray = NULL; 1699 #if defined(PETSC_HAVE_CUDA) 1700 A->offloadmask = PETSC_OFFLOAD_CPU; 1701 #endif 1702 PetscFunctionReturn(0); 1703 } 1704 1705 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) 1706 { 1707 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1708 1709 PetscFunctionBegin; 1710 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1711 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1712 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1713 a->v = (PetscScalar *)array; 1714 a->user_alloc = PETSC_FALSE; 1715 #if defined(PETSC_HAVE_CUDA) 1716 A->offloadmask = PETSC_OFFLOAD_CPU; 1717 #endif 1718 PetscFunctionReturn(0); 1719 } 1720 1721 PetscErrorCode MatDestroy_SeqDense(Mat mat) 1722 { 1723 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1724 1725 PetscFunctionBegin; 1726 #if defined(PETSC_USE_LOG) 1727 PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n); 1728 #endif 1729 PetscCall(VecDestroy(&(l->qrrhs))); 1730 PetscCall(PetscFree(l->tau)); 1731 PetscCall(PetscFree(l->pivots)); 1732 PetscCall(PetscFree(l->fwork)); 1733 PetscCall(MatDestroy(&l->ptapwork)); 1734 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1735 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1736 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1737 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1738 PetscCall(VecDestroy(&l->cvec)); 1739 PetscCall(MatDestroy(&l->cmat)); 1740 PetscCall(PetscFree(mat->data)); 1741 1742 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1758 #if defined(PETSC_HAVE_ELEMENTAL) 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1760 #endif 1761 #if defined(PETSC_HAVE_SCALAPACK) 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1763 #endif 1764 #if defined(PETSC_HAVE_CUDA) 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1769 #endif 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1775 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) 1790 { 1791 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1792 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1793 PetscScalar *v, tmp; 1794 1795 PetscFunctionBegin; 1796 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1797 if (reuse == MAT_INPLACE_MATRIX) { 1798 if (m == n) { /* in place transpose */ 1799 PetscCall(MatDenseGetArray(A, &v)); 1800 for (j = 0; j < m; j++) { 1801 for (k = 0; k < j; k++) { 1802 tmp = v[j + k * M]; 1803 v[j + k * M] = v[k + j * M]; 1804 v[k + j * M] = tmp; 1805 } 1806 } 1807 PetscCall(MatDenseRestoreArray(A, &v)); 1808 } else { /* reuse memory, temporary allocates new memory */ 1809 PetscScalar *v2; 1810 PetscLayout tmplayout; 1811 1812 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1813 PetscCall(MatDenseGetArray(A, &v)); 1814 for (j = 0; j < n; j++) { 1815 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1816 } 1817 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1818 PetscCall(PetscFree(v2)); 1819 PetscCall(MatDenseRestoreArray(A, &v)); 1820 /* cleanup size dependent quantities */ 1821 PetscCall(VecDestroy(&mat->cvec)); 1822 PetscCall(MatDestroy(&mat->cmat)); 1823 PetscCall(PetscFree(mat->pivots)); 1824 PetscCall(PetscFree(mat->fwork)); 1825 PetscCall(MatDestroy(&mat->ptapwork)); 1826 /* swap row/col layouts */ 1827 mat->lda = n; 1828 tmplayout = A->rmap; 1829 A->rmap = A->cmap; 1830 A->cmap = tmplayout; 1831 } 1832 } else { /* out-of-place transpose */ 1833 Mat tmat; 1834 Mat_SeqDense *tmatd; 1835 PetscScalar *v2; 1836 PetscInt M2; 1837 1838 if (reuse == MAT_INITIAL_MATRIX) { 1839 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1840 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1841 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1842 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1843 } else tmat = *matout; 1844 1845 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1846 PetscCall(MatDenseGetArray(tmat, &v2)); 1847 tmatd = (Mat_SeqDense *)tmat->data; 1848 M2 = tmatd->lda; 1849 for (j = 0; j < n; j++) { 1850 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1851 } 1852 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1853 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1854 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1855 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1856 *matout = tmat; 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) 1862 { 1863 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1864 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1865 PetscInt i; 1866 const PetscScalar *v1, *v2; 1867 1868 PetscFunctionBegin; 1869 if (A1->rmap->n != A2->rmap->n) { 1870 *flg = PETSC_FALSE; 1871 PetscFunctionReturn(0); 1872 } 1873 if (A1->cmap->n != A2->cmap->n) { 1874 *flg = PETSC_FALSE; 1875 PetscFunctionReturn(0); 1876 } 1877 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1878 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1879 for (i = 0; i < A1->cmap->n; i++) { 1880 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1881 if (*flg == PETSC_FALSE) PetscFunctionReturn(0); 1882 v1 += mat1->lda; 1883 v2 += mat2->lda; 1884 } 1885 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1886 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1887 *flg = PETSC_TRUE; 1888 PetscFunctionReturn(0); 1889 } 1890 1891 static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) 1892 { 1893 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1894 PetscInt i, n, len; 1895 PetscScalar *x; 1896 const PetscScalar *vv; 1897 1898 PetscFunctionBegin; 1899 PetscCall(VecGetSize(v, &n)); 1900 PetscCall(VecGetArray(v, &x)); 1901 len = PetscMin(A->rmap->n, A->cmap->n); 1902 PetscCall(MatDenseGetArrayRead(A, &vv)); 1903 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1904 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1905 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1906 PetscCall(VecRestoreArray(v, &x)); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) 1911 { 1912 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1913 const PetscScalar *l, *r; 1914 PetscScalar x, *v, *vv; 1915 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1916 1917 PetscFunctionBegin; 1918 PetscCall(MatDenseGetArray(A, &vv)); 1919 if (ll) { 1920 PetscCall(VecGetSize(ll, &m)); 1921 PetscCall(VecGetArrayRead(ll, &l)); 1922 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1923 for (i = 0; i < m; i++) { 1924 x = l[i]; 1925 v = vv + i; 1926 for (j = 0; j < n; j++) { 1927 (*v) *= x; 1928 v += mat->lda; 1929 } 1930 } 1931 PetscCall(VecRestoreArrayRead(ll, &l)); 1932 PetscCall(PetscLogFlops(1.0 * n * m)); 1933 } 1934 if (rr) { 1935 PetscCall(VecGetSize(rr, &n)); 1936 PetscCall(VecGetArrayRead(rr, &r)); 1937 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1938 for (i = 0; i < n; i++) { 1939 x = r[i]; 1940 v = vv + i * mat->lda; 1941 for (j = 0; j < m; j++) (*v++) *= x; 1942 } 1943 PetscCall(VecRestoreArrayRead(rr, &r)); 1944 PetscCall(PetscLogFlops(1.0 * n * m)); 1945 } 1946 PetscCall(MatDenseRestoreArray(A, &vv)); 1947 PetscFunctionReturn(0); 1948 } 1949 1950 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) 1951 { 1952 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1953 PetscScalar *v, *vv; 1954 PetscReal sum = 0.0; 1955 PetscInt lda, m = A->rmap->n, i, j; 1956 1957 PetscFunctionBegin; 1958 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1959 PetscCall(MatDenseGetLDA(A, &lda)); 1960 v = vv; 1961 if (type == NORM_FROBENIUS) { 1962 if (lda > m) { 1963 for (j = 0; j < A->cmap->n; j++) { 1964 v = vv + j * lda; 1965 for (i = 0; i < m; i++) { 1966 sum += PetscRealPart(PetscConj(*v) * (*v)); 1967 v++; 1968 } 1969 } 1970 } else { 1971 #if defined(PETSC_USE_REAL___FP16) 1972 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1973 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1974 } 1975 #else 1976 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1977 sum += PetscRealPart(PetscConj(*v) * (*v)); 1978 v++; 1979 } 1980 } 1981 *nrm = PetscSqrtReal(sum); 1982 #endif 1983 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1984 } else if (type == NORM_1) { 1985 *nrm = 0.0; 1986 for (j = 0; j < A->cmap->n; j++) { 1987 v = vv + j * mat->lda; 1988 sum = 0.0; 1989 for (i = 0; i < A->rmap->n; i++) { 1990 sum += PetscAbsScalar(*v); 1991 v++; 1992 } 1993 if (sum > *nrm) *nrm = sum; 1994 } 1995 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1996 } else if (type == NORM_INFINITY) { 1997 *nrm = 0.0; 1998 for (j = 0; j < A->rmap->n; j++) { 1999 v = vv + j; 2000 sum = 0.0; 2001 for (i = 0; i < A->cmap->n; i++) { 2002 sum += PetscAbsScalar(*v); 2003 v += mat->lda; 2004 } 2005 if (sum > *nrm) *nrm = sum; 2006 } 2007 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 2008 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 2009 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 2010 PetscFunctionReturn(0); 2011 } 2012 2013 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) 2014 { 2015 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 2016 2017 PetscFunctionBegin; 2018 switch (op) { 2019 case MAT_ROW_ORIENTED: 2020 aij->roworiented = flg; 2021 break; 2022 case MAT_NEW_NONZERO_LOCATIONS: 2023 case MAT_NEW_NONZERO_LOCATION_ERR: 2024 case MAT_NEW_NONZERO_ALLOCATION_ERR: 2025 case MAT_FORCE_DIAGONAL_ENTRIES: 2026 case MAT_KEEP_NONZERO_PATTERN: 2027 case MAT_IGNORE_OFF_PROC_ENTRIES: 2028 case MAT_USE_HASH_TABLE: 2029 case MAT_IGNORE_ZERO_ENTRIES: 2030 case MAT_IGNORE_LOWER_TRIANGULAR: 2031 case MAT_SORTED_FULL: 2032 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 2033 break; 2034 case MAT_SPD: 2035 case MAT_SYMMETRIC: 2036 case MAT_STRUCTURALLY_SYMMETRIC: 2037 case MAT_HERMITIAN: 2038 case MAT_SYMMETRY_ETERNAL: 2039 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 2040 case MAT_SPD_ETERNAL: 2041 break; 2042 default: 2043 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 2044 } 2045 PetscFunctionReturn(0); 2046 } 2047 2048 PetscErrorCode MatZeroEntries_SeqDense(Mat A) 2049 { 2050 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2051 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 2052 PetscScalar *v; 2053 2054 PetscFunctionBegin; 2055 PetscCall(MatDenseGetArrayWrite(A, &v)); 2056 if (lda > m) { 2057 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2058 } else { 2059 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2060 } 2061 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2062 PetscFunctionReturn(0); 2063 } 2064 2065 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 2066 { 2067 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2068 PetscInt m = l->lda, n = A->cmap->n, i, j; 2069 PetscScalar *slot, *bb, *v; 2070 const PetscScalar *xx; 2071 2072 PetscFunctionBegin; 2073 if (PetscDefined(USE_DEBUG)) { 2074 for (i = 0; i < N; i++) { 2075 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2076 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2077 } 2078 } 2079 if (!N) PetscFunctionReturn(0); 2080 2081 /* fix right hand side if needed */ 2082 if (x && b) { 2083 PetscCall(VecGetArrayRead(x, &xx)); 2084 PetscCall(VecGetArray(b, &bb)); 2085 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2086 PetscCall(VecRestoreArrayRead(x, &xx)); 2087 PetscCall(VecRestoreArray(b, &bb)); 2088 } 2089 2090 PetscCall(MatDenseGetArray(A, &v)); 2091 for (i = 0; i < N; i++) { 2092 slot = v + rows[i]; 2093 for (j = 0; j < n; j++) { 2094 *slot = 0.0; 2095 slot += m; 2096 } 2097 } 2098 if (diag != 0.0) { 2099 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2100 for (i = 0; i < N; i++) { 2101 slot = v + (m + 1) * rows[i]; 2102 *slot = diag; 2103 } 2104 } 2105 PetscCall(MatDenseRestoreArray(A, &v)); 2106 PetscFunctionReturn(0); 2107 } 2108 2109 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) 2110 { 2111 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2112 2113 PetscFunctionBegin; 2114 *lda = mat->lda; 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) 2119 { 2120 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2121 2122 PetscFunctionBegin; 2123 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2124 *array = mat->v; 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) 2129 { 2130 PetscFunctionBegin; 2131 if (array) *array = NULL; 2132 PetscFunctionReturn(0); 2133 } 2134 2135 /*@ 2136 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2137 2138 Not collective 2139 2140 Input Parameter: 2141 . mat - a `MATDENSE` or `MATDENSECUDA` matrix 2142 2143 Output Parameter: 2144 . lda - the leading dimension 2145 2146 Level: intermediate 2147 2148 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2149 @*/ 2150 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) 2151 { 2152 PetscFunctionBegin; 2153 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2154 PetscValidIntPointer(lda, 2); 2155 MatCheckPreallocated(A, 1); 2156 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /*@ 2161 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2162 2163 Not collective 2164 2165 Input Parameters: 2166 + mat - a `MATDENSE` or `MATDENSECUDA` matrix 2167 - lda - the leading dimension 2168 2169 Level: intermediate 2170 2171 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2172 @*/ 2173 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) 2174 { 2175 PetscFunctionBegin; 2176 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2177 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2178 PetscFunctionReturn(0); 2179 } 2180 2181 /*@C 2182 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2183 2184 Logically Collective on A 2185 2186 Input Parameter: 2187 . mat - a dense matrix 2188 2189 Output Parameter: 2190 . array - pointer to the data 2191 2192 Level: intermediate 2193 2194 .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2195 @*/ 2196 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array) 2197 { 2198 PetscFunctionBegin; 2199 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2200 PetscValidPointer(array, 2); 2201 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 /*@C 2206 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2207 2208 Logically Collective on A 2209 2210 Input Parameters: 2211 + mat - a dense matrix 2212 - array - pointer to the data (may be NULL) 2213 2214 Level: intermediate 2215 2216 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2217 @*/ 2218 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array) 2219 { 2220 PetscFunctionBegin; 2221 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2222 PetscValidPointer(array, 2); 2223 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2224 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2225 #if defined(PETSC_HAVE_CUDA) 2226 A->offloadmask = PETSC_OFFLOAD_CPU; 2227 #endif 2228 PetscFunctionReturn(0); 2229 } 2230 2231 /*@C 2232 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2233 2234 Not Collective 2235 2236 Input Parameter: 2237 . mat - a dense matrix 2238 2239 Output Parameter: 2240 . array - pointer to the data 2241 2242 Level: intermediate 2243 2244 .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2245 @*/ 2246 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array) 2247 { 2248 PetscFunctionBegin; 2249 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2250 PetscValidPointer(array, 2); 2251 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 /*@C 2256 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2257 2258 Not Collective 2259 2260 Input Parameters: 2261 + mat - a dense matrix 2262 - array - pointer to the data (may be NULL) 2263 2264 Level: intermediate 2265 2266 .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2267 @*/ 2268 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array) 2269 { 2270 PetscFunctionBegin; 2271 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2272 PetscValidPointer(array, 2); 2273 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 /*@C 2278 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2279 2280 Not Collective 2281 2282 Input Parameter: 2283 . mat - a dense matrix 2284 2285 Output Parameter: 2286 . array - pointer to the data 2287 2288 Level: intermediate 2289 2290 .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2291 @*/ 2292 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array) 2293 { 2294 PetscFunctionBegin; 2295 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2296 PetscValidPointer(array, 2); 2297 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 /*@C 2302 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2303 2304 Not Collective 2305 2306 Input Parameters: 2307 + mat - a dense matrix 2308 - array - pointer to the data (may be NULL) 2309 2310 Level: intermediate 2311 2312 .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2313 @*/ 2314 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array) 2315 { 2316 PetscFunctionBegin; 2317 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2318 PetscValidPointer(array, 2); 2319 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2320 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2321 #if defined(PETSC_HAVE_CUDA) 2322 A->offloadmask = PETSC_OFFLOAD_CPU; 2323 #endif 2324 PetscFunctionReturn(0); 2325 } 2326 2327 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 2328 { 2329 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2330 PetscInt i, j, nrows, ncols, ldb; 2331 const PetscInt *irow, *icol; 2332 PetscScalar *av, *bv, *v = mat->v; 2333 Mat newmat; 2334 2335 PetscFunctionBegin; 2336 PetscCall(ISGetIndices(isrow, &irow)); 2337 PetscCall(ISGetIndices(iscol, &icol)); 2338 PetscCall(ISGetLocalSize(isrow, &nrows)); 2339 PetscCall(ISGetLocalSize(iscol, &ncols)); 2340 2341 /* Check submatrixcall */ 2342 if (scall == MAT_REUSE_MATRIX) { 2343 PetscInt n_cols, n_rows; 2344 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2345 if (n_rows != nrows || n_cols != ncols) { 2346 /* resize the result matrix to match number of requested rows/columns */ 2347 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2348 } 2349 newmat = *B; 2350 } else { 2351 /* Create and fill new matrix */ 2352 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2353 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2354 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2355 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2356 } 2357 2358 /* Now extract the data pointers and do the copy,column at a time */ 2359 PetscCall(MatDenseGetArray(newmat, &bv)); 2360 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2361 for (i = 0; i < ncols; i++) { 2362 av = v + mat->lda * icol[i]; 2363 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2364 bv += ldb; 2365 } 2366 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2367 2368 /* Assemble the matrices so that the correct flags are set */ 2369 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2370 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2371 2372 /* Free work space */ 2373 PetscCall(ISRestoreIndices(isrow, &irow)); 2374 PetscCall(ISRestoreIndices(iscol, &icol)); 2375 *B = newmat; 2376 PetscFunctionReturn(0); 2377 } 2378 2379 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 2380 { 2381 PetscInt i; 2382 2383 PetscFunctionBegin; 2384 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2385 2386 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) 2391 { 2392 PetscFunctionBegin; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) 2397 { 2398 PetscFunctionBegin; 2399 PetscFunctionReturn(0); 2400 } 2401 2402 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) 2403 { 2404 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2405 const PetscScalar *va; 2406 PetscScalar *vb; 2407 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2408 2409 PetscFunctionBegin; 2410 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2411 if (A->ops->copy != B->ops->copy) { 2412 PetscCall(MatCopy_Basic(A, B, str)); 2413 PetscFunctionReturn(0); 2414 } 2415 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2416 PetscCall(MatDenseGetArrayRead(A, &va)); 2417 PetscCall(MatDenseGetArray(B, &vb)); 2418 if (lda1 > m || lda2 > m) { 2419 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2420 } else { 2421 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2422 } 2423 PetscCall(MatDenseRestoreArray(B, &vb)); 2424 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2425 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2426 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatSetUp_SeqDense(Mat A) 2431 { 2432 PetscFunctionBegin; 2433 PetscCall(PetscLayoutSetUp(A->rmap)); 2434 PetscCall(PetscLayoutSetUp(A->cmap)); 2435 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 static PetscErrorCode MatConjugate_SeqDense(Mat A) 2440 { 2441 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2442 PetscInt i, j; 2443 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2444 PetscScalar *aa; 2445 2446 PetscFunctionBegin; 2447 PetscCall(MatDenseGetArray(A, &aa)); 2448 for (j = 0; j < A->cmap->n; j++) { 2449 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2450 } 2451 PetscCall(MatDenseRestoreArray(A, &aa)); 2452 if (mat->tau) 2453 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 static PetscErrorCode MatRealPart_SeqDense(Mat A) 2458 { 2459 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2460 PetscInt i, j; 2461 PetscScalar *aa; 2462 2463 PetscFunctionBegin; 2464 PetscCall(MatDenseGetArray(A, &aa)); 2465 for (j = 0; j < A->cmap->n; j++) { 2466 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2467 } 2468 PetscCall(MatDenseRestoreArray(A, &aa)); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) 2473 { 2474 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2475 PetscInt i, j; 2476 PetscScalar *aa; 2477 2478 PetscFunctionBegin; 2479 PetscCall(MatDenseGetArray(A, &aa)); 2480 for (j = 0; j < A->cmap->n; j++) { 2481 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2482 } 2483 PetscCall(MatDenseRestoreArray(A, &aa)); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 /* ----------------------------------------------------------------*/ 2488 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2489 { 2490 PetscInt m = A->rmap->n, n = B->cmap->n; 2491 PetscBool cisdense; 2492 2493 PetscFunctionBegin; 2494 PetscCall(MatSetSizes(C, m, n, m, n)); 2495 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2496 if (!cisdense) { 2497 PetscBool flg; 2498 2499 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2500 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2501 } 2502 PetscCall(MatSetUp(C)); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2507 { 2508 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2509 PetscBLASInt m, n, k; 2510 const PetscScalar *av, *bv; 2511 PetscScalar *cv; 2512 PetscScalar _DOne = 1.0, _DZero = 0.0; 2513 2514 PetscFunctionBegin; 2515 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2516 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2517 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2518 if (!m || !n || !k) PetscFunctionReturn(0); 2519 PetscCall(MatDenseGetArrayRead(A, &av)); 2520 PetscCall(MatDenseGetArrayRead(B, &bv)); 2521 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2522 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2523 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2524 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2525 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2526 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2531 { 2532 PetscInt m = A->rmap->n, n = B->rmap->n; 2533 PetscBool cisdense; 2534 2535 PetscFunctionBegin; 2536 PetscCall(MatSetSizes(C, m, n, m, n)); 2537 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2538 if (!cisdense) { 2539 PetscBool flg; 2540 2541 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2542 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2543 } 2544 PetscCall(MatSetUp(C)); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2549 { 2550 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2551 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2552 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2553 const PetscScalar *av, *bv; 2554 PetscScalar *cv; 2555 PetscBLASInt m, n, k; 2556 PetscScalar _DOne = 1.0, _DZero = 0.0; 2557 2558 PetscFunctionBegin; 2559 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2560 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2561 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2562 if (!m || !n || !k) PetscFunctionReturn(0); 2563 PetscCall(MatDenseGetArrayRead(A, &av)); 2564 PetscCall(MatDenseGetArrayRead(B, &bv)); 2565 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2566 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2567 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2568 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2569 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2570 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) 2575 { 2576 PetscInt m = A->cmap->n, n = B->cmap->n; 2577 PetscBool cisdense; 2578 2579 PetscFunctionBegin; 2580 PetscCall(MatSetSizes(C, m, n, m, n)); 2581 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2582 if (!cisdense) { 2583 PetscBool flg; 2584 2585 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2586 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2587 } 2588 PetscCall(MatSetUp(C)); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) 2593 { 2594 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2595 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2596 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2597 const PetscScalar *av, *bv; 2598 PetscScalar *cv; 2599 PetscBLASInt m, n, k; 2600 PetscScalar _DOne = 1.0, _DZero = 0.0; 2601 2602 PetscFunctionBegin; 2603 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2604 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2605 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2606 if (!m || !n || !k) PetscFunctionReturn(0); 2607 PetscCall(MatDenseGetArrayRead(A, &av)); 2608 PetscCall(MatDenseGetArrayRead(B, &bv)); 2609 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2610 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2611 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2612 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2613 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2614 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2615 PetscFunctionReturn(0); 2616 } 2617 2618 /* ----------------------------------------------- */ 2619 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) 2620 { 2621 PetscFunctionBegin; 2622 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2623 C->ops->productsymbolic = MatProductSymbolic_AB; 2624 PetscFunctionReturn(0); 2625 } 2626 2627 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) 2628 { 2629 PetscFunctionBegin; 2630 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2631 C->ops->productsymbolic = MatProductSymbolic_AtB; 2632 PetscFunctionReturn(0); 2633 } 2634 2635 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) 2636 { 2637 PetscFunctionBegin; 2638 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2639 C->ops->productsymbolic = MatProductSymbolic_ABt; 2640 PetscFunctionReturn(0); 2641 } 2642 2643 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) 2644 { 2645 Mat_Product *product = C->product; 2646 2647 PetscFunctionBegin; 2648 switch (product->type) { 2649 case MATPRODUCT_AB: 2650 PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); 2651 break; 2652 case MATPRODUCT_AtB: 2653 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); 2654 break; 2655 case MATPRODUCT_ABt: 2656 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); 2657 break; 2658 default: 2659 break; 2660 } 2661 PetscFunctionReturn(0); 2662 } 2663 /* ----------------------------------------------- */ 2664 2665 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) 2666 { 2667 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2668 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2669 PetscScalar *x; 2670 const PetscScalar *aa; 2671 2672 PetscFunctionBegin; 2673 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2674 PetscCall(VecGetArray(v, &x)); 2675 PetscCall(VecGetLocalSize(v, &p)); 2676 PetscCall(MatDenseGetArrayRead(A, &aa)); 2677 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2678 for (i = 0; i < m; i++) { 2679 x[i] = aa[i]; 2680 if (idx) idx[i] = 0; 2681 for (j = 1; j < n; j++) { 2682 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2683 x[i] = aa[i + a->lda * j]; 2684 if (idx) idx[i] = j; 2685 } 2686 } 2687 } 2688 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2689 PetscCall(VecRestoreArray(v, &x)); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) 2694 { 2695 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2696 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2697 PetscScalar *x; 2698 PetscReal atmp; 2699 const PetscScalar *aa; 2700 2701 PetscFunctionBegin; 2702 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2703 PetscCall(VecGetArray(v, &x)); 2704 PetscCall(VecGetLocalSize(v, &p)); 2705 PetscCall(MatDenseGetArrayRead(A, &aa)); 2706 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2707 for (i = 0; i < m; i++) { 2708 x[i] = PetscAbsScalar(aa[i]); 2709 for (j = 1; j < n; j++) { 2710 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2711 if (PetscAbsScalar(x[i]) < atmp) { 2712 x[i] = atmp; 2713 if (idx) idx[i] = j; 2714 } 2715 } 2716 } 2717 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2718 PetscCall(VecRestoreArray(v, &x)); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) 2723 { 2724 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2725 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2726 PetscScalar *x; 2727 const PetscScalar *aa; 2728 2729 PetscFunctionBegin; 2730 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2731 PetscCall(MatDenseGetArrayRead(A, &aa)); 2732 PetscCall(VecGetArray(v, &x)); 2733 PetscCall(VecGetLocalSize(v, &p)); 2734 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2735 for (i = 0; i < m; i++) { 2736 x[i] = aa[i]; 2737 if (idx) idx[i] = 0; 2738 for (j = 1; j < n; j++) { 2739 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2740 x[i] = aa[i + a->lda * j]; 2741 if (idx) idx[i] = j; 2742 } 2743 } 2744 } 2745 PetscCall(VecRestoreArray(v, &x)); 2746 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) 2751 { 2752 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2753 PetscScalar *x; 2754 const PetscScalar *aa; 2755 2756 PetscFunctionBegin; 2757 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2758 PetscCall(MatDenseGetArrayRead(A, &aa)); 2759 PetscCall(VecGetArray(v, &x)); 2760 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2761 PetscCall(VecRestoreArray(v, &x)); 2762 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2763 PetscFunctionReturn(0); 2764 } 2765 2766 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) 2767 { 2768 PetscInt i, j, m, n; 2769 const PetscScalar *a; 2770 2771 PetscFunctionBegin; 2772 PetscCall(MatGetSize(A, &m, &n)); 2773 PetscCall(PetscArrayzero(reductions, n)); 2774 PetscCall(MatDenseGetArrayRead(A, &a)); 2775 if (type == NORM_2) { 2776 for (i = 0; i < n; i++) { 2777 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 2778 a += m; 2779 } 2780 } else if (type == NORM_1) { 2781 for (i = 0; i < n; i++) { 2782 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 2783 a += m; 2784 } 2785 } else if (type == NORM_INFINITY) { 2786 for (i = 0; i < n; i++) { 2787 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 2788 a += m; 2789 } 2790 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2791 for (i = 0; i < n; i++) { 2792 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 2793 a += m; 2794 } 2795 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2796 for (i = 0; i < n; i++) { 2797 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 2798 a += m; 2799 } 2800 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2801 PetscCall(MatDenseRestoreArrayRead(A, &a)); 2802 if (type == NORM_2) { 2803 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2804 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2805 for (i = 0; i < n; i++) reductions[i] /= m; 2806 } 2807 PetscFunctionReturn(0); 2808 } 2809 2810 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) 2811 { 2812 PetscScalar *a; 2813 PetscInt lda, m, n, i, j; 2814 2815 PetscFunctionBegin; 2816 PetscCall(MatGetSize(x, &m, &n)); 2817 PetscCall(MatDenseGetLDA(x, &lda)); 2818 PetscCall(MatDenseGetArrayWrite(x, &a)); 2819 for (j = 0; j < n; j++) { 2820 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 2821 } 2822 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 2823 PetscFunctionReturn(0); 2824 } 2825 2826 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) 2827 { 2828 PetscFunctionBegin; 2829 *missing = PETSC_FALSE; 2830 PetscFunctionReturn(0); 2831 } 2832 2833 /* vals is not const */ 2834 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) 2835 { 2836 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2837 PetscScalar *v; 2838 2839 PetscFunctionBegin; 2840 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2841 PetscCall(MatDenseGetArray(A, &v)); 2842 *vals = v + col * a->lda; 2843 PetscCall(MatDenseRestoreArray(A, &v)); 2844 PetscFunctionReturn(0); 2845 } 2846 2847 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) 2848 { 2849 PetscFunctionBegin; 2850 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 2851 PetscFunctionReturn(0); 2852 } 2853 2854 /* -------------------------------------------------------------------*/ 2855 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 2856 MatGetRow_SeqDense, 2857 MatRestoreRow_SeqDense, 2858 MatMult_SeqDense, 2859 /* 4*/ MatMultAdd_SeqDense, 2860 MatMultTranspose_SeqDense, 2861 MatMultTransposeAdd_SeqDense, 2862 NULL, 2863 NULL, 2864 NULL, 2865 /* 10*/ NULL, 2866 MatLUFactor_SeqDense, 2867 MatCholeskyFactor_SeqDense, 2868 MatSOR_SeqDense, 2869 MatTranspose_SeqDense, 2870 /* 15*/ MatGetInfo_SeqDense, 2871 MatEqual_SeqDense, 2872 MatGetDiagonal_SeqDense, 2873 MatDiagonalScale_SeqDense, 2874 MatNorm_SeqDense, 2875 /* 20*/ MatAssemblyBegin_SeqDense, 2876 MatAssemblyEnd_SeqDense, 2877 MatSetOption_SeqDense, 2878 MatZeroEntries_SeqDense, 2879 /* 24*/ MatZeroRows_SeqDense, 2880 NULL, 2881 NULL, 2882 NULL, 2883 NULL, 2884 /* 29*/ MatSetUp_SeqDense, 2885 NULL, 2886 NULL, 2887 NULL, 2888 NULL, 2889 /* 34*/ MatDuplicate_SeqDense, 2890 NULL, 2891 NULL, 2892 NULL, 2893 NULL, 2894 /* 39*/ MatAXPY_SeqDense, 2895 MatCreateSubMatrices_SeqDense, 2896 NULL, 2897 MatGetValues_SeqDense, 2898 MatCopy_SeqDense, 2899 /* 44*/ MatGetRowMax_SeqDense, 2900 MatScale_SeqDense, 2901 MatShift_SeqDense, 2902 NULL, 2903 MatZeroRowsColumns_SeqDense, 2904 /* 49*/ MatSetRandom_SeqDense, 2905 NULL, 2906 NULL, 2907 NULL, 2908 NULL, 2909 /* 54*/ NULL, 2910 NULL, 2911 NULL, 2912 NULL, 2913 NULL, 2914 /* 59*/ MatCreateSubMatrix_SeqDense, 2915 MatDestroy_SeqDense, 2916 MatView_SeqDense, 2917 NULL, 2918 NULL, 2919 /* 64*/ NULL, 2920 NULL, 2921 NULL, 2922 NULL, 2923 NULL, 2924 /* 69*/ MatGetRowMaxAbs_SeqDense, 2925 NULL, 2926 NULL, 2927 NULL, 2928 NULL, 2929 /* 74*/ NULL, 2930 NULL, 2931 NULL, 2932 NULL, 2933 NULL, 2934 /* 79*/ NULL, 2935 NULL, 2936 NULL, 2937 NULL, 2938 /* 83*/ MatLoad_SeqDense, 2939 MatIsSymmetric_SeqDense, 2940 MatIsHermitian_SeqDense, 2941 NULL, 2942 NULL, 2943 NULL, 2944 /* 89*/ NULL, 2945 NULL, 2946 MatMatMultNumeric_SeqDense_SeqDense, 2947 NULL, 2948 NULL, 2949 /* 94*/ NULL, 2950 NULL, 2951 NULL, 2952 MatMatTransposeMultNumeric_SeqDense_SeqDense, 2953 NULL, 2954 /* 99*/ MatProductSetFromOptions_SeqDense, 2955 NULL, 2956 NULL, 2957 MatConjugate_SeqDense, 2958 NULL, 2959 /*104*/ NULL, 2960 MatRealPart_SeqDense, 2961 MatImaginaryPart_SeqDense, 2962 NULL, 2963 NULL, 2964 /*109*/ NULL, 2965 NULL, 2966 MatGetRowMin_SeqDense, 2967 MatGetColumnVector_SeqDense, 2968 MatMissingDiagonal_SeqDense, 2969 /*114*/ NULL, 2970 NULL, 2971 NULL, 2972 NULL, 2973 NULL, 2974 /*119*/ NULL, 2975 NULL, 2976 NULL, 2977 NULL, 2978 NULL, 2979 /*124*/ NULL, 2980 MatGetColumnReductions_SeqDense, 2981 NULL, 2982 NULL, 2983 NULL, 2984 /*129*/ NULL, 2985 NULL, 2986 NULL, 2987 MatTransposeMatMultNumeric_SeqDense_SeqDense, 2988 NULL, 2989 /*134*/ NULL, 2990 NULL, 2991 NULL, 2992 NULL, 2993 NULL, 2994 /*139*/ NULL, 2995 NULL, 2996 NULL, 2997 NULL, 2998 NULL, 2999 MatCreateMPIMatConcatenateSeqMat_SeqDense, 3000 /*145*/ NULL, 3001 NULL, 3002 NULL, 3003 NULL, 3004 NULL, 3005 /*150*/ NULL}; 3006 3007 /*@C 3008 MatCreateSeqDense - Creates a `MATSEQDENSE` that 3009 is stored in column major order (the usual Fortran 77 manner). Many 3010 of the matrix operations use the BLAS and LAPACK routines. 3011 3012 Collective 3013 3014 Input Parameters: 3015 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3016 . m - number of rows 3017 . n - number of columns 3018 - data - optional location of matrix data in column major order. Set data=NULL for PETSc 3019 to control all matrix memory allocation. 3020 3021 Output Parameter: 3022 . A - the matrix 3023 3024 Note: 3025 The data input variable is intended primarily for Fortran programmers 3026 who wish to allocate their own matrix memory space. Most users should 3027 set data=NULL. 3028 3029 Level: intermediate 3030 3031 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 3032 @*/ 3033 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A) 3034 { 3035 PetscFunctionBegin; 3036 PetscCall(MatCreate(comm, A)); 3037 PetscCall(MatSetSizes(*A, m, n, m, n)); 3038 PetscCall(MatSetType(*A, MATSEQDENSE)); 3039 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 /*@C 3044 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 3045 3046 Collective 3047 3048 Input Parameters: 3049 + B - the matrix 3050 - data - the array (or NULL) 3051 3052 Note: 3053 The data input variable is intended primarily for Fortran programmers 3054 who wish to allocate their own matrix memory space. Most users should 3055 need not call this routine. 3056 3057 Level: intermediate 3058 3059 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 3060 @*/ 3061 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) 3062 { 3063 PetscFunctionBegin; 3064 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3065 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) 3070 { 3071 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3072 3073 PetscFunctionBegin; 3074 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3075 B->preallocated = PETSC_TRUE; 3076 3077 PetscCall(PetscLayoutSetUp(B->rmap)); 3078 PetscCall(PetscLayoutSetUp(B->cmap)); 3079 3080 if (b->lda <= 0) b->lda = B->rmap->n; 3081 3082 if (!data) { /* petsc-allocated storage */ 3083 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3084 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 3085 3086 b->user_alloc = PETSC_FALSE; 3087 } else { /* user-allocated storage */ 3088 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 3089 b->v = data; 3090 b->user_alloc = PETSC_TRUE; 3091 } 3092 B->assembled = PETSC_TRUE; 3093 PetscFunctionReturn(0); 3094 } 3095 3096 #if defined(PETSC_HAVE_ELEMENTAL) 3097 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3098 { 3099 Mat mat_elemental; 3100 const PetscScalar *array; 3101 PetscScalar *v_colwise; 3102 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 3103 3104 PetscFunctionBegin; 3105 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3106 PetscCall(MatDenseGetArrayRead(A, &array)); 3107 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3108 k = 0; 3109 for (j = 0; j < N; j++) { 3110 cols[j] = j; 3111 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3112 } 3113 for (i = 0; i < M; i++) rows[i] = i; 3114 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3115 3116 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3117 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3118 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3119 PetscCall(MatSetUp(mat_elemental)); 3120 3121 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3122 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3123 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3124 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3125 PetscCall(PetscFree3(v_colwise, rows, cols)); 3126 3127 if (reuse == MAT_INPLACE_MATRIX) { 3128 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3129 } else { 3130 *newmat = mat_elemental; 3131 } 3132 PetscFunctionReturn(0); 3133 } 3134 #endif 3135 3136 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) 3137 { 3138 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3139 PetscBool data; 3140 3141 PetscFunctionBegin; 3142 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3143 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3144 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3145 b->lda = lda; 3146 PetscFunctionReturn(0); 3147 } 3148 3149 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3150 { 3151 PetscFunctionBegin; 3152 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3153 PetscFunctionReturn(0); 3154 } 3155 3156 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3157 { 3158 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3159 3160 PetscFunctionBegin; 3161 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3162 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3163 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3164 a->vecinuse = col + 1; 3165 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3166 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3167 *v = a->cvec; 3168 PetscFunctionReturn(0); 3169 } 3170 3171 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) 3172 { 3173 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3174 3175 PetscFunctionBegin; 3176 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3177 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3178 a->vecinuse = 0; 3179 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3180 PetscCall(VecResetArray(a->cvec)); 3181 if (v) *v = NULL; 3182 PetscFunctionReturn(0); 3183 } 3184 3185 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3186 { 3187 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3188 3189 PetscFunctionBegin; 3190 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3191 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3192 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3193 a->vecinuse = col + 1; 3194 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3195 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3196 PetscCall(VecLockReadPush(a->cvec)); 3197 *v = a->cvec; 3198 PetscFunctionReturn(0); 3199 } 3200 3201 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) 3202 { 3203 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3204 3205 PetscFunctionBegin; 3206 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3207 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3208 a->vecinuse = 0; 3209 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3210 PetscCall(VecLockReadPop(a->cvec)); 3211 PetscCall(VecResetArray(a->cvec)); 3212 if (v) *v = NULL; 3213 PetscFunctionReturn(0); 3214 } 3215 3216 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3217 { 3218 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3219 3220 PetscFunctionBegin; 3221 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3222 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3223 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3224 a->vecinuse = col + 1; 3225 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3226 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3227 *v = a->cvec; 3228 PetscFunctionReturn(0); 3229 } 3230 3231 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) 3232 { 3233 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3234 3235 PetscFunctionBegin; 3236 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3237 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3238 a->vecinuse = 0; 3239 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3240 PetscCall(VecResetArray(a->cvec)); 3241 if (v) *v = NULL; 3242 PetscFunctionReturn(0); 3243 } 3244 3245 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3246 { 3247 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3248 3249 PetscFunctionBegin; 3250 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3251 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3252 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3253 if (!a->cmat) { 3254 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat)); 3255 } else { 3256 PetscCall(MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda)); 3257 } 3258 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3259 a->matinuse = cbegin + 1; 3260 *v = a->cmat; 3261 #if defined(PETSC_HAVE_CUDA) 3262 A->offloadmask = PETSC_OFFLOAD_CPU; 3263 #endif 3264 PetscFunctionReturn(0); 3265 } 3266 3267 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) 3268 { 3269 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3270 3271 PetscFunctionBegin; 3272 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3273 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3274 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3275 a->matinuse = 0; 3276 PetscCall(MatDenseResetArray(a->cmat)); 3277 if (v) *v = NULL; 3278 #if defined(PETSC_HAVE_CUDA) 3279 A->offloadmask = PETSC_OFFLOAD_CPU; 3280 #endif 3281 PetscFunctionReturn(0); 3282 } 3283 3284 /*MC 3285 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3286 3287 Options Database Keys: 3288 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3289 3290 Level: beginner 3291 3292 .seealso: `MATSEQDENSE`, `MatCreateSeqDense()` 3293 M*/ 3294 PetscErrorCode MatCreate_SeqDense(Mat B) 3295 { 3296 Mat_SeqDense *b; 3297 PetscMPIInt size; 3298 3299 PetscFunctionBegin; 3300 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3301 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3302 3303 PetscCall(PetscNew(&b)); 3304 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3305 B->data = (void *)b; 3306 3307 b->roworiented = PETSC_TRUE; 3308 3309 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3310 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3311 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3312 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3313 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3314 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3315 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3316 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3317 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3318 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3319 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3320 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3321 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3322 #if defined(PETSC_HAVE_ELEMENTAL) 3323 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3324 #endif 3325 #if defined(PETSC_HAVE_SCALAPACK) 3326 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3327 #endif 3328 #if defined(PETSC_HAVE_CUDA) 3329 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3330 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3331 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3332 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3333 #endif 3334 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3335 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3336 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3337 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3338 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3339 3340 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3341 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3342 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3343 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3344 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3345 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3346 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3347 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3348 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3349 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3350 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3351 PetscFunctionReturn(0); 3352 } 3353 3354 /*@C 3355 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3356 3357 Not Collective 3358 3359 Input Parameters: 3360 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3361 - col - column index 3362 3363 Output Parameter: 3364 . vals - pointer to the data 3365 3366 Level: intermediate 3367 3368 Note: 3369 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3370 3371 .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3372 @*/ 3373 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals) 3374 { 3375 PetscFunctionBegin; 3376 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3377 PetscValidLogicalCollectiveInt(A, col, 2); 3378 PetscValidPointer(vals, 3); 3379 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3380 PetscFunctionReturn(0); 3381 } 3382 3383 /*@C 3384 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3385 3386 Not Collective 3387 3388 Input Parameters: 3389 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3390 - vals - pointer to the data (may be NULL) 3391 3392 Level: intermediate 3393 3394 .seealso: `MATDENSE`, `MatDenseGetColumn()` 3395 @*/ 3396 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals) 3397 { 3398 PetscFunctionBegin; 3399 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3400 PetscValidPointer(vals, 2); 3401 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3402 PetscFunctionReturn(0); 3403 } 3404 3405 /*@ 3406 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3407 3408 Collective 3409 3410 Input Parameters: 3411 + mat - the `Mat` object 3412 - col - the column index 3413 3414 Output Parameter: 3415 . v - the vector 3416 3417 Notes: 3418 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3419 3420 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3421 3422 Level: intermediate 3423 3424 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3425 @*/ 3426 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) 3427 { 3428 PetscFunctionBegin; 3429 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3430 PetscValidType(A, 1); 3431 PetscValidLogicalCollectiveInt(A, col, 2); 3432 PetscValidPointer(v, 3); 3433 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3434 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3435 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 /*@ 3440 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec(). 3441 3442 Collective 3443 3444 Input Parameters: 3445 + mat - the Mat object 3446 . col - the column index 3447 - v - the Vec object (may be NULL) 3448 3449 Level: intermediate 3450 3451 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3452 @*/ 3453 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) 3454 { 3455 PetscFunctionBegin; 3456 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3457 PetscValidType(A, 1); 3458 PetscValidLogicalCollectiveInt(A, col, 2); 3459 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3460 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3461 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3462 PetscFunctionReturn(0); 3463 } 3464 3465 /*@ 3466 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec. 3467 3468 Collective 3469 3470 Input Parameters: 3471 + mat - the Mat object 3472 - col - the column index 3473 3474 Output Parameter: 3475 . v - the vector 3476 3477 Notes: 3478 The vector is owned by PETSc and users cannot modify it. 3479 3480 Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed. 3481 3482 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access. 3483 3484 Level: intermediate 3485 3486 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3487 @*/ 3488 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) 3489 { 3490 PetscFunctionBegin; 3491 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3492 PetscValidType(A, 1); 3493 PetscValidLogicalCollectiveInt(A, col, 2); 3494 PetscValidPointer(v, 3); 3495 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3496 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3497 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3498 PetscFunctionReturn(0); 3499 } 3500 3501 /*@ 3502 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead(). 3503 3504 Collective 3505 3506 Input Parameters: 3507 + mat - the Mat object 3508 . col - the column index 3509 - v - the Vec object (may be NULL) 3510 3511 Level: intermediate 3512 3513 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3514 @*/ 3515 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) 3516 { 3517 PetscFunctionBegin; 3518 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3519 PetscValidType(A, 1); 3520 PetscValidLogicalCollectiveInt(A, col, 2); 3521 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3522 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3523 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3524 PetscFunctionReturn(0); 3525 } 3526 3527 /*@ 3528 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec. 3529 3530 Collective 3531 3532 Input Parameters: 3533 + mat - the Mat object 3534 - col - the column index 3535 3536 Output Parameter: 3537 . v - the vector 3538 3539 Notes: 3540 The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed. 3541 3542 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access. 3543 3544 Level: intermediate 3545 3546 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3547 @*/ 3548 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) 3549 { 3550 PetscFunctionBegin; 3551 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3552 PetscValidType(A, 1); 3553 PetscValidLogicalCollectiveInt(A, col, 2); 3554 PetscValidPointer(v, 3); 3555 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3556 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3557 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3558 PetscFunctionReturn(0); 3559 } 3560 3561 /*@ 3562 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite(). 3563 3564 Collective 3565 3566 Input Parameters: 3567 + mat - the Mat object 3568 . col - the column index 3569 - v - the Vec object (may be NULL) 3570 3571 Level: intermediate 3572 3573 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3574 @*/ 3575 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) 3576 { 3577 PetscFunctionBegin; 3578 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3579 PetscValidType(A, 1); 3580 PetscValidLogicalCollectiveInt(A, col, 2); 3581 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3582 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3583 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3584 PetscFunctionReturn(0); 3585 } 3586 3587 /*@ 3588 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat. 3589 3590 Collective 3591 3592 Input Parameters: 3593 + mat - the Mat object 3594 . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0) 3595 . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M) 3596 . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0) 3597 - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N) 3598 3599 Output Parameter: 3600 . v - the matrix 3601 3602 Notes: 3603 The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed. 3604 3605 The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows. 3606 3607 Level: intermediate 3608 3609 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3610 @*/ 3611 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 3612 { 3613 PetscFunctionBegin; 3614 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3615 PetscValidType(A, 1); 3616 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3617 PetscValidLogicalCollectiveInt(A, rend, 3); 3618 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3619 PetscValidLogicalCollectiveInt(A, cend, 5); 3620 PetscValidPointer(v, 6); 3621 if (rbegin == PETSC_DECIDE) rbegin = 0; 3622 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3623 if (cbegin == PETSC_DECIDE) cbegin = 0; 3624 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3625 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3626 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3627 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3628 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3629 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3630 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3631 PetscFunctionReturn(0); 3632 } 3633 3634 /*@ 3635 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix(). 3636 3637 Collective 3638 3639 Input Parameters: 3640 + mat - the Mat object 3641 - v - the Mat object (may be NULL) 3642 3643 Level: intermediate 3644 3645 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3646 @*/ 3647 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) 3648 { 3649 PetscFunctionBegin; 3650 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3651 PetscValidType(A, 1); 3652 PetscValidPointer(v, 2); 3653 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3654 PetscFunctionReturn(0); 3655 } 3656 3657 #include <petscblaslapack.h> 3658 #include <petsc/private/kernels/blockinvert.h> 3659 3660 PetscErrorCode MatSeqDenseInvert(Mat A) 3661 { 3662 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3663 PetscInt bs = A->rmap->n; 3664 MatScalar *values = a->v; 3665 const PetscReal shift = 0.0; 3666 PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE; 3667 3668 PetscFunctionBegin; 3669 /* factor and invert each block */ 3670 switch (bs) { 3671 case 1: 3672 values[0] = (PetscScalar)1.0 / (values[0] + shift); 3673 break; 3674 case 2: 3675 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3676 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3677 break; 3678 case 3: 3679 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3680 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3681 break; 3682 case 4: 3683 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3684 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3685 break; 3686 case 5: { 3687 PetscScalar work[25]; 3688 PetscInt ipvt[5]; 3689 3690 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3691 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3692 } break; 3693 case 6: 3694 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3695 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3696 break; 3697 case 7: 3698 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3699 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3700 break; 3701 default: { 3702 PetscInt *v_pivots, *IJ, j; 3703 PetscScalar *v_work; 3704 3705 PetscCall(PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ)); 3706 for (j = 0; j < bs; j++) IJ[j] = j; 3707 PetscCall(PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 3708 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3709 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 3710 } 3711 } 3712 PetscFunctionReturn(0); 3713 } 3714