1 2 /* 3 Defines the basic matrix operations for sequential dense. 4 */ 5 6 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 11 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) { 12 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 13 PetscInt j, k, n = A->rmap->n; 14 PetscScalar *v; 15 16 PetscFunctionBegin; 17 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 18 PetscCall(MatDenseGetArray(A, &v)); 19 if (!hermitian) { 20 for (k = 0; k < n; k++) { 21 for (j = k; j < n; j++) { v[j * mat->lda + k] = v[k * mat->lda + j]; } 22 } 23 } else { 24 for (k = 0; k < n; k++) { 25 for (j = k; j < n; j++) { v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); } 26 } 27 } 28 PetscCall(MatDenseRestoreArray(A, &v)); 29 PetscFunctionReturn(0); 30 } 31 32 PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) { 33 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 34 PetscBLASInt info, n; 35 36 PetscFunctionBegin; 37 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 38 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 39 if (A->factortype == MAT_FACTOR_LU) { 40 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 41 if (!mat->fwork) { 42 mat->lfwork = n; 43 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 44 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 45 } 46 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 47 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 48 PetscCall(PetscFPTrapPop()); 49 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 50 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 51 if (A->spd == PETSC_BOOL3_TRUE) { 52 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 53 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 54 PetscCall(PetscFPTrapPop()); 55 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 56 #if defined(PETSC_USE_COMPLEX) 57 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 58 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 59 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 60 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 61 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 62 PetscCall(PetscFPTrapPop()); 63 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 64 #endif 65 } else { /* symmetric case */ 66 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 67 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 68 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 69 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 70 PetscCall(PetscFPTrapPop()); 71 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 72 } 73 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 74 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 75 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 76 77 A->ops->solve = NULL; 78 A->ops->matsolve = NULL; 79 A->ops->solvetranspose = NULL; 80 A->ops->matsolvetranspose = NULL; 81 A->ops->solveadd = NULL; 82 A->ops->solvetransposeadd = NULL; 83 A->factortype = MAT_FACTOR_NONE; 84 PetscCall(PetscFree(A->solvertype)); 85 PetscFunctionReturn(0); 86 } 87 88 PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 89 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 90 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 91 PetscScalar *slot, *bb, *v; 92 const PetscScalar *xx; 93 94 PetscFunctionBegin; 95 if (PetscDefined(USE_DEBUG)) { 96 for (i = 0; i < N; i++) { 97 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 98 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 99 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 100 } 101 } 102 if (!N) PetscFunctionReturn(0); 103 104 /* fix right hand side if needed */ 105 if (x && b) { 106 Vec xt; 107 108 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 109 PetscCall(VecDuplicate(x, &xt)); 110 PetscCall(VecCopy(x, xt)); 111 PetscCall(VecScale(xt, -1.0)); 112 PetscCall(MatMultAdd(A, xt, b, b)); 113 PetscCall(VecDestroy(&xt)); 114 PetscCall(VecGetArrayRead(x, &xx)); 115 PetscCall(VecGetArray(b, &bb)); 116 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 117 PetscCall(VecRestoreArrayRead(x, &xx)); 118 PetscCall(VecRestoreArray(b, &bb)); 119 } 120 121 PetscCall(MatDenseGetArray(A, &v)); 122 for (i = 0; i < N; i++) { 123 slot = v + rows[i] * m; 124 PetscCall(PetscArrayzero(slot, r)); 125 } 126 for (i = 0; i < N; i++) { 127 slot = v + rows[i]; 128 for (j = 0; j < n; j++) { 129 *slot = 0.0; 130 slot += m; 131 } 132 } 133 if (diag != 0.0) { 134 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 135 for (i = 0; i < N; i++) { 136 slot = v + (m + 1) * rows[i]; 137 *slot = diag; 138 } 139 } 140 PetscCall(MatDenseRestoreArray(A, &v)); 141 PetscFunctionReturn(0); 142 } 143 144 PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C) { 145 Mat_SeqDense *c = (Mat_SeqDense *)(C->data); 146 147 PetscFunctionBegin; 148 if (c->ptapwork) { 149 PetscCall((*C->ops->matmultnumeric)(A, P, c->ptapwork)); 150 PetscCall((*C->ops->transposematmultnumeric)(P, c->ptapwork, C)); 151 } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first"); 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C) { 156 Mat_SeqDense *c; 157 PetscBool cisdense; 158 159 PetscFunctionBegin; 160 PetscCall(MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N)); 161 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 162 if (!cisdense) { 163 PetscBool flg; 164 165 PetscCall(PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg)); 166 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 167 } 168 PetscCall(MatSetUp(C)); 169 c = (Mat_SeqDense *)C->data; 170 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork)); 171 PetscCall(MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N)); 172 PetscCall(MatSetType(c->ptapwork, ((PetscObject)C)->type_name)); 173 PetscCall(MatSetUp(c->ptapwork)); 174 PetscFunctionReturn(0); 175 } 176 177 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 178 Mat B = NULL; 179 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 180 Mat_SeqDense *b; 181 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 182 const MatScalar *av; 183 PetscBool isseqdense; 184 185 PetscFunctionBegin; 186 if (reuse == MAT_REUSE_MATRIX) { 187 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 188 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)(*newmat))->type_name); 189 } 190 if (reuse != MAT_REUSE_MATRIX) { 191 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 192 PetscCall(MatSetSizes(B, m, n, m, n)); 193 PetscCall(MatSetType(B, MATSEQDENSE)); 194 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 195 b = (Mat_SeqDense *)(B->data); 196 } else { 197 b = (Mat_SeqDense *)((*newmat)->data); 198 PetscCall(PetscArrayzero(b->v, m * n)); 199 } 200 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 201 for (i = 0; i < m; i++) { 202 PetscInt j; 203 for (j = 0; j < ai[1] - ai[0]; j++) { 204 b->v[*aj * m + i] = *av; 205 aj++; 206 av++; 207 } 208 ai++; 209 } 210 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 211 212 if (reuse == MAT_INPLACE_MATRIX) { 213 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 214 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 215 PetscCall(MatHeaderReplace(A, &B)); 216 } else { 217 if (B) *newmat = B; 218 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 219 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 220 } 221 PetscFunctionReturn(0); 222 } 223 224 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 225 Mat B = NULL; 226 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 227 PetscInt i, j; 228 PetscInt *rows, *nnz; 229 MatScalar *aa = a->v, *vals; 230 231 PetscFunctionBegin; 232 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 233 if (reuse != MAT_REUSE_MATRIX) { 234 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 235 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 236 PetscCall(MatSetType(B, MATSEQAIJ)); 237 for (j = 0; j < A->cmap->n; j++) { 238 for (i = 0; i < A->rmap->n; i++) 239 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 240 aa += a->lda; 241 } 242 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 243 } else B = *newmat; 244 aa = a->v; 245 for (j = 0; j < A->cmap->n; j++) { 246 PetscInt numRows = 0; 247 for (i = 0; i < A->rmap->n; i++) 248 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 249 rows[numRows] = i; 250 vals[numRows++] = aa[i]; 251 } 252 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 253 aa += a->lda; 254 } 255 PetscCall(PetscFree3(rows, nnz, vals)); 256 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 257 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 258 259 if (reuse == MAT_INPLACE_MATRIX) { 260 PetscCall(MatHeaderReplace(A, &B)); 261 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { 266 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 267 const PetscScalar *xv; 268 PetscScalar *yv; 269 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 270 271 PetscFunctionBegin; 272 PetscCall(MatDenseGetArrayRead(X, &xv)); 273 PetscCall(MatDenseGetArray(Y, &yv)); 274 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 275 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 276 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 277 PetscCall(PetscBLASIntCast(y->lda, &lday)); 278 if (ldax > m || lday > m) { 279 PetscInt j; 280 281 for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one)); 282 } else { 283 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 284 } 285 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 286 PetscCall(MatDenseRestoreArray(Y, &yv)); 287 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 288 PetscFunctionReturn(0); 289 } 290 291 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) { 292 PetscLogDouble N = A->rmap->n * A->cmap->n; 293 294 PetscFunctionBegin; 295 info->block_size = 1.0; 296 info->nz_allocated = N; 297 info->nz_used = N; 298 info->nz_unneeded = 0; 299 info->assemblies = A->num_ass; 300 info->mallocs = 0; 301 info->memory = ((PetscObject)A)->mem; 302 info->fill_ratio_given = 0; 303 info->fill_ratio_needed = 0; 304 info->factor_mallocs = 0; 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) { 309 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 310 PetscScalar *v; 311 PetscBLASInt one = 1, j, nz, lda = 0; 312 313 PetscFunctionBegin; 314 PetscCall(MatDenseGetArray(A, &v)); 315 PetscCall(PetscBLASIntCast(a->lda, &lda)); 316 if (lda > A->rmap->n) { 317 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 318 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 319 } else { 320 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 321 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 322 } 323 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 324 PetscCall(MatDenseRestoreArray(A, &v)); 325 PetscFunctionReturn(0); 326 } 327 328 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) { 329 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 330 PetscScalar *v; 331 PetscInt j, k; 332 333 PetscFunctionBegin; 334 PetscCall(MatDenseGetArray(A, &v)); 335 k = PetscMin(A->rmap->n, A->cmap->n); 336 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 337 PetscCall(PetscLogFlops(k)); 338 PetscCall(MatDenseRestoreArray(A, &v)); 339 PetscFunctionReturn(0); 340 } 341 342 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { 343 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 344 PetscInt i, j, m = A->rmap->n, N = a->lda; 345 const PetscScalar *v; 346 347 PetscFunctionBegin; 348 *fl = PETSC_FALSE; 349 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 350 PetscCall(MatDenseGetArrayRead(A, &v)); 351 for (i = 0; i < m; i++) { 352 for (j = i; j < m; j++) { 353 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) { goto restore; } 354 } 355 } 356 *fl = PETSC_TRUE; 357 restore: 358 PetscCall(MatDenseRestoreArrayRead(A, &v)); 359 PetscFunctionReturn(0); 360 } 361 362 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { 363 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 364 PetscInt i, j, m = A->rmap->n, N = a->lda; 365 const PetscScalar *v; 366 367 PetscFunctionBegin; 368 *fl = PETSC_FALSE; 369 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 370 PetscCall(MatDenseGetArrayRead(A, &v)); 371 for (i = 0; i < m; i++) { 372 for (j = i; j < m; j++) { 373 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) { goto restore; } 374 } 375 } 376 *fl = PETSC_TRUE; 377 restore: 378 PetscCall(MatDenseRestoreArrayRead(A, &v)); 379 PetscFunctionReturn(0); 380 } 381 382 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) { 383 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 384 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 385 PetscBool isdensecpu; 386 387 PetscFunctionBegin; 388 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 389 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 390 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 391 PetscCall(MatDenseSetLDA(newi, lda)); 392 } 393 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 394 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 395 if (cpvalues == MAT_COPY_VALUES) { 396 const PetscScalar *av; 397 PetscScalar *v; 398 399 PetscCall(MatDenseGetArrayRead(A, &av)); 400 PetscCall(MatDenseGetArrayWrite(newi, &v)); 401 PetscCall(MatDenseGetLDA(newi, &nlda)); 402 m = A->rmap->n; 403 if (lda > m || nlda > m) { 404 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(v + j * nlda, av + j * lda, m)); 405 } else { 406 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 407 } 408 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 409 PetscCall(MatDenseRestoreArrayRead(A, &av)); 410 } 411 PetscFunctionReturn(0); 412 } 413 414 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { 415 PetscFunctionBegin; 416 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 417 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 418 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 419 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 420 PetscFunctionReturn(0); 421 } 422 423 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { 424 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 425 PetscBLASInt info; 426 427 PetscFunctionBegin; 428 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 429 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 430 PetscCall(PetscFPTrapPop()); 431 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 432 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 433 PetscFunctionReturn(0); 434 } 435 436 static PetscErrorCode MatConjugate_SeqDense(Mat); 437 438 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { 439 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 440 PetscBLASInt info; 441 442 PetscFunctionBegin; 443 if (A->spd == PETSC_BOOL3_TRUE) { 444 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 445 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 446 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 447 PetscCall(PetscFPTrapPop()); 448 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 449 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 450 #if defined(PETSC_USE_COMPLEX) 451 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 452 if (T) PetscCall(MatConjugate_SeqDense(A)); 453 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 454 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 455 PetscCall(PetscFPTrapPop()); 456 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 457 if (T) PetscCall(MatConjugate_SeqDense(A)); 458 #endif 459 } else { /* symmetric case */ 460 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 461 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 462 PetscCall(PetscFPTrapPop()); 463 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 464 } 465 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 466 PetscFunctionReturn(0); 467 } 468 469 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { 470 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 471 PetscBLASInt info; 472 char trans; 473 474 PetscFunctionBegin; 475 if (PetscDefined(USE_COMPLEX)) { 476 trans = 'C'; 477 } else { 478 trans = 'T'; 479 } 480 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 481 { /* lwork depends on the number of right-hand sides */ 482 PetscBLASInt nlfwork, lfwork = -1; 483 PetscScalar fwork; 484 485 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 486 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 487 if (nlfwork > mat->lfwork) { 488 mat->lfwork = nlfwork; 489 PetscCall(PetscFree(mat->fwork)); 490 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 491 } 492 } 493 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 494 PetscCall(PetscFPTrapPop()); 495 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 496 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 497 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 498 PetscCall(PetscFPTrapPop()); 499 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 500 for (PetscInt j = 0; j < nrhs; j++) { 501 for (PetscInt i = mat->rank; i < k; i++) { x[j * ldx + i] = 0.; } 502 } 503 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 504 PetscFunctionReturn(0); 505 } 506 507 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { 508 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 509 PetscBLASInt info; 510 511 PetscFunctionBegin; 512 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 513 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 514 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 515 PetscCall(PetscFPTrapPop()); 516 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 517 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 518 { /* lwork depends on the number of right-hand sides */ 519 PetscBLASInt nlfwork, lfwork = -1; 520 PetscScalar fwork; 521 522 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 523 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 524 if (nlfwork > mat->lfwork) { 525 mat->lfwork = nlfwork; 526 PetscCall(PetscFree(mat->fwork)); 527 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 528 } 529 } 530 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 531 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 532 PetscCall(PetscFPTrapPop()); 533 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 534 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 535 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 536 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 537 PetscFunctionReturn(0); 538 } 539 540 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { 541 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 542 PetscScalar *y; 543 PetscBLASInt m = 0, k = 0; 544 545 PetscFunctionBegin; 546 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 547 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 548 if (k < m) { 549 PetscCall(VecCopy(xx, mat->qrrhs)); 550 PetscCall(VecGetArray(mat->qrrhs, &y)); 551 } else { 552 PetscCall(VecCopy(xx, yy)); 553 PetscCall(VecGetArray(yy, &y)); 554 } 555 *_y = y; 556 *_k = k; 557 *_m = m; 558 PetscFunctionReturn(0); 559 } 560 561 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { 562 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 563 PetscScalar *y = NULL; 564 PetscBLASInt m, k; 565 566 PetscFunctionBegin; 567 y = *_y; 568 *_y = NULL; 569 k = *_k; 570 m = *_m; 571 if (k < m) { 572 PetscScalar *yv; 573 PetscCall(VecGetArray(yy, &yv)); 574 PetscCall(PetscArraycpy(yv, y, k)); 575 PetscCall(VecRestoreArray(yy, &yv)); 576 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 577 } else { 578 PetscCall(VecRestoreArray(yy, &y)); 579 } 580 PetscFunctionReturn(0); 581 } 582 583 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) { 584 PetscScalar *y = NULL; 585 PetscBLASInt m = 0, k = 0; 586 587 PetscFunctionBegin; 588 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 589 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 590 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 591 PetscFunctionReturn(0); 592 } 593 594 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) { 595 PetscScalar *y = NULL; 596 PetscBLASInt m = 0, k = 0; 597 598 PetscFunctionBegin; 599 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 600 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 601 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 602 PetscFunctionReturn(0); 603 } 604 605 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { 606 PetscScalar *y = NULL; 607 PetscBLASInt m = 0, k = 0; 608 609 PetscFunctionBegin; 610 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 611 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 612 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 613 PetscFunctionReturn(0); 614 } 615 616 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { 617 PetscScalar *y = NULL; 618 PetscBLASInt m = 0, k = 0; 619 620 PetscFunctionBegin; 621 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 622 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 623 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 624 PetscFunctionReturn(0); 625 } 626 627 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) { 628 PetscScalar *y = NULL; 629 PetscBLASInt m = 0, k = 0; 630 631 PetscFunctionBegin; 632 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 633 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 634 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 635 PetscFunctionReturn(0); 636 } 637 638 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) { 639 PetscScalar *y = NULL; 640 PetscBLASInt m = 0, k = 0; 641 642 PetscFunctionBegin; 643 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 644 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 645 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 646 PetscFunctionReturn(0); 647 } 648 649 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { 650 const PetscScalar *b; 651 PetscScalar *y; 652 PetscInt n, _ldb, _ldx; 653 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 654 655 PetscFunctionBegin; 656 *_ldy = 0; 657 *_m = 0; 658 *_nrhs = 0; 659 *_k = 0; 660 *_y = NULL; 661 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 662 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 663 PetscCall(MatGetSize(B, NULL, &n)); 664 PetscCall(PetscBLASIntCast(n, &nrhs)); 665 PetscCall(MatDenseGetLDA(B, &_ldb)); 666 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 667 PetscCall(MatDenseGetLDA(X, &_ldx)); 668 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 669 if (ldx < m) { 670 PetscCall(MatDenseGetArrayRead(B, &b)); 671 PetscCall(PetscMalloc1(nrhs * m, &y)); 672 if (ldb == m) { 673 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 674 } else { 675 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 676 } 677 ldy = m; 678 PetscCall(MatDenseRestoreArrayRead(B, &b)); 679 } else { 680 if (ldb == ldx) { 681 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 682 PetscCall(MatDenseGetArray(X, &y)); 683 } else { 684 PetscCall(MatDenseGetArray(X, &y)); 685 PetscCall(MatDenseGetArrayRead(B, &b)); 686 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 687 PetscCall(MatDenseRestoreArrayRead(B, &b)); 688 } 689 ldy = ldx; 690 } 691 *_y = y; 692 *_ldy = ldy; 693 *_k = k; 694 *_m = m; 695 *_nrhs = nrhs; 696 PetscFunctionReturn(0); 697 } 698 699 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { 700 PetscScalar *y; 701 PetscInt _ldx; 702 PetscBLASInt k, ldy, nrhs, ldx = 0; 703 704 PetscFunctionBegin; 705 y = *_y; 706 *_y = NULL; 707 k = *_k; 708 ldy = *_ldy; 709 nrhs = *_nrhs; 710 PetscCall(MatDenseGetLDA(X, &_ldx)); 711 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 712 if (ldx != ldy) { 713 PetscScalar *xv; 714 PetscCall(MatDenseGetArray(X, &xv)); 715 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 716 PetscCall(MatDenseRestoreArray(X, &xv)); 717 PetscCall(PetscFree(y)); 718 } else { 719 PetscCall(MatDenseRestoreArray(X, &y)); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) { 725 PetscScalar *y; 726 PetscBLASInt m, k, ldy, nrhs; 727 728 PetscFunctionBegin; 729 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 730 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 731 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 732 PetscFunctionReturn(0); 733 } 734 735 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) { 736 PetscScalar *y; 737 PetscBLASInt m, k, ldy, nrhs; 738 739 PetscFunctionBegin; 740 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 741 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 742 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 743 PetscFunctionReturn(0); 744 } 745 746 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) { 747 PetscScalar *y; 748 PetscBLASInt m, k, ldy, nrhs; 749 750 PetscFunctionBegin; 751 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 752 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 753 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 754 PetscFunctionReturn(0); 755 } 756 757 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) { 758 PetscScalar *y; 759 PetscBLASInt m, k, ldy, nrhs; 760 761 PetscFunctionBegin; 762 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 763 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 764 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 765 PetscFunctionReturn(0); 766 } 767 768 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) { 769 PetscScalar *y; 770 PetscBLASInt m, k, ldy, nrhs; 771 772 PetscFunctionBegin; 773 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 774 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 775 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 776 PetscFunctionReturn(0); 777 } 778 779 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) { 780 PetscScalar *y; 781 PetscBLASInt m, k, ldy, nrhs; 782 783 PetscFunctionBegin; 784 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 786 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 787 PetscFunctionReturn(0); 788 } 789 790 /* ---------------------------------------------------------------*/ 791 /* COMMENT: I have chosen to hide row permutation in the pivots, 792 rather than put it in the Mat->row slot.*/ 793 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) { 794 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 795 PetscBLASInt n, m, info; 796 797 PetscFunctionBegin; 798 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 799 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 800 if (!mat->pivots) { 801 PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 802 PetscCall(PetscLogObjectMemory((PetscObject)A, A->rmap->n * sizeof(PetscBLASInt))); 803 } 804 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 805 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 806 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 807 PetscCall(PetscFPTrapPop()); 808 809 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 810 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 811 812 A->ops->solve = MatSolve_SeqDense_LU; 813 A->ops->matsolve = MatMatSolve_SeqDense_LU; 814 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 815 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 816 A->factortype = MAT_FACTOR_LU; 817 818 PetscCall(PetscFree(A->solvertype)); 819 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 820 821 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 822 PetscFunctionReturn(0); 823 } 824 825 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 826 MatFactorInfo info; 827 828 PetscFunctionBegin; 829 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 830 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 831 PetscFunctionReturn(0); 832 } 833 834 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) { 835 PetscFunctionBegin; 836 fact->preallocated = PETSC_TRUE; 837 fact->assembled = PETSC_TRUE; 838 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 839 PetscFunctionReturn(0); 840 } 841 842 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 843 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) { 844 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 845 PetscBLASInt info, n; 846 847 PetscFunctionBegin; 848 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 849 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 850 if (A->spd == PETSC_BOOL3_TRUE) { 851 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 852 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 853 PetscCall(PetscFPTrapPop()); 854 #if defined(PETSC_USE_COMPLEX) 855 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 856 if (!mat->pivots) { 857 PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 858 PetscCall(PetscLogObjectMemory((PetscObject)A, A->rmap->n * sizeof(PetscBLASInt))); 859 } 860 if (!mat->fwork) { 861 PetscScalar dummy; 862 863 mat->lfwork = -1; 864 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 865 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 866 PetscCall(PetscFPTrapPop()); 867 mat->lfwork = (PetscInt)PetscRealPart(dummy); 868 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 869 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 870 } 871 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 872 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 873 PetscCall(PetscFPTrapPop()); 874 #endif 875 } else { /* symmetric case */ 876 if (!mat->pivots) { 877 PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 878 PetscCall(PetscLogObjectMemory((PetscObject)A, A->rmap->n * sizeof(PetscBLASInt))); 879 } 880 if (!mat->fwork) { 881 PetscScalar dummy; 882 883 mat->lfwork = -1; 884 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 885 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 886 PetscCall(PetscFPTrapPop()); 887 mat->lfwork = (PetscInt)PetscRealPart(dummy); 888 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 889 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 890 } 891 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 892 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 893 PetscCall(PetscFPTrapPop()); 894 } 895 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 896 897 A->ops->solve = MatSolve_SeqDense_Cholesky; 898 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 899 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 900 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 901 A->factortype = MAT_FACTOR_CHOLESKY; 902 903 PetscCall(PetscFree(A->solvertype)); 904 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 905 906 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 907 PetscFunctionReturn(0); 908 } 909 910 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 911 MatFactorInfo info; 912 913 PetscFunctionBegin; 914 info.fill = 1.0; 915 916 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 917 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 918 PetscFunctionReturn(0); 919 } 920 921 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { 922 PetscFunctionBegin; 923 fact->assembled = PETSC_TRUE; 924 fact->preallocated = PETSC_TRUE; 925 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 926 PetscFunctionReturn(0); 927 } 928 929 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) { 930 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 931 PetscBLASInt n, m, info, min, max; 932 933 PetscFunctionBegin; 934 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 935 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 936 max = PetscMax(m, n); 937 min = PetscMin(m, n); 938 if (!mat->tau) { 939 PetscCall(PetscMalloc1(min, &mat->tau)); 940 PetscCall(PetscLogObjectMemory((PetscObject)A, min * sizeof(PetscScalar))); 941 } 942 if (!mat->pivots) { 943 PetscCall(PetscMalloc1(n, &mat->pivots)); 944 PetscCall(PetscLogObjectMemory((PetscObject)A, n * sizeof(PetscScalar))); 945 } 946 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &(mat->qrrhs))); 947 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 948 if (!mat->fwork) { 949 PetscScalar dummy; 950 951 mat->lfwork = -1; 952 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 953 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 954 PetscCall(PetscFPTrapPop()); 955 mat->lfwork = (PetscInt)PetscRealPart(dummy); 956 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 957 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 958 } 959 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 960 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 961 PetscCall(PetscFPTrapPop()); 962 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 963 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 964 mat->rank = min; 965 966 A->ops->solve = MatSolve_SeqDense_QR; 967 A->ops->matsolve = MatMatSolve_SeqDense_QR; 968 A->factortype = MAT_FACTOR_QR; 969 if (m == n) { 970 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 971 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 972 } 973 974 PetscCall(PetscFree(A->solvertype)); 975 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 976 977 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 978 PetscFunctionReturn(0); 979 } 980 981 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 982 MatFactorInfo info; 983 984 PetscFunctionBegin; 985 info.fill = 1.0; 986 987 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 988 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 989 PetscFunctionReturn(0); 990 } 991 992 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { 993 PetscFunctionBegin; 994 fact->assembled = PETSC_TRUE; 995 fact->preallocated = PETSC_TRUE; 996 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 997 PetscFunctionReturn(0); 998 } 999 1000 /* uses LAPACK */ 1001 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) { 1002 PetscFunctionBegin; 1003 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 1004 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 1005 PetscCall(MatSetType(*fact, MATDENSE)); 1006 (*fact)->trivialsymbolic = PETSC_TRUE; 1007 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 1008 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1009 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1010 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1011 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1012 } else if (ftype == MAT_FACTOR_QR) { 1013 PetscCall(PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1014 } 1015 (*fact)->factortype = ftype; 1016 1017 PetscCall(PetscFree((*fact)->solvertype)); 1018 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1019 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1020 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1021 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1022 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1023 PetscFunctionReturn(0); 1024 } 1025 1026 /* ------------------------------------------------------------------*/ 1027 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) { 1028 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1029 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1030 const PetscScalar *b; 1031 PetscInt m = A->rmap->n, i; 1032 PetscBLASInt o = 1, bm = 0; 1033 1034 PetscFunctionBegin; 1035 #if defined(PETSC_HAVE_CUDA) 1036 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1037 #endif 1038 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1039 PetscCall(PetscBLASIntCast(m, &bm)); 1040 if (flag & SOR_ZERO_INITIAL_GUESS) { 1041 /* this is a hack fix, should have another version without the second BLASdotu */ 1042 PetscCall(VecSet(xx, zero)); 1043 } 1044 PetscCall(VecGetArray(xx, &x)); 1045 PetscCall(VecGetArrayRead(bb, &b)); 1046 its = its * lits; 1047 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1048 while (its--) { 1049 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1050 for (i = 0; i < m; i++) { 1051 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1052 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1053 } 1054 } 1055 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1056 for (i = m - 1; i >= 0; i--) { 1057 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1058 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1059 } 1060 } 1061 } 1062 PetscCall(VecRestoreArrayRead(bb, &b)); 1063 PetscCall(VecRestoreArray(xx, &x)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* -----------------------------------------------------------------*/ 1068 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) { 1069 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1070 const PetscScalar *v = mat->v, *x; 1071 PetscScalar *y; 1072 PetscBLASInt m, n, _One = 1; 1073 PetscScalar _DOne = 1.0, _DZero = 0.0; 1074 1075 PetscFunctionBegin; 1076 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1077 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1078 PetscCall(VecGetArrayRead(xx, &x)); 1079 PetscCall(VecGetArrayWrite(yy, &y)); 1080 if (!A->rmap->n || !A->cmap->n) { 1081 PetscBLASInt i; 1082 for (i = 0; i < n; i++) y[i] = 0.0; 1083 } else { 1084 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One)); 1085 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n)); 1086 } 1087 PetscCall(VecRestoreArrayRead(xx, &x)); 1088 PetscCall(VecRestoreArrayWrite(yy, &y)); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) { 1093 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1094 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1095 PetscBLASInt m, n, _One = 1; 1096 const PetscScalar *v = mat->v, *x; 1097 1098 PetscFunctionBegin; 1099 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1100 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1101 PetscCall(VecGetArrayRead(xx, &x)); 1102 PetscCall(VecGetArrayWrite(yy, &y)); 1103 if (!A->rmap->n || !A->cmap->n) { 1104 PetscBLASInt i; 1105 for (i = 0; i < m; i++) y[i] = 0.0; 1106 } else { 1107 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One)); 1108 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n)); 1109 } 1110 PetscCall(VecRestoreArrayRead(xx, &x)); 1111 PetscCall(VecRestoreArrayWrite(yy, &y)); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { 1116 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1117 const PetscScalar *v = mat->v, *x; 1118 PetscScalar *y, _DOne = 1.0; 1119 PetscBLASInt m, n, _One = 1; 1120 1121 PetscFunctionBegin; 1122 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1123 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1124 PetscCall(VecCopy(zz, yy)); 1125 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1126 PetscCall(VecGetArrayRead(xx, &x)); 1127 PetscCall(VecGetArray(yy, &y)); 1128 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1129 PetscCall(VecRestoreArrayRead(xx, &x)); 1130 PetscCall(VecRestoreArray(yy, &y)); 1131 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { 1136 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1137 const PetscScalar *v = mat->v, *x; 1138 PetscScalar *y; 1139 PetscBLASInt m, n, _One = 1; 1140 PetscScalar _DOne = 1.0; 1141 1142 PetscFunctionBegin; 1143 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1144 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1145 PetscCall(VecCopy(zz, yy)); 1146 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1147 PetscCall(VecGetArrayRead(xx, &x)); 1148 PetscCall(VecGetArray(yy, &y)); 1149 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1150 PetscCall(VecRestoreArrayRead(xx, &x)); 1151 PetscCall(VecRestoreArray(yy, &y)); 1152 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 /* -----------------------------------------------------------------*/ 1157 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { 1158 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1159 PetscInt i; 1160 1161 PetscFunctionBegin; 1162 *ncols = A->cmap->n; 1163 if (cols) { 1164 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1165 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1166 } 1167 if (vals) { 1168 const PetscScalar *v; 1169 1170 PetscCall(MatDenseGetArrayRead(A, &v)); 1171 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1172 v += row; 1173 for (i = 0; i < A->cmap->n; i++) { 1174 (*vals)[i] = *v; 1175 v += mat->lda; 1176 } 1177 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { 1183 PetscFunctionBegin; 1184 if (ncols) *ncols = 0; 1185 if (cols) PetscCall(PetscFree(*cols)); 1186 if (vals) PetscCall(PetscFree(*vals)); 1187 PetscFunctionReturn(0); 1188 } 1189 /* ----------------------------------------------------------------*/ 1190 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) { 1191 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1192 PetscScalar *av; 1193 PetscInt i, j, idx = 0; 1194 #if defined(PETSC_HAVE_CUDA) 1195 PetscOffloadMask oldf; 1196 #endif 1197 1198 PetscFunctionBegin; 1199 PetscCall(MatDenseGetArray(A, &av)); 1200 if (!mat->roworiented) { 1201 if (addv == INSERT_VALUES) { 1202 for (j = 0; j < n; j++) { 1203 if (indexn[j] < 0) { 1204 idx += m; 1205 continue; 1206 } 1207 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1208 for (i = 0; i < m; i++) { 1209 if (indexm[i] < 0) { 1210 idx++; 1211 continue; 1212 } 1213 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1214 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1215 } 1216 } 1217 } else { 1218 for (j = 0; j < n; j++) { 1219 if (indexn[j] < 0) { 1220 idx += m; 1221 continue; 1222 } 1223 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1224 for (i = 0; i < m; i++) { 1225 if (indexm[i] < 0) { 1226 idx++; 1227 continue; 1228 } 1229 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1230 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1231 } 1232 } 1233 } 1234 } else { 1235 if (addv == INSERT_VALUES) { 1236 for (i = 0; i < m; i++) { 1237 if (indexm[i] < 0) { 1238 idx += n; 1239 continue; 1240 } 1241 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1242 for (j = 0; j < n; j++) { 1243 if (indexn[j] < 0) { 1244 idx++; 1245 continue; 1246 } 1247 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1248 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1249 } 1250 } 1251 } else { 1252 for (i = 0; i < m; i++) { 1253 if (indexm[i] < 0) { 1254 idx += n; 1255 continue; 1256 } 1257 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1258 for (j = 0; j < n; j++) { 1259 if (indexn[j] < 0) { 1260 idx++; 1261 continue; 1262 } 1263 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1264 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1265 } 1266 } 1267 } 1268 } 1269 /* hack to prevent unneeded copy to the GPU while returning the array */ 1270 #if defined(PETSC_HAVE_CUDA) 1271 oldf = A->offloadmask; 1272 A->offloadmask = PETSC_OFFLOAD_GPU; 1273 #endif 1274 PetscCall(MatDenseRestoreArray(A, &av)); 1275 #if defined(PETSC_HAVE_CUDA) 1276 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1277 #endif 1278 PetscFunctionReturn(0); 1279 } 1280 1281 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) { 1282 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1283 const PetscScalar *vv; 1284 PetscInt i, j; 1285 1286 PetscFunctionBegin; 1287 PetscCall(MatDenseGetArrayRead(A, &vv)); 1288 /* row-oriented output */ 1289 for (i = 0; i < m; i++) { 1290 if (indexm[i] < 0) { 1291 v += n; 1292 continue; 1293 } 1294 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1295 for (j = 0; j < n; j++) { 1296 if (indexn[j] < 0) { 1297 v++; 1298 continue; 1299 } 1300 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1301 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1302 } 1303 } 1304 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* -----------------------------------------------------------------*/ 1309 1310 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) { 1311 PetscBool skipHeader; 1312 PetscViewerFormat format; 1313 PetscInt header[4], M, N, m, lda, i, j, k; 1314 const PetscScalar *v; 1315 PetscScalar *vwork; 1316 1317 PetscFunctionBegin; 1318 PetscCall(PetscViewerSetUp(viewer)); 1319 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1320 PetscCall(PetscViewerGetFormat(viewer, &format)); 1321 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1322 1323 PetscCall(MatGetSize(mat, &M, &N)); 1324 1325 /* write matrix header */ 1326 header[0] = MAT_FILE_CLASSID; 1327 header[1] = M; 1328 header[2] = N; 1329 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1330 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1331 1332 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1333 if (format != PETSC_VIEWER_NATIVE) { 1334 PetscInt nnz = m * N, *iwork; 1335 /* store row lengths for each row */ 1336 PetscCall(PetscMalloc1(nnz, &iwork)); 1337 for (i = 0; i < m; i++) iwork[i] = N; 1338 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1339 /* store column indices (zero start index) */ 1340 for (k = 0, i = 0; i < m; i++) 1341 for (j = 0; j < N; j++, k++) iwork[k] = j; 1342 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1343 PetscCall(PetscFree(iwork)); 1344 } 1345 /* store matrix values as a dense matrix in row major order */ 1346 PetscCall(PetscMalloc1(m * N, &vwork)); 1347 PetscCall(MatDenseGetArrayRead(mat, &v)); 1348 PetscCall(MatDenseGetLDA(mat, &lda)); 1349 for (k = 0, i = 0; i < m; i++) 1350 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1351 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1352 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1353 PetscCall(PetscFree(vwork)); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) { 1358 PetscBool skipHeader; 1359 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1360 PetscInt rows, cols; 1361 PetscScalar *v, *vwork; 1362 1363 PetscFunctionBegin; 1364 PetscCall(PetscViewerSetUp(viewer)); 1365 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1366 1367 if (!skipHeader) { 1368 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1369 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1370 M = header[1]; 1371 N = header[2]; 1372 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1373 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1374 nz = header[3]; 1375 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1376 } else { 1377 PetscCall(MatGetSize(mat, &M, &N)); 1378 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1379 nz = MATRIX_BINARY_FORMAT_DENSE; 1380 } 1381 1382 /* setup global sizes if not set */ 1383 if (mat->rmap->N < 0) mat->rmap->N = M; 1384 if (mat->cmap->N < 0) mat->cmap->N = N; 1385 PetscCall(MatSetUp(mat)); 1386 /* check if global sizes are correct */ 1387 PetscCall(MatGetSize(mat, &rows, &cols)); 1388 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1389 1390 PetscCall(MatGetSize(mat, NULL, &N)); 1391 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1392 PetscCall(MatDenseGetArray(mat, &v)); 1393 PetscCall(MatDenseGetLDA(mat, &lda)); 1394 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1395 PetscInt nnz = m * N; 1396 /* read in matrix values */ 1397 PetscCall(PetscMalloc1(nnz, &vwork)); 1398 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1399 /* store values in column major order */ 1400 for (j = 0; j < N; j++) 1401 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1402 PetscCall(PetscFree(vwork)); 1403 } else { /* matrix in file is sparse format */ 1404 PetscInt nnz = 0, *rlens, *icols; 1405 /* read in row lengths */ 1406 PetscCall(PetscMalloc1(m, &rlens)); 1407 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1408 for (i = 0; i < m; i++) nnz += rlens[i]; 1409 /* read in column indices and values */ 1410 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1411 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1412 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1413 /* store values in column major order */ 1414 for (k = 0, i = 0; i < m; i++) 1415 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1416 PetscCall(PetscFree(rlens)); 1417 PetscCall(PetscFree2(icols, vwork)); 1418 } 1419 PetscCall(MatDenseRestoreArray(mat, &v)); 1420 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1421 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) { 1426 PetscBool isbinary, ishdf5; 1427 1428 PetscFunctionBegin; 1429 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1430 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1431 /* force binary viewer to load .info file if it has not yet done so */ 1432 PetscCall(PetscViewerSetUp(viewer)); 1433 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1434 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1435 if (isbinary) { 1436 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1437 } else if (ishdf5) { 1438 #if defined(PETSC_HAVE_HDF5) 1439 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1440 #else 1441 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1442 #endif 1443 } else { 1444 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1445 } 1446 PetscFunctionReturn(0); 1447 } 1448 1449 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) { 1450 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1451 PetscInt i, j; 1452 const char *name; 1453 PetscScalar *v, *av; 1454 PetscViewerFormat format; 1455 #if defined(PETSC_USE_COMPLEX) 1456 PetscBool allreal = PETSC_TRUE; 1457 #endif 1458 1459 PetscFunctionBegin; 1460 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1461 PetscCall(PetscViewerGetFormat(viewer, &format)); 1462 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1463 PetscFunctionReturn(0); /* do nothing for now */ 1464 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1465 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1466 for (i = 0; i < A->rmap->n; i++) { 1467 v = av + i; 1468 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1469 for (j = 0; j < A->cmap->n; j++) { 1470 #if defined(PETSC_USE_COMPLEX) 1471 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1472 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1473 } else if (PetscRealPart(*v)) { 1474 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1475 } 1476 #else 1477 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1478 #endif 1479 v += a->lda; 1480 } 1481 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1482 } 1483 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1484 } else { 1485 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1486 #if defined(PETSC_USE_COMPLEX) 1487 /* determine if matrix has all real values */ 1488 for (j = 0; j < A->cmap->n; j++) { 1489 v = av + j * a->lda; 1490 for (i = 0; i < A->rmap->n; i++) { 1491 if (PetscImaginaryPart(v[i])) { 1492 allreal = PETSC_FALSE; 1493 break; 1494 } 1495 } 1496 } 1497 #endif 1498 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1499 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1500 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1501 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1502 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1503 } 1504 1505 for (i = 0; i < A->rmap->n; i++) { 1506 v = av + i; 1507 for (j = 0; j < A->cmap->n; j++) { 1508 #if defined(PETSC_USE_COMPLEX) 1509 if (allreal) { 1510 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1511 } else { 1512 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1513 } 1514 #else 1515 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1516 #endif 1517 v += a->lda; 1518 } 1519 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1520 } 1521 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1522 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1523 } 1524 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1525 PetscCall(PetscViewerFlush(viewer)); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #include <petscdraw.h> 1530 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) { 1531 Mat A = (Mat)Aa; 1532 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1533 int color = PETSC_DRAW_WHITE; 1534 const PetscScalar *v; 1535 PetscViewer viewer; 1536 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1537 PetscViewerFormat format; 1538 1539 PetscFunctionBegin; 1540 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1541 PetscCall(PetscViewerGetFormat(viewer, &format)); 1542 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1543 1544 /* Loop over matrix elements drawing boxes */ 1545 PetscCall(MatDenseGetArrayRead(A, &v)); 1546 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1547 PetscDrawCollectiveBegin(draw); 1548 /* Blue for negative and Red for positive */ 1549 for (j = 0; j < n; j++) { 1550 x_l = j; 1551 x_r = x_l + 1.0; 1552 for (i = 0; i < m; i++) { 1553 y_l = m - i - 1.0; 1554 y_r = y_l + 1.0; 1555 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1556 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1557 else continue; 1558 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1559 } 1560 } 1561 PetscDrawCollectiveEnd(draw); 1562 } else { 1563 /* use contour shading to indicate magnitude of values */ 1564 /* first determine max of all nonzero values */ 1565 PetscReal minv = 0.0, maxv = 0.0; 1566 PetscDraw popup; 1567 1568 for (i = 0; i < m * n; i++) { 1569 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1570 } 1571 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1572 PetscCall(PetscDrawGetPopup(draw, &popup)); 1573 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1574 1575 PetscDrawCollectiveBegin(draw); 1576 for (j = 0; j < n; j++) { 1577 x_l = j; 1578 x_r = x_l + 1.0; 1579 for (i = 0; i < m; i++) { 1580 y_l = m - i - 1.0; 1581 y_r = y_l + 1.0; 1582 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1583 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1584 } 1585 } 1586 PetscDrawCollectiveEnd(draw); 1587 } 1588 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1589 PetscFunctionReturn(0); 1590 } 1591 1592 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) { 1593 PetscDraw draw; 1594 PetscBool isnull; 1595 PetscReal xr, yr, xl, yl, h, w; 1596 1597 PetscFunctionBegin; 1598 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1599 PetscCall(PetscDrawIsNull(draw, &isnull)); 1600 if (isnull) PetscFunctionReturn(0); 1601 1602 xr = A->cmap->n; 1603 yr = A->rmap->n; 1604 h = yr / 10.0; 1605 w = xr / 10.0; 1606 xr += w; 1607 yr += h; 1608 xl = -w; 1609 yl = -h; 1610 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1611 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1612 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1613 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1614 PetscCall(PetscDrawSave(draw)); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) { 1619 PetscBool iascii, isbinary, isdraw; 1620 1621 PetscFunctionBegin; 1622 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1623 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1624 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1625 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1626 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1627 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1628 PetscFunctionReturn(0); 1629 } 1630 1631 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) { 1632 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1633 1634 PetscFunctionBegin; 1635 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1636 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1637 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreArray() first"); 1638 a->unplacedarray = a->v; 1639 a->unplaced_user_alloc = a->user_alloc; 1640 a->v = (PetscScalar *)array; 1641 a->user_alloc = PETSC_TRUE; 1642 #if defined(PETSC_HAVE_CUDA) 1643 A->offloadmask = PETSC_OFFLOAD_CPU; 1644 #endif 1645 PetscFunctionReturn(0); 1646 } 1647 1648 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) { 1649 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1650 1651 PetscFunctionBegin; 1652 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1653 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1654 a->v = a->unplacedarray; 1655 a->user_alloc = a->unplaced_user_alloc; 1656 a->unplacedarray = NULL; 1657 #if defined(PETSC_HAVE_CUDA) 1658 A->offloadmask = PETSC_OFFLOAD_CPU; 1659 #endif 1660 PetscFunctionReturn(0); 1661 } 1662 1663 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) { 1664 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1665 1666 PetscFunctionBegin; 1667 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1668 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1669 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1670 a->v = (PetscScalar *)array; 1671 a->user_alloc = PETSC_FALSE; 1672 #if defined(PETSC_HAVE_CUDA) 1673 A->offloadmask = PETSC_OFFLOAD_CPU; 1674 #endif 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatDestroy_SeqDense(Mat mat) { 1679 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1680 1681 PetscFunctionBegin; 1682 #if defined(PETSC_USE_LOG) 1683 PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n); 1684 #endif 1685 PetscCall(VecDestroy(&(l->qrrhs))); 1686 PetscCall(PetscFree(l->tau)); 1687 PetscCall(PetscFree(l->pivots)); 1688 PetscCall(PetscFree(l->fwork)); 1689 PetscCall(MatDestroy(&l->ptapwork)); 1690 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1691 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1692 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1693 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1694 PetscCall(VecDestroy(&l->cvec)); 1695 PetscCall(MatDestroy(&l->cmat)); 1696 PetscCall(PetscFree(mat->data)); 1697 1698 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1699 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1700 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1704 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1714 #if defined(PETSC_HAVE_ELEMENTAL) 1715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1716 #endif 1717 #if defined(PETSC_HAVE_SCALAPACK) 1718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1719 #endif 1720 #if defined(PETSC_HAVE_CUDA) 1721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1723 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1725 #endif 1726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1731 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) { 1746 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1747 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1748 PetscScalar *v, tmp; 1749 1750 PetscFunctionBegin; 1751 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1752 if (reuse == MAT_INPLACE_MATRIX) { 1753 if (m == n) { /* in place transpose */ 1754 PetscCall(MatDenseGetArray(A, &v)); 1755 for (j = 0; j < m; j++) { 1756 for (k = 0; k < j; k++) { 1757 tmp = v[j + k * M]; 1758 v[j + k * M] = v[k + j * M]; 1759 v[k + j * M] = tmp; 1760 } 1761 } 1762 PetscCall(MatDenseRestoreArray(A, &v)); 1763 } else { /* reuse memory, temporary allocates new memory */ 1764 PetscScalar *v2; 1765 PetscLayout tmplayout; 1766 1767 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1768 PetscCall(MatDenseGetArray(A, &v)); 1769 for (j = 0; j < n; j++) { 1770 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1771 } 1772 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1773 PetscCall(PetscFree(v2)); 1774 PetscCall(MatDenseRestoreArray(A, &v)); 1775 /* cleanup size dependent quantities */ 1776 PetscCall(VecDestroy(&mat->cvec)); 1777 PetscCall(MatDestroy(&mat->cmat)); 1778 PetscCall(PetscFree(mat->pivots)); 1779 PetscCall(PetscFree(mat->fwork)); 1780 PetscCall(MatDestroy(&mat->ptapwork)); 1781 /* swap row/col layouts */ 1782 mat->lda = n; 1783 tmplayout = A->rmap; 1784 A->rmap = A->cmap; 1785 A->cmap = tmplayout; 1786 } 1787 } else { /* out-of-place transpose */ 1788 Mat tmat; 1789 Mat_SeqDense *tmatd; 1790 PetscScalar *v2; 1791 PetscInt M2; 1792 1793 if (reuse == MAT_INITIAL_MATRIX) { 1794 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1795 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1796 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1797 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1798 } else tmat = *matout; 1799 1800 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1801 PetscCall(MatDenseGetArray(tmat, &v2)); 1802 tmatd = (Mat_SeqDense *)tmat->data; 1803 M2 = tmatd->lda; 1804 for (j = 0; j < n; j++) { 1805 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1806 } 1807 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1808 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1809 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1810 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1811 *matout = tmat; 1812 } 1813 PetscFunctionReturn(0); 1814 } 1815 1816 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) { 1817 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1818 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1819 PetscInt i; 1820 const PetscScalar *v1, *v2; 1821 1822 PetscFunctionBegin; 1823 if (A1->rmap->n != A2->rmap->n) { 1824 *flg = PETSC_FALSE; 1825 PetscFunctionReturn(0); 1826 } 1827 if (A1->cmap->n != A2->cmap->n) { 1828 *flg = PETSC_FALSE; 1829 PetscFunctionReturn(0); 1830 } 1831 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1832 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1833 for (i = 0; i < A1->cmap->n; i++) { 1834 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1835 if (*flg == PETSC_FALSE) PetscFunctionReturn(0); 1836 v1 += mat1->lda; 1837 v2 += mat2->lda; 1838 } 1839 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1840 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1841 *flg = PETSC_TRUE; 1842 PetscFunctionReturn(0); 1843 } 1844 1845 static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) { 1846 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1847 PetscInt i, n, len; 1848 PetscScalar *x; 1849 const PetscScalar *vv; 1850 1851 PetscFunctionBegin; 1852 PetscCall(VecGetSize(v, &n)); 1853 PetscCall(VecGetArray(v, &x)); 1854 len = PetscMin(A->rmap->n, A->cmap->n); 1855 PetscCall(MatDenseGetArrayRead(A, &vv)); 1856 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1857 for (i = 0; i < len; i++) { x[i] = vv[i * mat->lda + i]; } 1858 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1859 PetscCall(VecRestoreArray(v, &x)); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) { 1864 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1865 const PetscScalar *l, *r; 1866 PetscScalar x, *v, *vv; 1867 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1868 1869 PetscFunctionBegin; 1870 PetscCall(MatDenseGetArray(A, &vv)); 1871 if (ll) { 1872 PetscCall(VecGetSize(ll, &m)); 1873 PetscCall(VecGetArrayRead(ll, &l)); 1874 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1875 for (i = 0; i < m; i++) { 1876 x = l[i]; 1877 v = vv + i; 1878 for (j = 0; j < n; j++) { 1879 (*v) *= x; 1880 v += mat->lda; 1881 } 1882 } 1883 PetscCall(VecRestoreArrayRead(ll, &l)); 1884 PetscCall(PetscLogFlops(1.0 * n * m)); 1885 } 1886 if (rr) { 1887 PetscCall(VecGetSize(rr, &n)); 1888 PetscCall(VecGetArrayRead(rr, &r)); 1889 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1890 for (i = 0; i < n; i++) { 1891 x = r[i]; 1892 v = vv + i * mat->lda; 1893 for (j = 0; j < m; j++) (*v++) *= x; 1894 } 1895 PetscCall(VecRestoreArrayRead(rr, &r)); 1896 PetscCall(PetscLogFlops(1.0 * n * m)); 1897 } 1898 PetscCall(MatDenseRestoreArray(A, &vv)); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) { 1903 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1904 PetscScalar *v, *vv; 1905 PetscReal sum = 0.0; 1906 PetscInt lda, m = A->rmap->n, i, j; 1907 1908 PetscFunctionBegin; 1909 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1910 PetscCall(MatDenseGetLDA(A, &lda)); 1911 v = vv; 1912 if (type == NORM_FROBENIUS) { 1913 if (lda > m) { 1914 for (j = 0; j < A->cmap->n; j++) { 1915 v = vv + j * lda; 1916 for (i = 0; i < m; i++) { 1917 sum += PetscRealPart(PetscConj(*v) * (*v)); 1918 v++; 1919 } 1920 } 1921 } else { 1922 #if defined(PETSC_USE_REAL___FP16) 1923 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1924 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1925 } 1926 #else 1927 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1928 sum += PetscRealPart(PetscConj(*v) * (*v)); 1929 v++; 1930 } 1931 } 1932 *nrm = PetscSqrtReal(sum); 1933 #endif 1934 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1935 } else if (type == NORM_1) { 1936 *nrm = 0.0; 1937 for (j = 0; j < A->cmap->n; j++) { 1938 v = vv + j * mat->lda; 1939 sum = 0.0; 1940 for (i = 0; i < A->rmap->n; i++) { 1941 sum += PetscAbsScalar(*v); 1942 v++; 1943 } 1944 if (sum > *nrm) *nrm = sum; 1945 } 1946 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1947 } else if (type == NORM_INFINITY) { 1948 *nrm = 0.0; 1949 for (j = 0; j < A->rmap->n; j++) { 1950 v = vv + j; 1951 sum = 0.0; 1952 for (i = 0; i < A->cmap->n; i++) { 1953 sum += PetscAbsScalar(*v); 1954 v += mat->lda; 1955 } 1956 if (sum > *nrm) *nrm = sum; 1957 } 1958 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1959 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 1960 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) { 1965 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 1966 1967 PetscFunctionBegin; 1968 switch (op) { 1969 case MAT_ROW_ORIENTED: aij->roworiented = flg; break; 1970 case MAT_NEW_NONZERO_LOCATIONS: 1971 case MAT_NEW_NONZERO_LOCATION_ERR: 1972 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1973 case MAT_FORCE_DIAGONAL_ENTRIES: 1974 case MAT_KEEP_NONZERO_PATTERN: 1975 case MAT_IGNORE_OFF_PROC_ENTRIES: 1976 case MAT_USE_HASH_TABLE: 1977 case MAT_IGNORE_ZERO_ENTRIES: 1978 case MAT_IGNORE_LOWER_TRIANGULAR: 1979 case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 1980 case MAT_SPD: 1981 case MAT_SYMMETRIC: 1982 case MAT_STRUCTURALLY_SYMMETRIC: 1983 case MAT_HERMITIAN: 1984 case MAT_SYMMETRY_ETERNAL: 1985 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1986 case MAT_SPD_ETERNAL: break; 1987 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode MatZeroEntries_SeqDense(Mat A) { 1993 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 1994 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 1995 PetscScalar *v; 1996 1997 PetscFunctionBegin; 1998 PetscCall(MatDenseGetArrayWrite(A, &v)); 1999 if (lda > m) { 2000 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 2001 } else { 2002 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2003 } 2004 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2005 PetscFunctionReturn(0); 2006 } 2007 2008 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 2009 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2010 PetscInt m = l->lda, n = A->cmap->n, i, j; 2011 PetscScalar *slot, *bb, *v; 2012 const PetscScalar *xx; 2013 2014 PetscFunctionBegin; 2015 if (PetscDefined(USE_DEBUG)) { 2016 for (i = 0; i < N; i++) { 2017 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2018 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2019 } 2020 } 2021 if (!N) PetscFunctionReturn(0); 2022 2023 /* fix right hand side if needed */ 2024 if (x && b) { 2025 PetscCall(VecGetArrayRead(x, &xx)); 2026 PetscCall(VecGetArray(b, &bb)); 2027 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2028 PetscCall(VecRestoreArrayRead(x, &xx)); 2029 PetscCall(VecRestoreArray(b, &bb)); 2030 } 2031 2032 PetscCall(MatDenseGetArray(A, &v)); 2033 for (i = 0; i < N; i++) { 2034 slot = v + rows[i]; 2035 for (j = 0; j < n; j++) { 2036 *slot = 0.0; 2037 slot += m; 2038 } 2039 } 2040 if (diag != 0.0) { 2041 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2042 for (i = 0; i < N; i++) { 2043 slot = v + (m + 1) * rows[i]; 2044 *slot = diag; 2045 } 2046 } 2047 PetscCall(MatDenseRestoreArray(A, &v)); 2048 PetscFunctionReturn(0); 2049 } 2050 2051 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) { 2052 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2053 2054 PetscFunctionBegin; 2055 *lda = mat->lda; 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) { 2060 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2061 2062 PetscFunctionBegin; 2063 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2064 *array = mat->v; 2065 PetscFunctionReturn(0); 2066 } 2067 2068 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) { 2069 PetscFunctionBegin; 2070 if (array) *array = NULL; 2071 PetscFunctionReturn(0); 2072 } 2073 2074 /*@ 2075 MatDenseGetLDA - gets the leading dimension of the array returned from MatDenseGetArray() 2076 2077 Not collective 2078 2079 Input Parameter: 2080 . mat - a MATSEQDENSE or MATMPIDENSE matrix 2081 2082 Output Parameter: 2083 . lda - the leading dimension 2084 2085 Level: intermediate 2086 2087 .seealso: `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2088 @*/ 2089 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) { 2090 PetscFunctionBegin; 2091 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2092 PetscValidIntPointer(lda, 2); 2093 MatCheckPreallocated(A, 1); 2094 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /*@ 2099 MatDenseSetLDA - Sets the leading dimension of the array used by the dense matrix 2100 2101 Not collective 2102 2103 Input Parameters: 2104 + mat - a MATSEQDENSE or MATMPIDENSE matrix 2105 - lda - the leading dimension 2106 2107 Level: intermediate 2108 2109 .seealso: `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2110 @*/ 2111 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) { 2112 PetscFunctionBegin; 2113 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2114 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /*@C 2119 MatDenseGetArray - gives read-write access to the array where the data for a dense matrix is stored 2120 2121 Logically Collective on Mat 2122 2123 Input Parameter: 2124 . mat - a dense matrix 2125 2126 Output Parameter: 2127 . array - pointer to the data 2128 2129 Level: intermediate 2130 2131 .seealso: `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2132 @*/ 2133 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array) { 2134 PetscFunctionBegin; 2135 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2136 PetscValidPointer(array, 2); 2137 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 /*@C 2142 MatDenseRestoreArray - returns access to the array where the data for a dense matrix is stored obtained by MatDenseGetArray() 2143 2144 Logically Collective on Mat 2145 2146 Input Parameters: 2147 + mat - a dense matrix 2148 - array - pointer to the data (may be NULL) 2149 2150 Level: intermediate 2151 2152 .seealso: `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2153 @*/ 2154 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array) { 2155 PetscFunctionBegin; 2156 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2157 PetscValidPointer(array, 2); 2158 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2159 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2160 #if defined(PETSC_HAVE_CUDA) 2161 A->offloadmask = PETSC_OFFLOAD_CPU; 2162 #endif 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /*@C 2167 MatDenseGetArrayRead - gives read-only access to the array where the data for a dense matrix is stored 2168 2169 Not Collective 2170 2171 Input Parameter: 2172 . mat - a dense matrix 2173 2174 Output Parameter: 2175 . array - pointer to the data 2176 2177 Level: intermediate 2178 2179 .seealso: `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2180 @*/ 2181 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array) { 2182 PetscFunctionBegin; 2183 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2184 PetscValidPointer(array, 2); 2185 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 /*@C 2190 MatDenseRestoreArrayRead - returns access to the array where the data for a dense matrix is stored obtained by MatDenseGetArrayRead() 2191 2192 Not Collective 2193 2194 Input Parameters: 2195 + mat - a dense matrix 2196 - array - pointer to the data (may be NULL) 2197 2198 Level: intermediate 2199 2200 .seealso: `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2201 @*/ 2202 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array) { 2203 PetscFunctionBegin; 2204 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2205 PetscValidPointer(array, 2); 2206 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 /*@C 2211 MatDenseGetArrayWrite - gives write-only access to the array where the data for a dense matrix is stored 2212 2213 Not Collective 2214 2215 Input Parameter: 2216 . mat - a dense matrix 2217 2218 Output Parameter: 2219 . array - pointer to the data 2220 2221 Level: intermediate 2222 2223 .seealso: `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2224 @*/ 2225 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array) { 2226 PetscFunctionBegin; 2227 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2228 PetscValidPointer(array, 2); 2229 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 /*@C 2234 MatDenseRestoreArrayWrite - returns access to the array where the data for a dense matrix is stored obtained by MatDenseGetArrayWrite() 2235 2236 Not Collective 2237 2238 Input Parameters: 2239 + mat - a dense matrix 2240 - array - pointer to the data (may be NULL) 2241 2242 Level: intermediate 2243 2244 .seealso: `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2245 @*/ 2246 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array) { 2247 PetscFunctionBegin; 2248 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2249 PetscValidPointer(array, 2); 2250 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2251 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2252 #if defined(PETSC_HAVE_CUDA) 2253 A->offloadmask = PETSC_OFFLOAD_CPU; 2254 #endif 2255 PetscFunctionReturn(0); 2256 } 2257 2258 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) { 2259 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2260 PetscInt i, j, nrows, ncols, ldb; 2261 const PetscInt *irow, *icol; 2262 PetscScalar *av, *bv, *v = mat->v; 2263 Mat newmat; 2264 2265 PetscFunctionBegin; 2266 PetscCall(ISGetIndices(isrow, &irow)); 2267 PetscCall(ISGetIndices(iscol, &icol)); 2268 PetscCall(ISGetLocalSize(isrow, &nrows)); 2269 PetscCall(ISGetLocalSize(iscol, &ncols)); 2270 2271 /* Check submatrixcall */ 2272 if (scall == MAT_REUSE_MATRIX) { 2273 PetscInt n_cols, n_rows; 2274 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2275 if (n_rows != nrows || n_cols != ncols) { 2276 /* resize the result matrix to match number of requested rows/columns */ 2277 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2278 } 2279 newmat = *B; 2280 } else { 2281 /* Create and fill new matrix */ 2282 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2283 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2284 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2285 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2286 } 2287 2288 /* Now extract the data pointers and do the copy,column at a time */ 2289 PetscCall(MatDenseGetArray(newmat, &bv)); 2290 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2291 for (i = 0; i < ncols; i++) { 2292 av = v + mat->lda * icol[i]; 2293 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2294 bv += ldb; 2295 } 2296 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2297 2298 /* Assemble the matrices so that the correct flags are set */ 2299 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2300 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2301 2302 /* Free work space */ 2303 PetscCall(ISRestoreIndices(isrow, &irow)); 2304 PetscCall(ISRestoreIndices(iscol, &icol)); 2305 *B = newmat; 2306 PetscFunctionReturn(0); 2307 } 2308 2309 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) { 2310 PetscInt i; 2311 2312 PetscFunctionBegin; 2313 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2314 2315 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) { 2320 PetscFunctionBegin; 2321 PetscFunctionReturn(0); 2322 } 2323 2324 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) { 2325 PetscFunctionBegin; 2326 PetscFunctionReturn(0); 2327 } 2328 2329 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) { 2330 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2331 const PetscScalar *va; 2332 PetscScalar *vb; 2333 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2334 2335 PetscFunctionBegin; 2336 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2337 if (A->ops->copy != B->ops->copy) { 2338 PetscCall(MatCopy_Basic(A, B, str)); 2339 PetscFunctionReturn(0); 2340 } 2341 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2342 PetscCall(MatDenseGetArrayRead(A, &va)); 2343 PetscCall(MatDenseGetArray(B, &vb)); 2344 if (lda1 > m || lda2 > m) { 2345 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2346 } else { 2347 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2348 } 2349 PetscCall(MatDenseRestoreArray(B, &vb)); 2350 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2351 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2352 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2353 PetscFunctionReturn(0); 2354 } 2355 2356 PetscErrorCode MatSetUp_SeqDense(Mat A) { 2357 PetscFunctionBegin; 2358 PetscCall(PetscLayoutSetUp(A->rmap)); 2359 PetscCall(PetscLayoutSetUp(A->cmap)); 2360 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 static PetscErrorCode MatConjugate_SeqDense(Mat A) { 2365 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2366 PetscInt i, j; 2367 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2368 PetscScalar *aa; 2369 2370 PetscFunctionBegin; 2371 PetscCall(MatDenseGetArray(A, &aa)); 2372 for (j = 0; j < A->cmap->n; j++) { 2373 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2374 } 2375 PetscCall(MatDenseRestoreArray(A, &aa)); 2376 if (mat->tau) 2377 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 static PetscErrorCode MatRealPart_SeqDense(Mat A) { 2382 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2383 PetscInt i, j; 2384 PetscScalar *aa; 2385 2386 PetscFunctionBegin; 2387 PetscCall(MatDenseGetArray(A, &aa)); 2388 for (j = 0; j < A->cmap->n; j++) { 2389 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2390 } 2391 PetscCall(MatDenseRestoreArray(A, &aa)); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) { 2396 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2397 PetscInt i, j; 2398 PetscScalar *aa; 2399 2400 PetscFunctionBegin; 2401 PetscCall(MatDenseGetArray(A, &aa)); 2402 for (j = 0; j < A->cmap->n; j++) { 2403 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2404 } 2405 PetscCall(MatDenseRestoreArray(A, &aa)); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 /* ----------------------------------------------------------------*/ 2410 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2411 PetscInt m = A->rmap->n, n = B->cmap->n; 2412 PetscBool cisdense; 2413 2414 PetscFunctionBegin; 2415 PetscCall(MatSetSizes(C, m, n, m, n)); 2416 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2417 if (!cisdense) { 2418 PetscBool flg; 2419 2420 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2421 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2422 } 2423 PetscCall(MatSetUp(C)); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2428 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2429 PetscBLASInt m, n, k; 2430 const PetscScalar *av, *bv; 2431 PetscScalar *cv; 2432 PetscScalar _DOne = 1.0, _DZero = 0.0; 2433 2434 PetscFunctionBegin; 2435 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2436 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2437 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2438 if (!m || !n || !k) PetscFunctionReturn(0); 2439 PetscCall(MatDenseGetArrayRead(A, &av)); 2440 PetscCall(MatDenseGetArrayRead(B, &bv)); 2441 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2442 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2443 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2444 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2445 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2446 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2451 PetscInt m = A->rmap->n, n = B->rmap->n; 2452 PetscBool cisdense; 2453 2454 PetscFunctionBegin; 2455 PetscCall(MatSetSizes(C, m, n, m, n)); 2456 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2457 if (!cisdense) { 2458 PetscBool flg; 2459 2460 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2461 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2462 } 2463 PetscCall(MatSetUp(C)); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2468 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2469 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2470 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2471 const PetscScalar *av, *bv; 2472 PetscScalar *cv; 2473 PetscBLASInt m, n, k; 2474 PetscScalar _DOne = 1.0, _DZero = 0.0; 2475 2476 PetscFunctionBegin; 2477 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2478 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2479 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2480 if (!m || !n || !k) PetscFunctionReturn(0); 2481 PetscCall(MatDenseGetArrayRead(A, &av)); 2482 PetscCall(MatDenseGetArrayRead(B, &bv)); 2483 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2484 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2485 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2486 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2487 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2488 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2493 PetscInt m = A->cmap->n, n = B->cmap->n; 2494 PetscBool cisdense; 2495 2496 PetscFunctionBegin; 2497 PetscCall(MatSetSizes(C, m, n, m, n)); 2498 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2499 if (!cisdense) { 2500 PetscBool flg; 2501 2502 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2503 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2504 } 2505 PetscCall(MatSetUp(C)); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2510 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2511 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2512 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2513 const PetscScalar *av, *bv; 2514 PetscScalar *cv; 2515 PetscBLASInt m, n, k; 2516 PetscScalar _DOne = 1.0, _DZero = 0.0; 2517 2518 PetscFunctionBegin; 2519 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2520 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2521 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2522 if (!m || !n || !k) PetscFunctionReturn(0); 2523 PetscCall(MatDenseGetArrayRead(A, &av)); 2524 PetscCall(MatDenseGetArrayRead(B, &bv)); 2525 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2526 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2527 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2528 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2529 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2530 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 /* ----------------------------------------------- */ 2535 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) { 2536 PetscFunctionBegin; 2537 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2538 C->ops->productsymbolic = MatProductSymbolic_AB; 2539 PetscFunctionReturn(0); 2540 } 2541 2542 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) { 2543 PetscFunctionBegin; 2544 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2545 C->ops->productsymbolic = MatProductSymbolic_AtB; 2546 PetscFunctionReturn(0); 2547 } 2548 2549 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) { 2550 PetscFunctionBegin; 2551 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2552 C->ops->productsymbolic = MatProductSymbolic_ABt; 2553 PetscFunctionReturn(0); 2554 } 2555 2556 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) { 2557 Mat_Product *product = C->product; 2558 2559 PetscFunctionBegin; 2560 switch (product->type) { 2561 case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); break; 2562 case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); break; 2563 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); break; 2564 default: break; 2565 } 2566 PetscFunctionReturn(0); 2567 } 2568 /* ----------------------------------------------- */ 2569 2570 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2571 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2572 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2573 PetscScalar *x; 2574 const PetscScalar *aa; 2575 2576 PetscFunctionBegin; 2577 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2578 PetscCall(VecGetArray(v, &x)); 2579 PetscCall(VecGetLocalSize(v, &p)); 2580 PetscCall(MatDenseGetArrayRead(A, &aa)); 2581 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2582 for (i = 0; i < m; i++) { 2583 x[i] = aa[i]; 2584 if (idx) idx[i] = 0; 2585 for (j = 1; j < n; j++) { 2586 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2587 x[i] = aa[i + a->lda * j]; 2588 if (idx) idx[i] = j; 2589 } 2590 } 2591 } 2592 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2593 PetscCall(VecRestoreArray(v, &x)); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2598 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2599 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2600 PetscScalar *x; 2601 PetscReal atmp; 2602 const PetscScalar *aa; 2603 2604 PetscFunctionBegin; 2605 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2606 PetscCall(VecGetArray(v, &x)); 2607 PetscCall(VecGetLocalSize(v, &p)); 2608 PetscCall(MatDenseGetArrayRead(A, &aa)); 2609 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2610 for (i = 0; i < m; i++) { 2611 x[i] = PetscAbsScalar(aa[i]); 2612 for (j = 1; j < n; j++) { 2613 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2614 if (PetscAbsScalar(x[i]) < atmp) { 2615 x[i] = atmp; 2616 if (idx) idx[i] = j; 2617 } 2618 } 2619 } 2620 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2621 PetscCall(VecRestoreArray(v, &x)); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2626 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2627 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2628 PetscScalar *x; 2629 const PetscScalar *aa; 2630 2631 PetscFunctionBegin; 2632 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2633 PetscCall(MatDenseGetArrayRead(A, &aa)); 2634 PetscCall(VecGetArray(v, &x)); 2635 PetscCall(VecGetLocalSize(v, &p)); 2636 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2637 for (i = 0; i < m; i++) { 2638 x[i] = aa[i]; 2639 if (idx) idx[i] = 0; 2640 for (j = 1; j < n; j++) { 2641 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2642 x[i] = aa[i + a->lda * j]; 2643 if (idx) idx[i] = j; 2644 } 2645 } 2646 } 2647 PetscCall(VecRestoreArray(v, &x)); 2648 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2649 PetscFunctionReturn(0); 2650 } 2651 2652 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) { 2653 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2654 PetscScalar *x; 2655 const PetscScalar *aa; 2656 2657 PetscFunctionBegin; 2658 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2659 PetscCall(MatDenseGetArrayRead(A, &aa)); 2660 PetscCall(VecGetArray(v, &x)); 2661 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2662 PetscCall(VecRestoreArray(v, &x)); 2663 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2664 PetscFunctionReturn(0); 2665 } 2666 2667 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) { 2668 PetscInt i, j, m, n; 2669 const PetscScalar *a; 2670 2671 PetscFunctionBegin; 2672 PetscCall(MatGetSize(A, &m, &n)); 2673 PetscCall(PetscArrayzero(reductions, n)); 2674 PetscCall(MatDenseGetArrayRead(A, &a)); 2675 if (type == NORM_2) { 2676 for (i = 0; i < n; i++) { 2677 for (j = 0; j < m; j++) { reductions[i] += PetscAbsScalar(a[j] * a[j]); } 2678 a += m; 2679 } 2680 } else if (type == NORM_1) { 2681 for (i = 0; i < n; i++) { 2682 for (j = 0; j < m; j++) { reductions[i] += PetscAbsScalar(a[j]); } 2683 a += m; 2684 } 2685 } else if (type == NORM_INFINITY) { 2686 for (i = 0; i < n; i++) { 2687 for (j = 0; j < m; j++) { reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); } 2688 a += m; 2689 } 2690 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2691 for (i = 0; i < n; i++) { 2692 for (j = 0; j < m; j++) { reductions[i] += PetscRealPart(a[j]); } 2693 a += m; 2694 } 2695 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2696 for (i = 0; i < n; i++) { 2697 for (j = 0; j < m; j++) { reductions[i] += PetscImaginaryPart(a[j]); } 2698 a += m; 2699 } 2700 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2701 PetscCall(MatDenseRestoreArrayRead(A, &a)); 2702 if (type == NORM_2) { 2703 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2704 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2705 for (i = 0; i < n; i++) reductions[i] /= m; 2706 } 2707 PetscFunctionReturn(0); 2708 } 2709 2710 static PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) { 2711 PetscScalar *a; 2712 PetscInt lda, m, n, i, j; 2713 2714 PetscFunctionBegin; 2715 PetscCall(MatGetSize(x, &m, &n)); 2716 PetscCall(MatDenseGetLDA(x, &lda)); 2717 PetscCall(MatDenseGetArray(x, &a)); 2718 for (j = 0; j < n; j++) { 2719 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 2720 } 2721 PetscCall(MatDenseRestoreArray(x, &a)); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) { 2726 PetscFunctionBegin; 2727 *missing = PETSC_FALSE; 2728 PetscFunctionReturn(0); 2729 } 2730 2731 /* vals is not const */ 2732 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) { 2733 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2734 PetscScalar *v; 2735 2736 PetscFunctionBegin; 2737 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2738 PetscCall(MatDenseGetArray(A, &v)); 2739 *vals = v + col * a->lda; 2740 PetscCall(MatDenseRestoreArray(A, &v)); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) { 2745 PetscFunctionBegin; 2746 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 2747 PetscFunctionReturn(0); 2748 } 2749 2750 /* -------------------------------------------------------------------*/ 2751 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 2752 MatGetRow_SeqDense, 2753 MatRestoreRow_SeqDense, 2754 MatMult_SeqDense, 2755 /* 4*/ MatMultAdd_SeqDense, 2756 MatMultTranspose_SeqDense, 2757 MatMultTransposeAdd_SeqDense, 2758 NULL, 2759 NULL, 2760 NULL, 2761 /* 10*/ NULL, 2762 MatLUFactor_SeqDense, 2763 MatCholeskyFactor_SeqDense, 2764 MatSOR_SeqDense, 2765 MatTranspose_SeqDense, 2766 /* 15*/ MatGetInfo_SeqDense, 2767 MatEqual_SeqDense, 2768 MatGetDiagonal_SeqDense, 2769 MatDiagonalScale_SeqDense, 2770 MatNorm_SeqDense, 2771 /* 20*/ MatAssemblyBegin_SeqDense, 2772 MatAssemblyEnd_SeqDense, 2773 MatSetOption_SeqDense, 2774 MatZeroEntries_SeqDense, 2775 /* 24*/ MatZeroRows_SeqDense, 2776 NULL, 2777 NULL, 2778 NULL, 2779 NULL, 2780 /* 29*/ MatSetUp_SeqDense, 2781 NULL, 2782 NULL, 2783 NULL, 2784 NULL, 2785 /* 34*/ MatDuplicate_SeqDense, 2786 NULL, 2787 NULL, 2788 NULL, 2789 NULL, 2790 /* 39*/ MatAXPY_SeqDense, 2791 MatCreateSubMatrices_SeqDense, 2792 NULL, 2793 MatGetValues_SeqDense, 2794 MatCopy_SeqDense, 2795 /* 44*/ MatGetRowMax_SeqDense, 2796 MatScale_SeqDense, 2797 MatShift_SeqDense, 2798 NULL, 2799 MatZeroRowsColumns_SeqDense, 2800 /* 49*/ MatSetRandom_SeqDense, 2801 NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 /* 54*/ NULL, 2806 NULL, 2807 NULL, 2808 NULL, 2809 NULL, 2810 /* 59*/ MatCreateSubMatrix_SeqDense, 2811 MatDestroy_SeqDense, 2812 MatView_SeqDense, 2813 NULL, 2814 NULL, 2815 /* 64*/ NULL, 2816 NULL, 2817 NULL, 2818 NULL, 2819 NULL, 2820 /* 69*/ MatGetRowMaxAbs_SeqDense, 2821 NULL, 2822 NULL, 2823 NULL, 2824 NULL, 2825 /* 74*/ NULL, 2826 NULL, 2827 NULL, 2828 NULL, 2829 NULL, 2830 /* 79*/ NULL, 2831 NULL, 2832 NULL, 2833 NULL, 2834 /* 83*/ MatLoad_SeqDense, 2835 MatIsSymmetric_SeqDense, 2836 MatIsHermitian_SeqDense, 2837 NULL, 2838 NULL, 2839 NULL, 2840 /* 89*/ NULL, 2841 NULL, 2842 MatMatMultNumeric_SeqDense_SeqDense, 2843 NULL, 2844 NULL, 2845 /* 94*/ NULL, 2846 NULL, 2847 NULL, 2848 MatMatTransposeMultNumeric_SeqDense_SeqDense, 2849 NULL, 2850 /* 99*/ MatProductSetFromOptions_SeqDense, 2851 NULL, 2852 NULL, 2853 MatConjugate_SeqDense, 2854 NULL, 2855 /*104*/ NULL, 2856 MatRealPart_SeqDense, 2857 MatImaginaryPart_SeqDense, 2858 NULL, 2859 NULL, 2860 /*109*/ NULL, 2861 NULL, 2862 MatGetRowMin_SeqDense, 2863 MatGetColumnVector_SeqDense, 2864 MatMissingDiagonal_SeqDense, 2865 /*114*/ NULL, 2866 NULL, 2867 NULL, 2868 NULL, 2869 NULL, 2870 /*119*/ NULL, 2871 NULL, 2872 NULL, 2873 NULL, 2874 NULL, 2875 /*124*/ NULL, 2876 MatGetColumnReductions_SeqDense, 2877 NULL, 2878 NULL, 2879 NULL, 2880 /*129*/ NULL, 2881 NULL, 2882 NULL, 2883 MatTransposeMatMultNumeric_SeqDense_SeqDense, 2884 NULL, 2885 /*134*/ NULL, 2886 NULL, 2887 NULL, 2888 NULL, 2889 NULL, 2890 /*139*/ NULL, 2891 NULL, 2892 NULL, 2893 NULL, 2894 NULL, 2895 MatCreateMPIMatConcatenateSeqMat_SeqDense, 2896 /*145*/ NULL, 2897 NULL, 2898 NULL, 2899 NULL, 2900 NULL, 2901 /*150*/ NULL}; 2902 2903 /*@C 2904 MatCreateSeqDense - Creates a sequential dense matrix that 2905 is stored in column major order (the usual Fortran 77 manner). Many 2906 of the matrix operations use the BLAS and LAPACK routines. 2907 2908 Collective 2909 2910 Input Parameters: 2911 + comm - MPI communicator, set to PETSC_COMM_SELF 2912 . m - number of rows 2913 . n - number of columns 2914 - data - optional location of matrix data in column major order. Set data=NULL for PETSc 2915 to control all matrix memory allocation. 2916 2917 Output Parameter: 2918 . A - the matrix 2919 2920 Notes: 2921 The data input variable is intended primarily for Fortran programmers 2922 who wish to allocate their own matrix memory space. Most users should 2923 set data=NULL. 2924 2925 Level: intermediate 2926 2927 .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 2928 @*/ 2929 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A) { 2930 PetscFunctionBegin; 2931 PetscCall(MatCreate(comm, A)); 2932 PetscCall(MatSetSizes(*A, m, n, m, n)); 2933 PetscCall(MatSetType(*A, MATSEQDENSE)); 2934 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 2935 PetscFunctionReturn(0); 2936 } 2937 2938 /*@C 2939 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements 2940 2941 Collective 2942 2943 Input Parameters: 2944 + B - the matrix 2945 - data - the array (or NULL) 2946 2947 Notes: 2948 The data input variable is intended primarily for Fortran programmers 2949 who wish to allocate their own matrix memory space. Most users should 2950 need not call this routine. 2951 2952 Level: intermediate 2953 2954 .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 2955 2956 @*/ 2957 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) { 2958 PetscFunctionBegin; 2959 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2960 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) { 2965 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2966 2967 PetscFunctionBegin; 2968 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2969 B->preallocated = PETSC_TRUE; 2970 2971 PetscCall(PetscLayoutSetUp(B->rmap)); 2972 PetscCall(PetscLayoutSetUp(B->cmap)); 2973 2974 if (b->lda <= 0) b->lda = B->rmap->n; 2975 2976 if (!data) { /* petsc-allocated storage */ 2977 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 2978 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 2979 PetscCall(PetscLogObjectMemory((PetscObject)B, b->lda * B->cmap->n * sizeof(PetscScalar))); 2980 2981 b->user_alloc = PETSC_FALSE; 2982 } else { /* user-allocated storage */ 2983 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 2984 b->v = data; 2985 b->user_alloc = PETSC_TRUE; 2986 } 2987 B->assembled = PETSC_TRUE; 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #if defined(PETSC_HAVE_ELEMENTAL) 2992 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 2993 Mat mat_elemental; 2994 const PetscScalar *array; 2995 PetscScalar *v_colwise; 2996 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 2997 2998 PetscFunctionBegin; 2999 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 3000 PetscCall(MatDenseGetArrayRead(A, &array)); 3001 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 3002 k = 0; 3003 for (j = 0; j < N; j++) { 3004 cols[j] = j; 3005 for (i = 0; i < M; i++) { v_colwise[j * M + i] = array[k++]; } 3006 } 3007 for (i = 0; i < M; i++) { rows[i] = i; } 3008 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3009 3010 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3011 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3012 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3013 PetscCall(MatSetUp(mat_elemental)); 3014 3015 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3016 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3017 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3018 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3019 PetscCall(PetscFree3(v_colwise, rows, cols)); 3020 3021 if (reuse == MAT_INPLACE_MATRIX) { 3022 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3023 } else { 3024 *newmat = mat_elemental; 3025 } 3026 PetscFunctionReturn(0); 3027 } 3028 #endif 3029 3030 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) { 3031 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3032 PetscBool data; 3033 3034 PetscFunctionBegin; 3035 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3036 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3037 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3038 b->lda = lda; 3039 PetscFunctionReturn(0); 3040 } 3041 3042 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 3043 PetscFunctionBegin; 3044 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3045 PetscFunctionReturn(0); 3046 } 3047 3048 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { 3049 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3050 3051 PetscFunctionBegin; 3052 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3053 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3054 if (!a->cvec) { 3055 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); 3056 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 3057 } 3058 a->vecinuse = col + 1; 3059 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3060 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3061 *v = a->cvec; 3062 PetscFunctionReturn(0); 3063 } 3064 3065 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { 3066 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3067 3068 PetscFunctionBegin; 3069 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3070 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3071 a->vecinuse = 0; 3072 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3073 PetscCall(VecResetArray(a->cvec)); 3074 if (v) *v = NULL; 3075 PetscFunctionReturn(0); 3076 } 3077 3078 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { 3079 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3080 3081 PetscFunctionBegin; 3082 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3083 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3084 if (!a->cvec) { 3085 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); 3086 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 3087 } 3088 a->vecinuse = col + 1; 3089 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3090 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3091 PetscCall(VecLockReadPush(a->cvec)); 3092 *v = a->cvec; 3093 PetscFunctionReturn(0); 3094 } 3095 3096 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { 3097 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3098 3099 PetscFunctionBegin; 3100 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3101 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3102 a->vecinuse = 0; 3103 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3104 PetscCall(VecLockReadPop(a->cvec)); 3105 PetscCall(VecResetArray(a->cvec)); 3106 if (v) *v = NULL; 3107 PetscFunctionReturn(0); 3108 } 3109 3110 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { 3111 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3112 3113 PetscFunctionBegin; 3114 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3115 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3116 if (!a->cvec) { 3117 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); 3118 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 3119 } 3120 a->vecinuse = col + 1; 3121 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3122 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3123 *v = a->cvec; 3124 PetscFunctionReturn(0); 3125 } 3126 3127 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { 3128 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3129 3130 PetscFunctionBegin; 3131 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3132 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3133 a->vecinuse = 0; 3134 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3135 PetscCall(VecResetArray(a->cvec)); 3136 if (v) *v = NULL; 3137 PetscFunctionReturn(0); 3138 } 3139 3140 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 3141 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3142 3143 PetscFunctionBegin; 3144 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3145 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3146 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3147 if (!a->cmat) { 3148 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat)); 3149 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat)); 3150 } else { 3151 PetscCall(MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda)); 3152 } 3153 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3154 a->matinuse = cbegin + 1; 3155 *v = a->cmat; 3156 #if defined(PETSC_HAVE_CUDA) 3157 A->offloadmask = PETSC_OFFLOAD_CPU; 3158 #endif 3159 PetscFunctionReturn(0); 3160 } 3161 3162 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) { 3163 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3164 3165 PetscFunctionBegin; 3166 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3167 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3168 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3169 a->matinuse = 0; 3170 PetscCall(MatDenseResetArray(a->cmat)); 3171 if (v) *v = NULL; 3172 PetscFunctionReturn(0); 3173 } 3174 3175 /*MC 3176 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3177 3178 Options Database Keys: 3179 . -mat_type seqdense - sets the matrix type to "seqdense" during a call to MatSetFromOptions() 3180 3181 Level: beginner 3182 3183 .seealso: `MatCreateSeqDense()` 3184 3185 M*/ 3186 PetscErrorCode MatCreate_SeqDense(Mat B) { 3187 Mat_SeqDense *b; 3188 PetscMPIInt size; 3189 3190 PetscFunctionBegin; 3191 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3192 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3193 3194 PetscCall(PetscNewLog(B, &b)); 3195 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3196 B->data = (void *)b; 3197 3198 b->roworiented = PETSC_TRUE; 3199 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3205 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3206 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3207 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3208 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3209 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3210 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3211 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3212 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3213 #if defined(PETSC_HAVE_ELEMENTAL) 3214 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3215 #endif 3216 #if defined(PETSC_HAVE_SCALAPACK) 3217 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3218 #endif 3219 #if defined(PETSC_HAVE_CUDA) 3220 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3221 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3222 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3223 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3224 #endif 3225 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3226 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3227 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3228 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3229 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3230 3231 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3232 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3233 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3234 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3235 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3236 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3237 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3238 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3239 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3240 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3241 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3242 PetscFunctionReturn(0); 3243 } 3244 3245 /*@C 3246 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call MatDenseRestoreColumn() to avoid memory bleeding. 3247 3248 Not Collective 3249 3250 Input Parameters: 3251 + mat - a MATSEQDENSE or MATMPIDENSE matrix 3252 - col - column index 3253 3254 Output Parameter: 3255 . vals - pointer to the data 3256 3257 Level: intermediate 3258 3259 .seealso: `MatDenseRestoreColumn()` 3260 @*/ 3261 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals) { 3262 PetscFunctionBegin; 3263 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3264 PetscValidLogicalCollectiveInt(A, col, 2); 3265 PetscValidPointer(vals, 3); 3266 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3267 PetscFunctionReturn(0); 3268 } 3269 3270 /*@C 3271 MatDenseRestoreColumn - returns access to a column of a dense matrix which is returned by MatDenseGetColumn(). 3272 3273 Not Collective 3274 3275 Input Parameters: 3276 + mat - a MATSEQDENSE or MATMPIDENSE matrix 3277 - vals - pointer to the data (may be NULL) 3278 3279 Level: intermediate 3280 3281 .seealso: `MatDenseGetColumn()` 3282 @*/ 3283 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals) { 3284 PetscFunctionBegin; 3285 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3286 PetscValidPointer(vals, 2); 3287 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3288 PetscFunctionReturn(0); 3289 } 3290 3291 /*@ 3292 MatDenseGetColumnVec - Gives read-write access to a column of a dense matrix, represented as a Vec. 3293 3294 Collective 3295 3296 Input Parameters: 3297 + mat - the Mat object 3298 - col - the column index 3299 3300 Output Parameter: 3301 . v - the vector 3302 3303 Notes: 3304 The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVec() when the vector is no longer needed. 3305 Use MatDenseGetColumnVecRead() to obtain read-only access or MatDenseGetColumnVecWrite() for write-only access. 3306 3307 Level: intermediate 3308 3309 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3310 @*/ 3311 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) { 3312 PetscFunctionBegin; 3313 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3314 PetscValidType(A, 1); 3315 PetscValidLogicalCollectiveInt(A, col, 2); 3316 PetscValidPointer(v, 3); 3317 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3318 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3319 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3320 PetscFunctionReturn(0); 3321 } 3322 3323 /*@ 3324 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec(). 3325 3326 Collective 3327 3328 Input Parameters: 3329 + mat - the Mat object 3330 . col - the column index 3331 - v - the Vec object (may be NULL) 3332 3333 Level: intermediate 3334 3335 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3336 @*/ 3337 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) { 3338 PetscFunctionBegin; 3339 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3340 PetscValidType(A, 1); 3341 PetscValidLogicalCollectiveInt(A, col, 2); 3342 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3343 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3344 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3345 PetscFunctionReturn(0); 3346 } 3347 3348 /*@ 3349 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec. 3350 3351 Collective 3352 3353 Input Parameters: 3354 + mat - the Mat object 3355 - col - the column index 3356 3357 Output Parameter: 3358 . v - the vector 3359 3360 Notes: 3361 The vector is owned by PETSc and users cannot modify it. 3362 Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed. 3363 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access. 3364 3365 Level: intermediate 3366 3367 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3368 @*/ 3369 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) { 3370 PetscFunctionBegin; 3371 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3372 PetscValidType(A, 1); 3373 PetscValidLogicalCollectiveInt(A, col, 2); 3374 PetscValidPointer(v, 3); 3375 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3376 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3377 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3378 PetscFunctionReturn(0); 3379 } 3380 3381 /*@ 3382 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead(). 3383 3384 Collective 3385 3386 Input Parameters: 3387 + mat - the Mat object 3388 . col - the column index 3389 - v - the Vec object (may be NULL) 3390 3391 Level: intermediate 3392 3393 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3394 @*/ 3395 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) { 3396 PetscFunctionBegin; 3397 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3398 PetscValidType(A, 1); 3399 PetscValidLogicalCollectiveInt(A, col, 2); 3400 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3401 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3402 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3403 PetscFunctionReturn(0); 3404 } 3405 3406 /*@ 3407 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec. 3408 3409 Collective 3410 3411 Input Parameters: 3412 + mat - the Mat object 3413 - col - the column index 3414 3415 Output Parameter: 3416 . v - the vector 3417 3418 Notes: 3419 The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed. 3420 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access. 3421 3422 Level: intermediate 3423 3424 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3425 @*/ 3426 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) { 3427 PetscFunctionBegin; 3428 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3429 PetscValidType(A, 1); 3430 PetscValidLogicalCollectiveInt(A, col, 2); 3431 PetscValidPointer(v, 3); 3432 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3433 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3434 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3435 PetscFunctionReturn(0); 3436 } 3437 3438 /*@ 3439 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite(). 3440 3441 Collective 3442 3443 Input Parameters: 3444 + mat - the Mat object 3445 . col - the column index 3446 - v - the Vec object (may be NULL) 3447 3448 Level: intermediate 3449 3450 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3451 @*/ 3452 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) { 3453 PetscFunctionBegin; 3454 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3455 PetscValidType(A, 1); 3456 PetscValidLogicalCollectiveInt(A, col, 2); 3457 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3458 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3459 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3460 PetscFunctionReturn(0); 3461 } 3462 3463 /*@ 3464 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat. 3465 3466 Collective 3467 3468 Input Parameters: 3469 + mat - the Mat object 3470 . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0) 3471 . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M) 3472 . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0) 3473 - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N) 3474 3475 Output Parameter: 3476 . v - the matrix 3477 3478 Notes: 3479 The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed. 3480 The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows. 3481 3482 Level: intermediate 3483 3484 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3485 @*/ 3486 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 3487 PetscFunctionBegin; 3488 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3489 PetscValidType(A, 1); 3490 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3491 PetscValidLogicalCollectiveInt(A, rend, 3); 3492 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3493 PetscValidLogicalCollectiveInt(A, cend, 5); 3494 PetscValidPointer(v, 6); 3495 if (rbegin == PETSC_DECIDE) rbegin = 0; 3496 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3497 if (cbegin == PETSC_DECIDE) cbegin = 0; 3498 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3499 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3500 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3501 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3502 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3503 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3504 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 /*@ 3509 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix(). 3510 3511 Collective 3512 3513 Input Parameters: 3514 + mat - the Mat object 3515 - v - the Mat object (may be NULL) 3516 3517 Level: intermediate 3518 3519 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3520 @*/ 3521 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) { 3522 PetscFunctionBegin; 3523 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3524 PetscValidType(A, 1); 3525 PetscValidPointer(v, 2); 3526 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3527 PetscFunctionReturn(0); 3528 } 3529 3530 #include <petscblaslapack.h> 3531 #include <petsc/private/kernels/blockinvert.h> 3532 3533 PetscErrorCode MatSeqDenseInvert(Mat A) { 3534 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3535 PetscInt bs = A->rmap->n; 3536 MatScalar *values = a->v; 3537 const PetscReal shift = 0.0; 3538 PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE; 3539 3540 PetscFunctionBegin; 3541 /* factor and invert each block */ 3542 switch (bs) { 3543 case 1: values[0] = (PetscScalar)1.0 / (values[0] + shift); break; 3544 case 2: 3545 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3546 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3547 break; 3548 case 3: 3549 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3550 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3551 break; 3552 case 4: 3553 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3554 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3555 break; 3556 case 5: { 3557 PetscScalar work[25]; 3558 PetscInt ipvt[5]; 3559 3560 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3561 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3562 } break; 3563 case 6: 3564 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3565 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3566 break; 3567 case 7: 3568 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3569 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3570 break; 3571 default: { 3572 PetscInt *v_pivots, *IJ, j; 3573 PetscScalar *v_work; 3574 3575 PetscCall(PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ)); 3576 for (j = 0; j < bs; j++) { IJ[j] = j; } 3577 PetscCall(PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 3578 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3579 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 3580 } 3581 } 3582 PetscFunctionReturn(0); 3583 } 3584