1 /* 2 Defines the basic matrix operations for sequential dense. 3 */ 4 5 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <../src/mat/impls/aij/seq/aij.h> 8 9 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) { 10 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 11 PetscInt j, k, n = A->rmap->n; 12 PetscScalar *v; 13 14 PetscFunctionBegin; 15 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 16 PetscCall(MatDenseGetArray(A, &v)); 17 if (!hermitian) { 18 for (k = 0; k < n; k++) { 19 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 20 } 21 } else { 22 for (k = 0; k < n; k++) { 23 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 24 } 25 } 26 PetscCall(MatDenseRestoreArray(A, &v)); 27 PetscFunctionReturn(0); 28 } 29 30 PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) { 31 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 32 PetscBLASInt info, n; 33 34 PetscFunctionBegin; 35 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 36 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 37 if (A->factortype == MAT_FACTOR_LU) { 38 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 39 if (!mat->fwork) { 40 mat->lfwork = n; 41 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 42 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 43 } 44 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 45 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 46 PetscCall(PetscFPTrapPop()); 47 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 48 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 49 if (A->spd == PETSC_BOOL3_TRUE) { 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 52 PetscCall(PetscFPTrapPop()); 53 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 54 #if defined(PETSC_USE_COMPLEX) 55 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 56 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 57 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 58 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 59 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 60 PetscCall(PetscFPTrapPop()); 61 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 62 #endif 63 } else { /* symmetric case */ 64 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 65 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 66 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 67 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 68 PetscCall(PetscFPTrapPop()); 69 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 70 } 71 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 72 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 73 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 74 75 A->ops->solve = NULL; 76 A->ops->matsolve = NULL; 77 A->ops->solvetranspose = NULL; 78 A->ops->matsolvetranspose = NULL; 79 A->ops->solveadd = NULL; 80 A->ops->solvetransposeadd = NULL; 81 A->factortype = MAT_FACTOR_NONE; 82 PetscCall(PetscFree(A->solvertype)); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 87 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 88 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 89 PetscScalar *slot, *bb, *v; 90 const PetscScalar *xx; 91 92 PetscFunctionBegin; 93 if (PetscDefined(USE_DEBUG)) { 94 for (i = 0; i < N; i++) { 95 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 96 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 97 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 98 } 99 } 100 if (!N) PetscFunctionReturn(0); 101 102 /* fix right hand side if needed */ 103 if (x && b) { 104 Vec xt; 105 106 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 107 PetscCall(VecDuplicate(x, &xt)); 108 PetscCall(VecCopy(x, xt)); 109 PetscCall(VecScale(xt, -1.0)); 110 PetscCall(MatMultAdd(A, xt, b, b)); 111 PetscCall(VecDestroy(&xt)); 112 PetscCall(VecGetArrayRead(x, &xx)); 113 PetscCall(VecGetArray(b, &bb)); 114 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 115 PetscCall(VecRestoreArrayRead(x, &xx)); 116 PetscCall(VecRestoreArray(b, &bb)); 117 } 118 119 PetscCall(MatDenseGetArray(A, &v)); 120 for (i = 0; i < N; i++) { 121 slot = v + rows[i] * m; 122 PetscCall(PetscArrayzero(slot, r)); 123 } 124 for (i = 0; i < N; i++) { 125 slot = v + rows[i]; 126 for (j = 0; j < n; j++) { 127 *slot = 0.0; 128 slot += m; 129 } 130 } 131 if (diag != 0.0) { 132 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 133 for (i = 0; i < N; i++) { 134 slot = v + (m + 1) * rows[i]; 135 *slot = diag; 136 } 137 } 138 PetscCall(MatDenseRestoreArray(A, &v)); 139 PetscFunctionReturn(0); 140 } 141 142 PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C) { 143 Mat_SeqDense *c = (Mat_SeqDense *)(C->data); 144 145 PetscFunctionBegin; 146 if (c->ptapwork) { 147 PetscCall((*C->ops->matmultnumeric)(A, P, c->ptapwork)); 148 PetscCall((*C->ops->transposematmultnumeric)(P, c->ptapwork, C)); 149 } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first"); 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C) { 154 Mat_SeqDense *c; 155 PetscBool cisdense; 156 157 PetscFunctionBegin; 158 PetscCall(MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N)); 159 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 160 if (!cisdense) { 161 PetscBool flg; 162 163 PetscCall(PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg)); 164 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 165 } 166 PetscCall(MatSetUp(C)); 167 c = (Mat_SeqDense *)C->data; 168 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork)); 169 PetscCall(MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N)); 170 PetscCall(MatSetType(c->ptapwork, ((PetscObject)C)->type_name)); 171 PetscCall(MatSetUp(c->ptapwork)); 172 PetscFunctionReturn(0); 173 } 174 175 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 176 Mat B = NULL; 177 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 178 Mat_SeqDense *b; 179 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 180 const MatScalar *av; 181 PetscBool isseqdense; 182 183 PetscFunctionBegin; 184 if (reuse == MAT_REUSE_MATRIX) { 185 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 186 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)(*newmat))->type_name); 187 } 188 if (reuse != MAT_REUSE_MATRIX) { 189 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 190 PetscCall(MatSetSizes(B, m, n, m, n)); 191 PetscCall(MatSetType(B, MATSEQDENSE)); 192 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 193 b = (Mat_SeqDense *)(B->data); 194 } else { 195 b = (Mat_SeqDense *)((*newmat)->data); 196 PetscCall(PetscArrayzero(b->v, m * n)); 197 } 198 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 199 for (i = 0; i < m; i++) { 200 PetscInt j; 201 for (j = 0; j < ai[1] - ai[0]; j++) { 202 b->v[*aj * m + i] = *av; 203 aj++; 204 av++; 205 } 206 ai++; 207 } 208 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 209 210 if (reuse == MAT_INPLACE_MATRIX) { 211 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 212 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 213 PetscCall(MatHeaderReplace(A, &B)); 214 } else { 215 if (B) *newmat = B; 216 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 217 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 218 } 219 PetscFunctionReturn(0); 220 } 221 222 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 223 Mat B = NULL; 224 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 225 PetscInt i, j; 226 PetscInt *rows, *nnz; 227 MatScalar *aa = a->v, *vals; 228 229 PetscFunctionBegin; 230 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 231 if (reuse != MAT_REUSE_MATRIX) { 232 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 233 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 234 PetscCall(MatSetType(B, MATSEQAIJ)); 235 for (j = 0; j < A->cmap->n; j++) { 236 for (i = 0; i < A->rmap->n; i++) 237 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 238 aa += a->lda; 239 } 240 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 241 } else B = *newmat; 242 aa = a->v; 243 for (j = 0; j < A->cmap->n; j++) { 244 PetscInt numRows = 0; 245 for (i = 0; i < A->rmap->n; i++) 246 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 247 rows[numRows] = i; 248 vals[numRows++] = aa[i]; 249 } 250 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 251 aa += a->lda; 252 } 253 PetscCall(PetscFree3(rows, nnz, vals)); 254 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 255 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 256 257 if (reuse == MAT_INPLACE_MATRIX) { 258 PetscCall(MatHeaderReplace(A, &B)); 259 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 260 PetscFunctionReturn(0); 261 } 262 263 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { 264 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 265 const PetscScalar *xv; 266 PetscScalar *yv; 267 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 268 269 PetscFunctionBegin; 270 PetscCall(MatDenseGetArrayRead(X, &xv)); 271 PetscCall(MatDenseGetArray(Y, &yv)); 272 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 273 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 274 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 275 PetscCall(PetscBLASIntCast(y->lda, &lday)); 276 if (ldax > m || lday > m) { 277 PetscInt j; 278 279 for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one)); 280 } else { 281 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 282 } 283 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 284 PetscCall(MatDenseRestoreArray(Y, &yv)); 285 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 286 PetscFunctionReturn(0); 287 } 288 289 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) { 290 PetscLogDouble N = A->rmap->n * A->cmap->n; 291 292 PetscFunctionBegin; 293 info->block_size = 1.0; 294 info->nz_allocated = N; 295 info->nz_used = N; 296 info->nz_unneeded = 0; 297 info->assemblies = A->num_ass; 298 info->mallocs = 0; 299 info->memory = ((PetscObject)A)->mem; 300 info->fill_ratio_given = 0; 301 info->fill_ratio_needed = 0; 302 info->factor_mallocs = 0; 303 PetscFunctionReturn(0); 304 } 305 306 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) { 307 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 308 PetscScalar *v; 309 PetscBLASInt one = 1, j, nz, lda = 0; 310 311 PetscFunctionBegin; 312 PetscCall(MatDenseGetArray(A, &v)); 313 PetscCall(PetscBLASIntCast(a->lda, &lda)); 314 if (lda > A->rmap->n) { 315 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 316 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 317 } else { 318 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 319 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 320 } 321 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 322 PetscCall(MatDenseRestoreArray(A, &v)); 323 PetscFunctionReturn(0); 324 } 325 326 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) { 327 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 328 PetscScalar *v; 329 PetscInt j, k; 330 331 PetscFunctionBegin; 332 PetscCall(MatDenseGetArray(A, &v)); 333 k = PetscMin(A->rmap->n, A->cmap->n); 334 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 335 PetscCall(PetscLogFlops(k)); 336 PetscCall(MatDenseRestoreArray(A, &v)); 337 PetscFunctionReturn(0); 338 } 339 340 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { 341 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 342 PetscInt i, j, m = A->rmap->n, N = a->lda; 343 const PetscScalar *v; 344 345 PetscFunctionBegin; 346 *fl = PETSC_FALSE; 347 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 348 PetscCall(MatDenseGetArrayRead(A, &v)); 349 for (i = 0; i < m; i++) { 350 for (j = i; j < m; j++) { 351 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 352 } 353 } 354 *fl = PETSC_TRUE; 355 restore: 356 PetscCall(MatDenseRestoreArrayRead(A, &v)); 357 PetscFunctionReturn(0); 358 } 359 360 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { 361 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 362 PetscInt i, j, m = A->rmap->n, N = a->lda; 363 const PetscScalar *v; 364 365 PetscFunctionBegin; 366 *fl = PETSC_FALSE; 367 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 368 PetscCall(MatDenseGetArrayRead(A, &v)); 369 for (i = 0; i < m; i++) { 370 for (j = i; j < m; j++) { 371 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 372 } 373 } 374 *fl = PETSC_TRUE; 375 restore: 376 PetscCall(MatDenseRestoreArrayRead(A, &v)); 377 PetscFunctionReturn(0); 378 } 379 380 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) { 381 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 382 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 383 PetscBool isdensecpu; 384 385 PetscFunctionBegin; 386 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 387 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 388 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 389 PetscCall(MatDenseSetLDA(newi, lda)); 390 } 391 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 392 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 393 if (cpvalues == MAT_COPY_VALUES) { 394 const PetscScalar *av; 395 PetscScalar *v; 396 397 PetscCall(MatDenseGetArrayRead(A, &av)); 398 PetscCall(MatDenseGetArrayWrite(newi, &v)); 399 PetscCall(MatDenseGetLDA(newi, &nlda)); 400 m = A->rmap->n; 401 if (lda > m || nlda > m) { 402 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(v + j * nlda, av + j * lda, m)); 403 } else { 404 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 405 } 406 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 407 PetscCall(MatDenseRestoreArrayRead(A, &av)); 408 } 409 PetscFunctionReturn(0); 410 } 411 412 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { 413 PetscFunctionBegin; 414 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 415 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 416 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 417 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 418 PetscFunctionReturn(0); 419 } 420 421 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { 422 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 423 PetscBLASInt info; 424 425 PetscFunctionBegin; 426 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 427 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 428 PetscCall(PetscFPTrapPop()); 429 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 430 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 431 PetscFunctionReturn(0); 432 } 433 434 static PetscErrorCode MatConjugate_SeqDense(Mat); 435 436 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { 437 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 438 PetscBLASInt info; 439 440 PetscFunctionBegin; 441 if (A->spd == PETSC_BOOL3_TRUE) { 442 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 443 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 444 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 445 PetscCall(PetscFPTrapPop()); 446 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 447 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 448 #if defined(PETSC_USE_COMPLEX) 449 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 450 if (T) PetscCall(MatConjugate_SeqDense(A)); 451 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 452 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 453 PetscCall(PetscFPTrapPop()); 454 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 455 if (T) PetscCall(MatConjugate_SeqDense(A)); 456 #endif 457 } else { /* symmetric case */ 458 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 459 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 460 PetscCall(PetscFPTrapPop()); 461 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 462 } 463 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 464 PetscFunctionReturn(0); 465 } 466 467 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { 468 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 469 PetscBLASInt info; 470 char trans; 471 472 PetscFunctionBegin; 473 if (PetscDefined(USE_COMPLEX)) { 474 trans = 'C'; 475 } else { 476 trans = 'T'; 477 } 478 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 479 { /* lwork depends on the number of right-hand sides */ 480 PetscBLASInt nlfwork, lfwork = -1; 481 PetscScalar fwork; 482 483 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 484 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 485 if (nlfwork > mat->lfwork) { 486 mat->lfwork = nlfwork; 487 PetscCall(PetscFree(mat->fwork)); 488 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 489 } 490 } 491 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 492 PetscCall(PetscFPTrapPop()); 493 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 494 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 495 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 496 PetscCall(PetscFPTrapPop()); 497 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 498 for (PetscInt j = 0; j < nrhs; j++) { 499 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 500 } 501 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 502 PetscFunctionReturn(0); 503 } 504 505 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { 506 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 507 PetscBLASInt info; 508 509 PetscFunctionBegin; 510 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 511 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 512 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 513 PetscCall(PetscFPTrapPop()); 514 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 515 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 516 { /* lwork depends on the number of right-hand sides */ 517 PetscBLASInt nlfwork, lfwork = -1; 518 PetscScalar fwork; 519 520 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 521 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 522 if (nlfwork > mat->lfwork) { 523 mat->lfwork = nlfwork; 524 PetscCall(PetscFree(mat->fwork)); 525 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 526 } 527 } 528 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 529 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 530 PetscCall(PetscFPTrapPop()); 531 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 532 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 533 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 534 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 535 PetscFunctionReturn(0); 536 } 537 538 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { 539 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 540 PetscScalar *y; 541 PetscBLASInt m = 0, k = 0; 542 543 PetscFunctionBegin; 544 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 545 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 546 if (k < m) { 547 PetscCall(VecCopy(xx, mat->qrrhs)); 548 PetscCall(VecGetArray(mat->qrrhs, &y)); 549 } else { 550 PetscCall(VecCopy(xx, yy)); 551 PetscCall(VecGetArray(yy, &y)); 552 } 553 *_y = y; 554 *_k = k; 555 *_m = m; 556 PetscFunctionReturn(0); 557 } 558 559 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { 560 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 561 PetscScalar *y = NULL; 562 PetscBLASInt m, k; 563 564 PetscFunctionBegin; 565 y = *_y; 566 *_y = NULL; 567 k = *_k; 568 m = *_m; 569 if (k < m) { 570 PetscScalar *yv; 571 PetscCall(VecGetArray(yy, &yv)); 572 PetscCall(PetscArraycpy(yv, y, k)); 573 PetscCall(VecRestoreArray(yy, &yv)); 574 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 575 } else { 576 PetscCall(VecRestoreArray(yy, &y)); 577 } 578 PetscFunctionReturn(0); 579 } 580 581 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) { 582 PetscScalar *y = NULL; 583 PetscBLASInt m = 0, k = 0; 584 585 PetscFunctionBegin; 586 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 587 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 588 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 589 PetscFunctionReturn(0); 590 } 591 592 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) { 593 PetscScalar *y = NULL; 594 PetscBLASInt m = 0, k = 0; 595 596 PetscFunctionBegin; 597 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 598 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 599 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 600 PetscFunctionReturn(0); 601 } 602 603 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { 604 PetscScalar *y = NULL; 605 PetscBLASInt m = 0, k = 0; 606 607 PetscFunctionBegin; 608 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 609 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 610 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 611 PetscFunctionReturn(0); 612 } 613 614 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { 615 PetscScalar *y = NULL; 616 PetscBLASInt m = 0, k = 0; 617 618 PetscFunctionBegin; 619 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 620 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 621 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 622 PetscFunctionReturn(0); 623 } 624 625 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) { 626 PetscScalar *y = NULL; 627 PetscBLASInt m = 0, k = 0; 628 629 PetscFunctionBegin; 630 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 631 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 632 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 633 PetscFunctionReturn(0); 634 } 635 636 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) { 637 PetscScalar *y = NULL; 638 PetscBLASInt m = 0, k = 0; 639 640 PetscFunctionBegin; 641 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 642 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 643 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 644 PetscFunctionReturn(0); 645 } 646 647 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { 648 const PetscScalar *b; 649 PetscScalar *y; 650 PetscInt n, _ldb, _ldx; 651 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 652 653 PetscFunctionBegin; 654 *_ldy = 0; 655 *_m = 0; 656 *_nrhs = 0; 657 *_k = 0; 658 *_y = NULL; 659 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 660 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 661 PetscCall(MatGetSize(B, NULL, &n)); 662 PetscCall(PetscBLASIntCast(n, &nrhs)); 663 PetscCall(MatDenseGetLDA(B, &_ldb)); 664 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 665 PetscCall(MatDenseGetLDA(X, &_ldx)); 666 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 667 if (ldx < m) { 668 PetscCall(MatDenseGetArrayRead(B, &b)); 669 PetscCall(PetscMalloc1(nrhs * m, &y)); 670 if (ldb == m) { 671 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 672 } else { 673 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 674 } 675 ldy = m; 676 PetscCall(MatDenseRestoreArrayRead(B, &b)); 677 } else { 678 if (ldb == ldx) { 679 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 680 PetscCall(MatDenseGetArray(X, &y)); 681 } else { 682 PetscCall(MatDenseGetArray(X, &y)); 683 PetscCall(MatDenseGetArrayRead(B, &b)); 684 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 685 PetscCall(MatDenseRestoreArrayRead(B, &b)); 686 } 687 ldy = ldx; 688 } 689 *_y = y; 690 *_ldy = ldy; 691 *_k = k; 692 *_m = m; 693 *_nrhs = nrhs; 694 PetscFunctionReturn(0); 695 } 696 697 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { 698 PetscScalar *y; 699 PetscInt _ldx; 700 PetscBLASInt k, ldy, nrhs, ldx = 0; 701 702 PetscFunctionBegin; 703 y = *_y; 704 *_y = NULL; 705 k = *_k; 706 ldy = *_ldy; 707 nrhs = *_nrhs; 708 PetscCall(MatDenseGetLDA(X, &_ldx)); 709 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 710 if (ldx != ldy) { 711 PetscScalar *xv; 712 PetscCall(MatDenseGetArray(X, &xv)); 713 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 714 PetscCall(MatDenseRestoreArray(X, &xv)); 715 PetscCall(PetscFree(y)); 716 } else { 717 PetscCall(MatDenseRestoreArray(X, &y)); 718 } 719 PetscFunctionReturn(0); 720 } 721 722 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) { 723 PetscScalar *y; 724 PetscBLASInt m, k, ldy, nrhs; 725 726 PetscFunctionBegin; 727 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 728 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 729 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 730 PetscFunctionReturn(0); 731 } 732 733 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) { 734 PetscScalar *y; 735 PetscBLASInt m, k, ldy, nrhs; 736 737 PetscFunctionBegin; 738 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 739 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 740 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 741 PetscFunctionReturn(0); 742 } 743 744 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) { 745 PetscScalar *y; 746 PetscBLASInt m, k, ldy, nrhs; 747 748 PetscFunctionBegin; 749 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 750 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 751 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 752 PetscFunctionReturn(0); 753 } 754 755 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) { 756 PetscScalar *y; 757 PetscBLASInt m, k, ldy, nrhs; 758 759 PetscFunctionBegin; 760 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 761 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 762 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 763 PetscFunctionReturn(0); 764 } 765 766 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) { 767 PetscScalar *y; 768 PetscBLASInt m, k, ldy, nrhs; 769 770 PetscFunctionBegin; 771 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 772 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 773 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 774 PetscFunctionReturn(0); 775 } 776 777 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) { 778 PetscScalar *y; 779 PetscBLASInt m, k, ldy, nrhs; 780 781 PetscFunctionBegin; 782 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 783 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 784 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 785 PetscFunctionReturn(0); 786 } 787 788 /* ---------------------------------------------------------------*/ 789 /* COMMENT: I have chosen to hide row permutation in the pivots, 790 rather than put it in the Mat->row slot.*/ 791 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) { 792 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 793 PetscBLASInt n, m, info; 794 795 PetscFunctionBegin; 796 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 797 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 798 if (!mat->pivots) { 799 PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 800 PetscCall(PetscLogObjectMemory((PetscObject)A, A->rmap->n * sizeof(PetscBLASInt))); 801 } 802 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 803 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 804 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 805 PetscCall(PetscFPTrapPop()); 806 807 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 808 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 809 810 A->ops->solve = MatSolve_SeqDense_LU; 811 A->ops->matsolve = MatMatSolve_SeqDense_LU; 812 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 813 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 814 A->factortype = MAT_FACTOR_LU; 815 816 PetscCall(PetscFree(A->solvertype)); 817 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 818 819 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 820 PetscFunctionReturn(0); 821 } 822 823 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 824 MatFactorInfo info; 825 826 PetscFunctionBegin; 827 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 828 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 829 PetscFunctionReturn(0); 830 } 831 832 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) { 833 PetscFunctionBegin; 834 fact->preallocated = PETSC_TRUE; 835 fact->assembled = PETSC_TRUE; 836 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 837 PetscFunctionReturn(0); 838 } 839 840 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 841 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) { 842 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 843 PetscBLASInt info, n; 844 845 PetscFunctionBegin; 846 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 847 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 848 if (A->spd == PETSC_BOOL3_TRUE) { 849 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 850 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 851 PetscCall(PetscFPTrapPop()); 852 #if defined(PETSC_USE_COMPLEX) 853 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 854 if (!mat->pivots) { 855 PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 856 PetscCall(PetscLogObjectMemory((PetscObject)A, A->rmap->n * sizeof(PetscBLASInt))); 857 } 858 if (!mat->fwork) { 859 PetscScalar dummy; 860 861 mat->lfwork = -1; 862 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 863 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 864 PetscCall(PetscFPTrapPop()); 865 mat->lfwork = (PetscInt)PetscRealPart(dummy); 866 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 867 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 868 } 869 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 870 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 871 PetscCall(PetscFPTrapPop()); 872 #endif 873 } else { /* symmetric case */ 874 if (!mat->pivots) { 875 PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); 876 PetscCall(PetscLogObjectMemory((PetscObject)A, A->rmap->n * sizeof(PetscBLASInt))); 877 } 878 if (!mat->fwork) { 879 PetscScalar dummy; 880 881 mat->lfwork = -1; 882 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 883 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 884 PetscCall(PetscFPTrapPop()); 885 mat->lfwork = (PetscInt)PetscRealPart(dummy); 886 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 887 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 888 } 889 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 890 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 891 PetscCall(PetscFPTrapPop()); 892 } 893 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 894 895 A->ops->solve = MatSolve_SeqDense_Cholesky; 896 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 897 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 898 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 899 A->factortype = MAT_FACTOR_CHOLESKY; 900 901 PetscCall(PetscFree(A->solvertype)); 902 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 903 904 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 905 PetscFunctionReturn(0); 906 } 907 908 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 909 MatFactorInfo info; 910 911 PetscFunctionBegin; 912 info.fill = 1.0; 913 914 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 915 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 916 PetscFunctionReturn(0); 917 } 918 919 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { 920 PetscFunctionBegin; 921 fact->assembled = PETSC_TRUE; 922 fact->preallocated = PETSC_TRUE; 923 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 924 PetscFunctionReturn(0); 925 } 926 927 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) { 928 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 929 PetscBLASInt n, m, info, min, max; 930 931 PetscFunctionBegin; 932 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 933 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 934 max = PetscMax(m, n); 935 min = PetscMin(m, n); 936 if (!mat->tau) { 937 PetscCall(PetscMalloc1(min, &mat->tau)); 938 PetscCall(PetscLogObjectMemory((PetscObject)A, min * sizeof(PetscScalar))); 939 } 940 if (!mat->pivots) { 941 PetscCall(PetscMalloc1(n, &mat->pivots)); 942 PetscCall(PetscLogObjectMemory((PetscObject)A, n * sizeof(PetscScalar))); 943 } 944 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &(mat->qrrhs))); 945 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 946 if (!mat->fwork) { 947 PetscScalar dummy; 948 949 mat->lfwork = -1; 950 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 951 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 952 PetscCall(PetscFPTrapPop()); 953 mat->lfwork = (PetscInt)PetscRealPart(dummy); 954 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 955 PetscCall(PetscLogObjectMemory((PetscObject)A, mat->lfwork * sizeof(PetscBLASInt))); 956 } 957 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 958 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 959 PetscCall(PetscFPTrapPop()); 960 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 961 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 962 mat->rank = min; 963 964 A->ops->solve = MatSolve_SeqDense_QR; 965 A->ops->matsolve = MatMatSolve_SeqDense_QR; 966 A->factortype = MAT_FACTOR_QR; 967 if (m == n) { 968 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 969 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 970 } 971 972 PetscCall(PetscFree(A->solvertype)); 973 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 974 975 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 976 PetscFunctionReturn(0); 977 } 978 979 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 980 MatFactorInfo info; 981 982 PetscFunctionBegin; 983 info.fill = 1.0; 984 985 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 986 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { 991 PetscFunctionBegin; 992 fact->assembled = PETSC_TRUE; 993 fact->preallocated = PETSC_TRUE; 994 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 995 PetscFunctionReturn(0); 996 } 997 998 /* uses LAPACK */ 999 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) { 1000 PetscFunctionBegin; 1001 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 1002 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 1003 PetscCall(MatSetType(*fact, MATDENSE)); 1004 (*fact)->trivialsymbolic = PETSC_TRUE; 1005 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 1006 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1007 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 1008 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1009 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 1010 } else if (ftype == MAT_FACTOR_QR) { 1011 PetscCall(PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 1012 } 1013 (*fact)->factortype = ftype; 1014 1015 PetscCall(PetscFree((*fact)->solvertype)); 1016 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 1017 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 1018 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1019 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1020 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 /* ------------------------------------------------------------------*/ 1025 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) { 1026 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1027 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1028 const PetscScalar *b; 1029 PetscInt m = A->rmap->n, i; 1030 PetscBLASInt o = 1, bm = 0; 1031 1032 PetscFunctionBegin; 1033 #if defined(PETSC_HAVE_CUDA) 1034 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1035 #endif 1036 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1037 PetscCall(PetscBLASIntCast(m, &bm)); 1038 if (flag & SOR_ZERO_INITIAL_GUESS) { 1039 /* this is a hack fix, should have another version without the second BLASdotu */ 1040 PetscCall(VecSet(xx, zero)); 1041 } 1042 PetscCall(VecGetArray(xx, &x)); 1043 PetscCall(VecGetArrayRead(bb, &b)); 1044 its = its * lits; 1045 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1046 while (its--) { 1047 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1048 for (i = 0; i < m; i++) { 1049 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1050 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1051 } 1052 } 1053 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1054 for (i = m - 1; i >= 0; i--) { 1055 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1056 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1057 } 1058 } 1059 } 1060 PetscCall(VecRestoreArrayRead(bb, &b)); 1061 PetscCall(VecRestoreArray(xx, &x)); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 /* -----------------------------------------------------------------*/ 1066 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) { 1067 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1068 const PetscScalar *v = mat->v, *x; 1069 PetscScalar *y; 1070 PetscBLASInt m, n, _One = 1; 1071 PetscScalar _DOne = 1.0, _DZero = 0.0; 1072 1073 PetscFunctionBegin; 1074 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1075 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1076 PetscCall(VecGetArrayRead(xx, &x)); 1077 PetscCall(VecGetArrayWrite(yy, &y)); 1078 if (!A->rmap->n || !A->cmap->n) { 1079 PetscBLASInt i; 1080 for (i = 0; i < n; i++) y[i] = 0.0; 1081 } else { 1082 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One)); 1083 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n)); 1084 } 1085 PetscCall(VecRestoreArrayRead(xx, &x)); 1086 PetscCall(VecRestoreArrayWrite(yy, &y)); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) { 1091 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1092 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1093 PetscBLASInt m, n, _One = 1; 1094 const PetscScalar *v = mat->v, *x; 1095 1096 PetscFunctionBegin; 1097 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1098 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1099 PetscCall(VecGetArrayRead(xx, &x)); 1100 PetscCall(VecGetArrayWrite(yy, &y)); 1101 if (!A->rmap->n || !A->cmap->n) { 1102 PetscBLASInt i; 1103 for (i = 0; i < m; i++) y[i] = 0.0; 1104 } else { 1105 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One)); 1106 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n)); 1107 } 1108 PetscCall(VecRestoreArrayRead(xx, &x)); 1109 PetscCall(VecRestoreArrayWrite(yy, &y)); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { 1114 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1115 const PetscScalar *v = mat->v, *x; 1116 PetscScalar *y, _DOne = 1.0; 1117 PetscBLASInt m, n, _One = 1; 1118 1119 PetscFunctionBegin; 1120 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1121 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1122 PetscCall(VecCopy(zz, yy)); 1123 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1124 PetscCall(VecGetArrayRead(xx, &x)); 1125 PetscCall(VecGetArray(yy, &y)); 1126 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1127 PetscCall(VecRestoreArrayRead(xx, &x)); 1128 PetscCall(VecRestoreArray(yy, &y)); 1129 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { 1134 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1135 const PetscScalar *v = mat->v, *x; 1136 PetscScalar *y; 1137 PetscBLASInt m, n, _One = 1; 1138 PetscScalar _DOne = 1.0; 1139 1140 PetscFunctionBegin; 1141 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1142 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1143 PetscCall(VecCopy(zz, yy)); 1144 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1145 PetscCall(VecGetArrayRead(xx, &x)); 1146 PetscCall(VecGetArray(yy, &y)); 1147 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1148 PetscCall(VecRestoreArrayRead(xx, &x)); 1149 PetscCall(VecRestoreArray(yy, &y)); 1150 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 /* -----------------------------------------------------------------*/ 1155 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { 1156 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1157 PetscInt i; 1158 1159 PetscFunctionBegin; 1160 *ncols = A->cmap->n; 1161 if (cols) { 1162 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1163 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1164 } 1165 if (vals) { 1166 const PetscScalar *v; 1167 1168 PetscCall(MatDenseGetArrayRead(A, &v)); 1169 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1170 v += row; 1171 for (i = 0; i < A->cmap->n; i++) { 1172 (*vals)[i] = *v; 1173 v += mat->lda; 1174 } 1175 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1176 } 1177 PetscFunctionReturn(0); 1178 } 1179 1180 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { 1181 PetscFunctionBegin; 1182 if (ncols) *ncols = 0; 1183 if (cols) PetscCall(PetscFree(*cols)); 1184 if (vals) PetscCall(PetscFree(*vals)); 1185 PetscFunctionReturn(0); 1186 } 1187 /* ----------------------------------------------------------------*/ 1188 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) { 1189 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1190 PetscScalar *av; 1191 PetscInt i, j, idx = 0; 1192 #if defined(PETSC_HAVE_CUDA) 1193 PetscOffloadMask oldf; 1194 #endif 1195 1196 PetscFunctionBegin; 1197 PetscCall(MatDenseGetArray(A, &av)); 1198 if (!mat->roworiented) { 1199 if (addv == INSERT_VALUES) { 1200 for (j = 0; j < n; j++) { 1201 if (indexn[j] < 0) { 1202 idx += m; 1203 continue; 1204 } 1205 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1206 for (i = 0; i < m; i++) { 1207 if (indexm[i] < 0) { 1208 idx++; 1209 continue; 1210 } 1211 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1212 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1213 } 1214 } 1215 } else { 1216 for (j = 0; j < n; j++) { 1217 if (indexn[j] < 0) { 1218 idx += m; 1219 continue; 1220 } 1221 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1222 for (i = 0; i < m; i++) { 1223 if (indexm[i] < 0) { 1224 idx++; 1225 continue; 1226 } 1227 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1228 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1229 } 1230 } 1231 } 1232 } else { 1233 if (addv == INSERT_VALUES) { 1234 for (i = 0; i < m; i++) { 1235 if (indexm[i] < 0) { 1236 idx += n; 1237 continue; 1238 } 1239 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1240 for (j = 0; j < n; j++) { 1241 if (indexn[j] < 0) { 1242 idx++; 1243 continue; 1244 } 1245 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1246 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1247 } 1248 } 1249 } else { 1250 for (i = 0; i < m; i++) { 1251 if (indexm[i] < 0) { 1252 idx += n; 1253 continue; 1254 } 1255 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1256 for (j = 0; j < n; j++) { 1257 if (indexn[j] < 0) { 1258 idx++; 1259 continue; 1260 } 1261 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1262 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1263 } 1264 } 1265 } 1266 } 1267 /* hack to prevent unneeded copy to the GPU while returning the array */ 1268 #if defined(PETSC_HAVE_CUDA) 1269 oldf = A->offloadmask; 1270 A->offloadmask = PETSC_OFFLOAD_GPU; 1271 #endif 1272 PetscCall(MatDenseRestoreArray(A, &av)); 1273 #if defined(PETSC_HAVE_CUDA) 1274 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1275 #endif 1276 PetscFunctionReturn(0); 1277 } 1278 1279 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) { 1280 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1281 const PetscScalar *vv; 1282 PetscInt i, j; 1283 1284 PetscFunctionBegin; 1285 PetscCall(MatDenseGetArrayRead(A, &vv)); 1286 /* row-oriented output */ 1287 for (i = 0; i < m; i++) { 1288 if (indexm[i] < 0) { 1289 v += n; 1290 continue; 1291 } 1292 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1293 for (j = 0; j < n; j++) { 1294 if (indexn[j] < 0) { 1295 v++; 1296 continue; 1297 } 1298 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1299 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1300 } 1301 } 1302 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 /* -----------------------------------------------------------------*/ 1307 1308 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) { 1309 PetscBool skipHeader; 1310 PetscViewerFormat format; 1311 PetscInt header[4], M, N, m, lda, i, j, k; 1312 const PetscScalar *v; 1313 PetscScalar *vwork; 1314 1315 PetscFunctionBegin; 1316 PetscCall(PetscViewerSetUp(viewer)); 1317 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1318 PetscCall(PetscViewerGetFormat(viewer, &format)); 1319 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1320 1321 PetscCall(MatGetSize(mat, &M, &N)); 1322 1323 /* write matrix header */ 1324 header[0] = MAT_FILE_CLASSID; 1325 header[1] = M; 1326 header[2] = N; 1327 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1328 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1329 1330 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1331 if (format != PETSC_VIEWER_NATIVE) { 1332 PetscInt nnz = m * N, *iwork; 1333 /* store row lengths for each row */ 1334 PetscCall(PetscMalloc1(nnz, &iwork)); 1335 for (i = 0; i < m; i++) iwork[i] = N; 1336 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1337 /* store column indices (zero start index) */ 1338 for (k = 0, i = 0; i < m; i++) 1339 for (j = 0; j < N; j++, k++) iwork[k] = j; 1340 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1341 PetscCall(PetscFree(iwork)); 1342 } 1343 /* store matrix values as a dense matrix in row major order */ 1344 PetscCall(PetscMalloc1(m * N, &vwork)); 1345 PetscCall(MatDenseGetArrayRead(mat, &v)); 1346 PetscCall(MatDenseGetLDA(mat, &lda)); 1347 for (k = 0, i = 0; i < m; i++) 1348 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1349 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1350 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1351 PetscCall(PetscFree(vwork)); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) { 1356 PetscBool skipHeader; 1357 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1358 PetscInt rows, cols; 1359 PetscScalar *v, *vwork; 1360 1361 PetscFunctionBegin; 1362 PetscCall(PetscViewerSetUp(viewer)); 1363 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1364 1365 if (!skipHeader) { 1366 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1367 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1368 M = header[1]; 1369 N = header[2]; 1370 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1371 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1372 nz = header[3]; 1373 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1374 } else { 1375 PetscCall(MatGetSize(mat, &M, &N)); 1376 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1377 nz = MATRIX_BINARY_FORMAT_DENSE; 1378 } 1379 1380 /* setup global sizes if not set */ 1381 if (mat->rmap->N < 0) mat->rmap->N = M; 1382 if (mat->cmap->N < 0) mat->cmap->N = N; 1383 PetscCall(MatSetUp(mat)); 1384 /* check if global sizes are correct */ 1385 PetscCall(MatGetSize(mat, &rows, &cols)); 1386 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1387 1388 PetscCall(MatGetSize(mat, NULL, &N)); 1389 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1390 PetscCall(MatDenseGetArray(mat, &v)); 1391 PetscCall(MatDenseGetLDA(mat, &lda)); 1392 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1393 PetscInt nnz = m * N; 1394 /* read in matrix values */ 1395 PetscCall(PetscMalloc1(nnz, &vwork)); 1396 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1397 /* store values in column major order */ 1398 for (j = 0; j < N; j++) 1399 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1400 PetscCall(PetscFree(vwork)); 1401 } else { /* matrix in file is sparse format */ 1402 PetscInt nnz = 0, *rlens, *icols; 1403 /* read in row lengths */ 1404 PetscCall(PetscMalloc1(m, &rlens)); 1405 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1406 for (i = 0; i < m; i++) nnz += rlens[i]; 1407 /* read in column indices and values */ 1408 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1409 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1410 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1411 /* store values in column major order */ 1412 for (k = 0, i = 0; i < m; i++) 1413 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1414 PetscCall(PetscFree(rlens)); 1415 PetscCall(PetscFree2(icols, vwork)); 1416 } 1417 PetscCall(MatDenseRestoreArray(mat, &v)); 1418 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1419 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) { 1424 PetscBool isbinary, ishdf5; 1425 1426 PetscFunctionBegin; 1427 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1428 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1429 /* force binary viewer to load .info file if it has not yet done so */ 1430 PetscCall(PetscViewerSetUp(viewer)); 1431 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1432 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1433 if (isbinary) { 1434 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1435 } else if (ishdf5) { 1436 #if defined(PETSC_HAVE_HDF5) 1437 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1438 #else 1439 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1440 #endif 1441 } else { 1442 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1443 } 1444 PetscFunctionReturn(0); 1445 } 1446 1447 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) { 1448 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1449 PetscInt i, j; 1450 const char *name; 1451 PetscScalar *v, *av; 1452 PetscViewerFormat format; 1453 #if defined(PETSC_USE_COMPLEX) 1454 PetscBool allreal = PETSC_TRUE; 1455 #endif 1456 1457 PetscFunctionBegin; 1458 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1459 PetscCall(PetscViewerGetFormat(viewer, &format)); 1460 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1461 PetscFunctionReturn(0); /* do nothing for now */ 1462 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1463 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1464 for (i = 0; i < A->rmap->n; i++) { 1465 v = av + i; 1466 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1467 for (j = 0; j < A->cmap->n; j++) { 1468 #if defined(PETSC_USE_COMPLEX) 1469 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1470 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1471 } else if (PetscRealPart(*v)) { 1472 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1473 } 1474 #else 1475 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1476 #endif 1477 v += a->lda; 1478 } 1479 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1480 } 1481 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1482 } else { 1483 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1484 #if defined(PETSC_USE_COMPLEX) 1485 /* determine if matrix has all real values */ 1486 for (j = 0; j < A->cmap->n; j++) { 1487 v = av + j * a->lda; 1488 for (i = 0; i < A->rmap->n; i++) { 1489 if (PetscImaginaryPart(v[i])) { 1490 allreal = PETSC_FALSE; 1491 break; 1492 } 1493 } 1494 } 1495 #endif 1496 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1497 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1498 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1499 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1500 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1501 } 1502 1503 for (i = 0; i < A->rmap->n; i++) { 1504 v = av + i; 1505 for (j = 0; j < A->cmap->n; j++) { 1506 #if defined(PETSC_USE_COMPLEX) 1507 if (allreal) { 1508 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1509 } else { 1510 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1511 } 1512 #else 1513 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1514 #endif 1515 v += a->lda; 1516 } 1517 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1518 } 1519 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1520 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1521 } 1522 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1523 PetscCall(PetscViewerFlush(viewer)); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #include <petscdraw.h> 1528 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) { 1529 Mat A = (Mat)Aa; 1530 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1531 int color = PETSC_DRAW_WHITE; 1532 const PetscScalar *v; 1533 PetscViewer viewer; 1534 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1535 PetscViewerFormat format; 1536 1537 PetscFunctionBegin; 1538 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1539 PetscCall(PetscViewerGetFormat(viewer, &format)); 1540 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1541 1542 /* Loop over matrix elements drawing boxes */ 1543 PetscCall(MatDenseGetArrayRead(A, &v)); 1544 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1545 PetscDrawCollectiveBegin(draw); 1546 /* Blue for negative and Red for positive */ 1547 for (j = 0; j < n; j++) { 1548 x_l = j; 1549 x_r = x_l + 1.0; 1550 for (i = 0; i < m; i++) { 1551 y_l = m - i - 1.0; 1552 y_r = y_l + 1.0; 1553 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1554 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1555 else continue; 1556 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1557 } 1558 } 1559 PetscDrawCollectiveEnd(draw); 1560 } else { 1561 /* use contour shading to indicate magnitude of values */ 1562 /* first determine max of all nonzero values */ 1563 PetscReal minv = 0.0, maxv = 0.0; 1564 PetscDraw popup; 1565 1566 for (i = 0; i < m * n; i++) { 1567 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1568 } 1569 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1570 PetscCall(PetscDrawGetPopup(draw, &popup)); 1571 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1572 1573 PetscDrawCollectiveBegin(draw); 1574 for (j = 0; j < n; j++) { 1575 x_l = j; 1576 x_r = x_l + 1.0; 1577 for (i = 0; i < m; i++) { 1578 y_l = m - i - 1.0; 1579 y_r = y_l + 1.0; 1580 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1581 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1582 } 1583 } 1584 PetscDrawCollectiveEnd(draw); 1585 } 1586 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) { 1591 PetscDraw draw; 1592 PetscBool isnull; 1593 PetscReal xr, yr, xl, yl, h, w; 1594 1595 PetscFunctionBegin; 1596 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1597 PetscCall(PetscDrawIsNull(draw, &isnull)); 1598 if (isnull) PetscFunctionReturn(0); 1599 1600 xr = A->cmap->n; 1601 yr = A->rmap->n; 1602 h = yr / 10.0; 1603 w = xr / 10.0; 1604 xr += w; 1605 yr += h; 1606 xl = -w; 1607 yl = -h; 1608 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1609 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1610 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1611 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1612 PetscCall(PetscDrawSave(draw)); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) { 1617 PetscBool iascii, isbinary, isdraw; 1618 1619 PetscFunctionBegin; 1620 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1621 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1622 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1623 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1624 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1625 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) { 1630 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1631 1632 PetscFunctionBegin; 1633 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1634 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1635 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreArray() first"); 1636 a->unplacedarray = a->v; 1637 a->unplaced_user_alloc = a->user_alloc; 1638 a->v = (PetscScalar *)array; 1639 a->user_alloc = PETSC_TRUE; 1640 #if defined(PETSC_HAVE_CUDA) 1641 A->offloadmask = PETSC_OFFLOAD_CPU; 1642 #endif 1643 PetscFunctionReturn(0); 1644 } 1645 1646 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) { 1647 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1648 1649 PetscFunctionBegin; 1650 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1651 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1652 a->v = a->unplacedarray; 1653 a->user_alloc = a->unplaced_user_alloc; 1654 a->unplacedarray = NULL; 1655 #if defined(PETSC_HAVE_CUDA) 1656 A->offloadmask = PETSC_OFFLOAD_CPU; 1657 #endif 1658 PetscFunctionReturn(0); 1659 } 1660 1661 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) { 1662 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1663 1664 PetscFunctionBegin; 1665 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1666 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1667 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1668 a->v = (PetscScalar *)array; 1669 a->user_alloc = PETSC_FALSE; 1670 #if defined(PETSC_HAVE_CUDA) 1671 A->offloadmask = PETSC_OFFLOAD_CPU; 1672 #endif 1673 PetscFunctionReturn(0); 1674 } 1675 1676 PetscErrorCode MatDestroy_SeqDense(Mat mat) { 1677 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1678 1679 PetscFunctionBegin; 1680 #if defined(PETSC_USE_LOG) 1681 PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n); 1682 #endif 1683 PetscCall(VecDestroy(&(l->qrrhs))); 1684 PetscCall(PetscFree(l->tau)); 1685 PetscCall(PetscFree(l->pivots)); 1686 PetscCall(PetscFree(l->fwork)); 1687 PetscCall(MatDestroy(&l->ptapwork)); 1688 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1689 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1690 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1691 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1692 PetscCall(VecDestroy(&l->cvec)); 1693 PetscCall(MatDestroy(&l->cmat)); 1694 PetscCall(PetscFree(mat->data)); 1695 1696 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1697 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1698 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1699 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1700 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1704 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1712 #if defined(PETSC_HAVE_ELEMENTAL) 1713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1714 #endif 1715 #if defined(PETSC_HAVE_SCALAPACK) 1716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1717 #endif 1718 #if defined(PETSC_HAVE_CUDA) 1719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1723 #endif 1724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1729 1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) { 1744 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1745 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1746 PetscScalar *v, tmp; 1747 1748 PetscFunctionBegin; 1749 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1750 if (reuse == MAT_INPLACE_MATRIX) { 1751 if (m == n) { /* in place transpose */ 1752 PetscCall(MatDenseGetArray(A, &v)); 1753 for (j = 0; j < m; j++) { 1754 for (k = 0; k < j; k++) { 1755 tmp = v[j + k * M]; 1756 v[j + k * M] = v[k + j * M]; 1757 v[k + j * M] = tmp; 1758 } 1759 } 1760 PetscCall(MatDenseRestoreArray(A, &v)); 1761 } else { /* reuse memory, temporary allocates new memory */ 1762 PetscScalar *v2; 1763 PetscLayout tmplayout; 1764 1765 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1766 PetscCall(MatDenseGetArray(A, &v)); 1767 for (j = 0; j < n; j++) { 1768 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1769 } 1770 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1771 PetscCall(PetscFree(v2)); 1772 PetscCall(MatDenseRestoreArray(A, &v)); 1773 /* cleanup size dependent quantities */ 1774 PetscCall(VecDestroy(&mat->cvec)); 1775 PetscCall(MatDestroy(&mat->cmat)); 1776 PetscCall(PetscFree(mat->pivots)); 1777 PetscCall(PetscFree(mat->fwork)); 1778 PetscCall(MatDestroy(&mat->ptapwork)); 1779 /* swap row/col layouts */ 1780 mat->lda = n; 1781 tmplayout = A->rmap; 1782 A->rmap = A->cmap; 1783 A->cmap = tmplayout; 1784 } 1785 } else { /* out-of-place transpose */ 1786 Mat tmat; 1787 Mat_SeqDense *tmatd; 1788 PetscScalar *v2; 1789 PetscInt M2; 1790 1791 if (reuse == MAT_INITIAL_MATRIX) { 1792 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1793 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1794 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1795 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1796 } else tmat = *matout; 1797 1798 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1799 PetscCall(MatDenseGetArray(tmat, &v2)); 1800 tmatd = (Mat_SeqDense *)tmat->data; 1801 M2 = tmatd->lda; 1802 for (j = 0; j < n; j++) { 1803 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1804 } 1805 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1806 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1807 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1808 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1809 *matout = tmat; 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) { 1815 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1816 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1817 PetscInt i; 1818 const PetscScalar *v1, *v2; 1819 1820 PetscFunctionBegin; 1821 if (A1->rmap->n != A2->rmap->n) { 1822 *flg = PETSC_FALSE; 1823 PetscFunctionReturn(0); 1824 } 1825 if (A1->cmap->n != A2->cmap->n) { 1826 *flg = PETSC_FALSE; 1827 PetscFunctionReturn(0); 1828 } 1829 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1830 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1831 for (i = 0; i < A1->cmap->n; i++) { 1832 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1833 if (*flg == PETSC_FALSE) PetscFunctionReturn(0); 1834 v1 += mat1->lda; 1835 v2 += mat2->lda; 1836 } 1837 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1838 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1839 *flg = PETSC_TRUE; 1840 PetscFunctionReturn(0); 1841 } 1842 1843 static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) { 1844 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1845 PetscInt i, n, len; 1846 PetscScalar *x; 1847 const PetscScalar *vv; 1848 1849 PetscFunctionBegin; 1850 PetscCall(VecGetSize(v, &n)); 1851 PetscCall(VecGetArray(v, &x)); 1852 len = PetscMin(A->rmap->n, A->cmap->n); 1853 PetscCall(MatDenseGetArrayRead(A, &vv)); 1854 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1855 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1856 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1857 PetscCall(VecRestoreArray(v, &x)); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) { 1862 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1863 const PetscScalar *l, *r; 1864 PetscScalar x, *v, *vv; 1865 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1866 1867 PetscFunctionBegin; 1868 PetscCall(MatDenseGetArray(A, &vv)); 1869 if (ll) { 1870 PetscCall(VecGetSize(ll, &m)); 1871 PetscCall(VecGetArrayRead(ll, &l)); 1872 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1873 for (i = 0; i < m; i++) { 1874 x = l[i]; 1875 v = vv + i; 1876 for (j = 0; j < n; j++) { 1877 (*v) *= x; 1878 v += mat->lda; 1879 } 1880 } 1881 PetscCall(VecRestoreArrayRead(ll, &l)); 1882 PetscCall(PetscLogFlops(1.0 * n * m)); 1883 } 1884 if (rr) { 1885 PetscCall(VecGetSize(rr, &n)); 1886 PetscCall(VecGetArrayRead(rr, &r)); 1887 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1888 for (i = 0; i < n; i++) { 1889 x = r[i]; 1890 v = vv + i * mat->lda; 1891 for (j = 0; j < m; j++) (*v++) *= x; 1892 } 1893 PetscCall(VecRestoreArrayRead(rr, &r)); 1894 PetscCall(PetscLogFlops(1.0 * n * m)); 1895 } 1896 PetscCall(MatDenseRestoreArray(A, &vv)); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) { 1901 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1902 PetscScalar *v, *vv; 1903 PetscReal sum = 0.0; 1904 PetscInt lda, m = A->rmap->n, i, j; 1905 1906 PetscFunctionBegin; 1907 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1908 PetscCall(MatDenseGetLDA(A, &lda)); 1909 v = vv; 1910 if (type == NORM_FROBENIUS) { 1911 if (lda > m) { 1912 for (j = 0; j < A->cmap->n; j++) { 1913 v = vv + j * lda; 1914 for (i = 0; i < m; i++) { 1915 sum += PetscRealPart(PetscConj(*v) * (*v)); 1916 v++; 1917 } 1918 } 1919 } else { 1920 #if defined(PETSC_USE_REAL___FP16) 1921 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1922 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1923 } 1924 #else 1925 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1926 sum += PetscRealPart(PetscConj(*v) * (*v)); 1927 v++; 1928 } 1929 } 1930 *nrm = PetscSqrtReal(sum); 1931 #endif 1932 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1933 } else if (type == NORM_1) { 1934 *nrm = 0.0; 1935 for (j = 0; j < A->cmap->n; j++) { 1936 v = vv + j * mat->lda; 1937 sum = 0.0; 1938 for (i = 0; i < A->rmap->n; i++) { 1939 sum += PetscAbsScalar(*v); 1940 v++; 1941 } 1942 if (sum > *nrm) *nrm = sum; 1943 } 1944 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1945 } else if (type == NORM_INFINITY) { 1946 *nrm = 0.0; 1947 for (j = 0; j < A->rmap->n; j++) { 1948 v = vv + j; 1949 sum = 0.0; 1950 for (i = 0; i < A->cmap->n; i++) { 1951 sum += PetscAbsScalar(*v); 1952 v += mat->lda; 1953 } 1954 if (sum > *nrm) *nrm = sum; 1955 } 1956 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1957 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 1958 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) { 1963 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 1964 1965 PetscFunctionBegin; 1966 switch (op) { 1967 case MAT_ROW_ORIENTED: aij->roworiented = flg; break; 1968 case MAT_NEW_NONZERO_LOCATIONS: 1969 case MAT_NEW_NONZERO_LOCATION_ERR: 1970 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1971 case MAT_FORCE_DIAGONAL_ENTRIES: 1972 case MAT_KEEP_NONZERO_PATTERN: 1973 case MAT_IGNORE_OFF_PROC_ENTRIES: 1974 case MAT_USE_HASH_TABLE: 1975 case MAT_IGNORE_ZERO_ENTRIES: 1976 case MAT_IGNORE_LOWER_TRIANGULAR: 1977 case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 1978 case MAT_SPD: 1979 case MAT_SYMMETRIC: 1980 case MAT_STRUCTURALLY_SYMMETRIC: 1981 case MAT_HERMITIAN: 1982 case MAT_SYMMETRY_ETERNAL: 1983 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1984 case MAT_SPD_ETERNAL: break; 1985 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 PetscErrorCode MatZeroEntries_SeqDense(Mat A) { 1991 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 1992 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 1993 PetscScalar *v; 1994 1995 PetscFunctionBegin; 1996 PetscCall(MatDenseGetArrayWrite(A, &v)); 1997 if (lda > m) { 1998 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 1999 } else { 2000 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 2001 } 2002 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 2007 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 2008 PetscInt m = l->lda, n = A->cmap->n, i, j; 2009 PetscScalar *slot, *bb, *v; 2010 const PetscScalar *xx; 2011 2012 PetscFunctionBegin; 2013 if (PetscDefined(USE_DEBUG)) { 2014 for (i = 0; i < N; i++) { 2015 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 2016 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 2017 } 2018 } 2019 if (!N) PetscFunctionReturn(0); 2020 2021 /* fix right hand side if needed */ 2022 if (x && b) { 2023 PetscCall(VecGetArrayRead(x, &xx)); 2024 PetscCall(VecGetArray(b, &bb)); 2025 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2026 PetscCall(VecRestoreArrayRead(x, &xx)); 2027 PetscCall(VecRestoreArray(b, &bb)); 2028 } 2029 2030 PetscCall(MatDenseGetArray(A, &v)); 2031 for (i = 0; i < N; i++) { 2032 slot = v + rows[i]; 2033 for (j = 0; j < n; j++) { 2034 *slot = 0.0; 2035 slot += m; 2036 } 2037 } 2038 if (diag != 0.0) { 2039 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2040 for (i = 0; i < N; i++) { 2041 slot = v + (m + 1) * rows[i]; 2042 *slot = diag; 2043 } 2044 } 2045 PetscCall(MatDenseRestoreArray(A, &v)); 2046 PetscFunctionReturn(0); 2047 } 2048 2049 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) { 2050 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2051 2052 PetscFunctionBegin; 2053 *lda = mat->lda; 2054 PetscFunctionReturn(0); 2055 } 2056 2057 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) { 2058 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2059 2060 PetscFunctionBegin; 2061 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2062 *array = mat->v; 2063 PetscFunctionReturn(0); 2064 } 2065 2066 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) { 2067 PetscFunctionBegin; 2068 if (array) *array = NULL; 2069 PetscFunctionReturn(0); 2070 } 2071 2072 /*@ 2073 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2074 2075 Not collective 2076 2077 Input Parameter: 2078 . mat - a `MATDENSE` or `MATDENSECUDA` matrix 2079 2080 Output Parameter: 2081 . lda - the leading dimension 2082 2083 Level: intermediate 2084 2085 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2086 @*/ 2087 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) { 2088 PetscFunctionBegin; 2089 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2090 PetscValidIntPointer(lda, 2); 2091 MatCheckPreallocated(A, 1); 2092 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2093 PetscFunctionReturn(0); 2094 } 2095 2096 /*@ 2097 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2098 2099 Not collective 2100 2101 Input Parameters: 2102 + mat - a `MATDENSE` or `MATDENSECUDA` matrix 2103 - lda - the leading dimension 2104 2105 Level: intermediate 2106 2107 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2108 @*/ 2109 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) { 2110 PetscFunctionBegin; 2111 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2112 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2113 PetscFunctionReturn(0); 2114 } 2115 2116 /*@C 2117 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2118 2119 Logically Collective on A 2120 2121 Input Parameter: 2122 . mat - a dense matrix 2123 2124 Output Parameter: 2125 . array - pointer to the data 2126 2127 Level: intermediate 2128 2129 .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2130 @*/ 2131 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array) { 2132 PetscFunctionBegin; 2133 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2134 PetscValidPointer(array, 2); 2135 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /*@C 2140 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2141 2142 Logically Collective on A 2143 2144 Input Parameters: 2145 + mat - a dense matrix 2146 - array - pointer to the data (may be NULL) 2147 2148 Level: intermediate 2149 2150 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2151 @*/ 2152 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array) { 2153 PetscFunctionBegin; 2154 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2155 PetscValidPointer(array, 2); 2156 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2157 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2158 #if defined(PETSC_HAVE_CUDA) 2159 A->offloadmask = PETSC_OFFLOAD_CPU; 2160 #endif 2161 PetscFunctionReturn(0); 2162 } 2163 2164 /*@C 2165 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2166 2167 Not Collective 2168 2169 Input Parameter: 2170 . mat - a dense matrix 2171 2172 Output Parameter: 2173 . array - pointer to the data 2174 2175 Level: intermediate 2176 2177 .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2178 @*/ 2179 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array) { 2180 PetscFunctionBegin; 2181 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2182 PetscValidPointer(array, 2); 2183 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 /*@C 2188 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2189 2190 Not Collective 2191 2192 Input Parameters: 2193 + mat - a dense matrix 2194 - array - pointer to the data (may be NULL) 2195 2196 Level: intermediate 2197 2198 .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2199 @*/ 2200 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array) { 2201 PetscFunctionBegin; 2202 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2203 PetscValidPointer(array, 2); 2204 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 /*@C 2209 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2210 2211 Not Collective 2212 2213 Input Parameter: 2214 . mat - a dense matrix 2215 2216 Output Parameter: 2217 . array - pointer to the data 2218 2219 Level: intermediate 2220 2221 .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2222 @*/ 2223 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array) { 2224 PetscFunctionBegin; 2225 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2226 PetscValidPointer(array, 2); 2227 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 /*@C 2232 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2233 2234 Not Collective 2235 2236 Input Parameters: 2237 + mat - a dense matrix 2238 - array - pointer to the data (may be NULL) 2239 2240 Level: intermediate 2241 2242 .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2243 @*/ 2244 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array) { 2245 PetscFunctionBegin; 2246 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2247 PetscValidPointer(array, 2); 2248 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2249 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2250 #if defined(PETSC_HAVE_CUDA) 2251 A->offloadmask = PETSC_OFFLOAD_CPU; 2252 #endif 2253 PetscFunctionReturn(0); 2254 } 2255 2256 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) { 2257 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2258 PetscInt i, j, nrows, ncols, ldb; 2259 const PetscInt *irow, *icol; 2260 PetscScalar *av, *bv, *v = mat->v; 2261 Mat newmat; 2262 2263 PetscFunctionBegin; 2264 PetscCall(ISGetIndices(isrow, &irow)); 2265 PetscCall(ISGetIndices(iscol, &icol)); 2266 PetscCall(ISGetLocalSize(isrow, &nrows)); 2267 PetscCall(ISGetLocalSize(iscol, &ncols)); 2268 2269 /* Check submatrixcall */ 2270 if (scall == MAT_REUSE_MATRIX) { 2271 PetscInt n_cols, n_rows; 2272 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2273 if (n_rows != nrows || n_cols != ncols) { 2274 /* resize the result matrix to match number of requested rows/columns */ 2275 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2276 } 2277 newmat = *B; 2278 } else { 2279 /* Create and fill new matrix */ 2280 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2281 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2282 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2283 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2284 } 2285 2286 /* Now extract the data pointers and do the copy,column at a time */ 2287 PetscCall(MatDenseGetArray(newmat, &bv)); 2288 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2289 for (i = 0; i < ncols; i++) { 2290 av = v + mat->lda * icol[i]; 2291 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2292 bv += ldb; 2293 } 2294 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2295 2296 /* Assemble the matrices so that the correct flags are set */ 2297 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2298 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2299 2300 /* Free work space */ 2301 PetscCall(ISRestoreIndices(isrow, &irow)); 2302 PetscCall(ISRestoreIndices(iscol, &icol)); 2303 *B = newmat; 2304 PetscFunctionReturn(0); 2305 } 2306 2307 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) { 2308 PetscInt i; 2309 2310 PetscFunctionBegin; 2311 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2312 2313 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) { 2318 PetscFunctionBegin; 2319 PetscFunctionReturn(0); 2320 } 2321 2322 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) { 2323 PetscFunctionBegin; 2324 PetscFunctionReturn(0); 2325 } 2326 2327 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) { 2328 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2329 const PetscScalar *va; 2330 PetscScalar *vb; 2331 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2332 2333 PetscFunctionBegin; 2334 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2335 if (A->ops->copy != B->ops->copy) { 2336 PetscCall(MatCopy_Basic(A, B, str)); 2337 PetscFunctionReturn(0); 2338 } 2339 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2340 PetscCall(MatDenseGetArrayRead(A, &va)); 2341 PetscCall(MatDenseGetArray(B, &vb)); 2342 if (lda1 > m || lda2 > m) { 2343 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2344 } else { 2345 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2346 } 2347 PetscCall(MatDenseRestoreArray(B, &vb)); 2348 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2349 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2350 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscErrorCode MatSetUp_SeqDense(Mat A) { 2355 PetscFunctionBegin; 2356 PetscCall(PetscLayoutSetUp(A->rmap)); 2357 PetscCall(PetscLayoutSetUp(A->cmap)); 2358 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2359 PetscFunctionReturn(0); 2360 } 2361 2362 static PetscErrorCode MatConjugate_SeqDense(Mat A) { 2363 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2364 PetscInt i, j; 2365 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2366 PetscScalar *aa; 2367 2368 PetscFunctionBegin; 2369 PetscCall(MatDenseGetArray(A, &aa)); 2370 for (j = 0; j < A->cmap->n; j++) { 2371 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2372 } 2373 PetscCall(MatDenseRestoreArray(A, &aa)); 2374 if (mat->tau) 2375 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 static PetscErrorCode MatRealPart_SeqDense(Mat A) { 2380 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2381 PetscInt i, j; 2382 PetscScalar *aa; 2383 2384 PetscFunctionBegin; 2385 PetscCall(MatDenseGetArray(A, &aa)); 2386 for (j = 0; j < A->cmap->n; j++) { 2387 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2388 } 2389 PetscCall(MatDenseRestoreArray(A, &aa)); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) { 2394 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2395 PetscInt i, j; 2396 PetscScalar *aa; 2397 2398 PetscFunctionBegin; 2399 PetscCall(MatDenseGetArray(A, &aa)); 2400 for (j = 0; j < A->cmap->n; j++) { 2401 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2402 } 2403 PetscCall(MatDenseRestoreArray(A, &aa)); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 /* ----------------------------------------------------------------*/ 2408 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2409 PetscInt m = A->rmap->n, n = B->cmap->n; 2410 PetscBool cisdense; 2411 2412 PetscFunctionBegin; 2413 PetscCall(MatSetSizes(C, m, n, m, n)); 2414 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2415 if (!cisdense) { 2416 PetscBool flg; 2417 2418 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2419 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2420 } 2421 PetscCall(MatSetUp(C)); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2426 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2427 PetscBLASInt m, n, k; 2428 const PetscScalar *av, *bv; 2429 PetscScalar *cv; 2430 PetscScalar _DOne = 1.0, _DZero = 0.0; 2431 2432 PetscFunctionBegin; 2433 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2434 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2435 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2436 if (!m || !n || !k) PetscFunctionReturn(0); 2437 PetscCall(MatDenseGetArrayRead(A, &av)); 2438 PetscCall(MatDenseGetArrayRead(B, &bv)); 2439 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2440 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2441 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2442 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2443 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2444 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2445 PetscFunctionReturn(0); 2446 } 2447 2448 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2449 PetscInt m = A->rmap->n, n = B->rmap->n; 2450 PetscBool cisdense; 2451 2452 PetscFunctionBegin; 2453 PetscCall(MatSetSizes(C, m, n, m, n)); 2454 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2455 if (!cisdense) { 2456 PetscBool flg; 2457 2458 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2459 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2460 } 2461 PetscCall(MatSetUp(C)); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2466 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2467 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2468 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2469 const PetscScalar *av, *bv; 2470 PetscScalar *cv; 2471 PetscBLASInt m, n, k; 2472 PetscScalar _DOne = 1.0, _DZero = 0.0; 2473 2474 PetscFunctionBegin; 2475 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2476 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2477 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2478 if (!m || !n || !k) PetscFunctionReturn(0); 2479 PetscCall(MatDenseGetArrayRead(A, &av)); 2480 PetscCall(MatDenseGetArrayRead(B, &bv)); 2481 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2482 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2483 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2484 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2485 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2486 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2487 PetscFunctionReturn(0); 2488 } 2489 2490 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2491 PetscInt m = A->cmap->n, n = B->cmap->n; 2492 PetscBool cisdense; 2493 2494 PetscFunctionBegin; 2495 PetscCall(MatSetSizes(C, m, n, m, n)); 2496 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2497 if (!cisdense) { 2498 PetscBool flg; 2499 2500 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2501 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2502 } 2503 PetscCall(MatSetUp(C)); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2508 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2509 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2510 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2511 const PetscScalar *av, *bv; 2512 PetscScalar *cv; 2513 PetscBLASInt m, n, k; 2514 PetscScalar _DOne = 1.0, _DZero = 0.0; 2515 2516 PetscFunctionBegin; 2517 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2518 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2519 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2520 if (!m || !n || !k) PetscFunctionReturn(0); 2521 PetscCall(MatDenseGetArrayRead(A, &av)); 2522 PetscCall(MatDenseGetArrayRead(B, &bv)); 2523 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2524 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2525 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2526 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2527 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2528 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2529 PetscFunctionReturn(0); 2530 } 2531 2532 /* ----------------------------------------------- */ 2533 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) { 2534 PetscFunctionBegin; 2535 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2536 C->ops->productsymbolic = MatProductSymbolic_AB; 2537 PetscFunctionReturn(0); 2538 } 2539 2540 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) { 2541 PetscFunctionBegin; 2542 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2543 C->ops->productsymbolic = MatProductSymbolic_AtB; 2544 PetscFunctionReturn(0); 2545 } 2546 2547 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) { 2548 PetscFunctionBegin; 2549 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2550 C->ops->productsymbolic = MatProductSymbolic_ABt; 2551 PetscFunctionReturn(0); 2552 } 2553 2554 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) { 2555 Mat_Product *product = C->product; 2556 2557 PetscFunctionBegin; 2558 switch (product->type) { 2559 case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); break; 2560 case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); break; 2561 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); break; 2562 default: break; 2563 } 2564 PetscFunctionReturn(0); 2565 } 2566 /* ----------------------------------------------- */ 2567 2568 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2569 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2570 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2571 PetscScalar *x; 2572 const PetscScalar *aa; 2573 2574 PetscFunctionBegin; 2575 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2576 PetscCall(VecGetArray(v, &x)); 2577 PetscCall(VecGetLocalSize(v, &p)); 2578 PetscCall(MatDenseGetArrayRead(A, &aa)); 2579 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2580 for (i = 0; i < m; i++) { 2581 x[i] = aa[i]; 2582 if (idx) idx[i] = 0; 2583 for (j = 1; j < n; j++) { 2584 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2585 x[i] = aa[i + a->lda * j]; 2586 if (idx) idx[i] = j; 2587 } 2588 } 2589 } 2590 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2591 PetscCall(VecRestoreArray(v, &x)); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2596 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2597 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2598 PetscScalar *x; 2599 PetscReal atmp; 2600 const PetscScalar *aa; 2601 2602 PetscFunctionBegin; 2603 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2604 PetscCall(VecGetArray(v, &x)); 2605 PetscCall(VecGetLocalSize(v, &p)); 2606 PetscCall(MatDenseGetArrayRead(A, &aa)); 2607 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2608 for (i = 0; i < m; i++) { 2609 x[i] = PetscAbsScalar(aa[i]); 2610 for (j = 1; j < n; j++) { 2611 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2612 if (PetscAbsScalar(x[i]) < atmp) { 2613 x[i] = atmp; 2614 if (idx) idx[i] = j; 2615 } 2616 } 2617 } 2618 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2619 PetscCall(VecRestoreArray(v, &x)); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2624 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2625 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2626 PetscScalar *x; 2627 const PetscScalar *aa; 2628 2629 PetscFunctionBegin; 2630 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2631 PetscCall(MatDenseGetArrayRead(A, &aa)); 2632 PetscCall(VecGetArray(v, &x)); 2633 PetscCall(VecGetLocalSize(v, &p)); 2634 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2635 for (i = 0; i < m; i++) { 2636 x[i] = aa[i]; 2637 if (idx) idx[i] = 0; 2638 for (j = 1; j < n; j++) { 2639 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2640 x[i] = aa[i + a->lda * j]; 2641 if (idx) idx[i] = j; 2642 } 2643 } 2644 } 2645 PetscCall(VecRestoreArray(v, &x)); 2646 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2647 PetscFunctionReturn(0); 2648 } 2649 2650 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) { 2651 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2652 PetscScalar *x; 2653 const PetscScalar *aa; 2654 2655 PetscFunctionBegin; 2656 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2657 PetscCall(MatDenseGetArrayRead(A, &aa)); 2658 PetscCall(VecGetArray(v, &x)); 2659 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2660 PetscCall(VecRestoreArray(v, &x)); 2661 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) { 2666 PetscInt i, j, m, n; 2667 const PetscScalar *a; 2668 2669 PetscFunctionBegin; 2670 PetscCall(MatGetSize(A, &m, &n)); 2671 PetscCall(PetscArrayzero(reductions, n)); 2672 PetscCall(MatDenseGetArrayRead(A, &a)); 2673 if (type == NORM_2) { 2674 for (i = 0; i < n; i++) { 2675 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 2676 a += m; 2677 } 2678 } else if (type == NORM_1) { 2679 for (i = 0; i < n; i++) { 2680 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 2681 a += m; 2682 } 2683 } else if (type == NORM_INFINITY) { 2684 for (i = 0; i < n; i++) { 2685 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 2686 a += m; 2687 } 2688 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2689 for (i = 0; i < n; i++) { 2690 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 2691 a += m; 2692 } 2693 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2694 for (i = 0; i < n; i++) { 2695 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 2696 a += m; 2697 } 2698 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2699 PetscCall(MatDenseRestoreArrayRead(A, &a)); 2700 if (type == NORM_2) { 2701 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2702 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2703 for (i = 0; i < n; i++) reductions[i] /= m; 2704 } 2705 PetscFunctionReturn(0); 2706 } 2707 2708 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) { 2709 PetscScalar *a; 2710 PetscInt lda, m, n, i, j; 2711 2712 PetscFunctionBegin; 2713 PetscCall(MatGetSize(x, &m, &n)); 2714 PetscCall(MatDenseGetLDA(x, &lda)); 2715 PetscCall(MatDenseGetArrayWrite(x, &a)); 2716 for (j = 0; j < n; j++) { 2717 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 2718 } 2719 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) { 2724 PetscFunctionBegin; 2725 *missing = PETSC_FALSE; 2726 PetscFunctionReturn(0); 2727 } 2728 2729 /* vals is not const */ 2730 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) { 2731 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2732 PetscScalar *v; 2733 2734 PetscFunctionBegin; 2735 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2736 PetscCall(MatDenseGetArray(A, &v)); 2737 *vals = v + col * a->lda; 2738 PetscCall(MatDenseRestoreArray(A, &v)); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) { 2743 PetscFunctionBegin; 2744 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 2745 PetscFunctionReturn(0); 2746 } 2747 2748 /* -------------------------------------------------------------------*/ 2749 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 2750 MatGetRow_SeqDense, 2751 MatRestoreRow_SeqDense, 2752 MatMult_SeqDense, 2753 /* 4*/ MatMultAdd_SeqDense, 2754 MatMultTranspose_SeqDense, 2755 MatMultTransposeAdd_SeqDense, 2756 NULL, 2757 NULL, 2758 NULL, 2759 /* 10*/ NULL, 2760 MatLUFactor_SeqDense, 2761 MatCholeskyFactor_SeqDense, 2762 MatSOR_SeqDense, 2763 MatTranspose_SeqDense, 2764 /* 15*/ MatGetInfo_SeqDense, 2765 MatEqual_SeqDense, 2766 MatGetDiagonal_SeqDense, 2767 MatDiagonalScale_SeqDense, 2768 MatNorm_SeqDense, 2769 /* 20*/ MatAssemblyBegin_SeqDense, 2770 MatAssemblyEnd_SeqDense, 2771 MatSetOption_SeqDense, 2772 MatZeroEntries_SeqDense, 2773 /* 24*/ MatZeroRows_SeqDense, 2774 NULL, 2775 NULL, 2776 NULL, 2777 NULL, 2778 /* 29*/ MatSetUp_SeqDense, 2779 NULL, 2780 NULL, 2781 NULL, 2782 NULL, 2783 /* 34*/ MatDuplicate_SeqDense, 2784 NULL, 2785 NULL, 2786 NULL, 2787 NULL, 2788 /* 39*/ MatAXPY_SeqDense, 2789 MatCreateSubMatrices_SeqDense, 2790 NULL, 2791 MatGetValues_SeqDense, 2792 MatCopy_SeqDense, 2793 /* 44*/ MatGetRowMax_SeqDense, 2794 MatScale_SeqDense, 2795 MatShift_SeqDense, 2796 NULL, 2797 MatZeroRowsColumns_SeqDense, 2798 /* 49*/ MatSetRandom_SeqDense, 2799 NULL, 2800 NULL, 2801 NULL, 2802 NULL, 2803 /* 54*/ NULL, 2804 NULL, 2805 NULL, 2806 NULL, 2807 NULL, 2808 /* 59*/ MatCreateSubMatrix_SeqDense, 2809 MatDestroy_SeqDense, 2810 MatView_SeqDense, 2811 NULL, 2812 NULL, 2813 /* 64*/ NULL, 2814 NULL, 2815 NULL, 2816 NULL, 2817 NULL, 2818 /* 69*/ MatGetRowMaxAbs_SeqDense, 2819 NULL, 2820 NULL, 2821 NULL, 2822 NULL, 2823 /* 74*/ NULL, 2824 NULL, 2825 NULL, 2826 NULL, 2827 NULL, 2828 /* 79*/ NULL, 2829 NULL, 2830 NULL, 2831 NULL, 2832 /* 83*/ MatLoad_SeqDense, 2833 MatIsSymmetric_SeqDense, 2834 MatIsHermitian_SeqDense, 2835 NULL, 2836 NULL, 2837 NULL, 2838 /* 89*/ NULL, 2839 NULL, 2840 MatMatMultNumeric_SeqDense_SeqDense, 2841 NULL, 2842 NULL, 2843 /* 94*/ NULL, 2844 NULL, 2845 NULL, 2846 MatMatTransposeMultNumeric_SeqDense_SeqDense, 2847 NULL, 2848 /* 99*/ MatProductSetFromOptions_SeqDense, 2849 NULL, 2850 NULL, 2851 MatConjugate_SeqDense, 2852 NULL, 2853 /*104*/ NULL, 2854 MatRealPart_SeqDense, 2855 MatImaginaryPart_SeqDense, 2856 NULL, 2857 NULL, 2858 /*109*/ NULL, 2859 NULL, 2860 MatGetRowMin_SeqDense, 2861 MatGetColumnVector_SeqDense, 2862 MatMissingDiagonal_SeqDense, 2863 /*114*/ NULL, 2864 NULL, 2865 NULL, 2866 NULL, 2867 NULL, 2868 /*119*/ NULL, 2869 NULL, 2870 NULL, 2871 NULL, 2872 NULL, 2873 /*124*/ NULL, 2874 MatGetColumnReductions_SeqDense, 2875 NULL, 2876 NULL, 2877 NULL, 2878 /*129*/ NULL, 2879 NULL, 2880 NULL, 2881 MatTransposeMatMultNumeric_SeqDense_SeqDense, 2882 NULL, 2883 /*134*/ NULL, 2884 NULL, 2885 NULL, 2886 NULL, 2887 NULL, 2888 /*139*/ NULL, 2889 NULL, 2890 NULL, 2891 NULL, 2892 NULL, 2893 MatCreateMPIMatConcatenateSeqMat_SeqDense, 2894 /*145*/ NULL, 2895 NULL, 2896 NULL, 2897 NULL, 2898 NULL, 2899 /*150*/ NULL}; 2900 2901 /*@C 2902 MatCreateSeqDense - Creates a `MATSEQDENSE` that 2903 is stored in column major order (the usual Fortran 77 manner). Many 2904 of the matrix operations use the BLAS and LAPACK routines. 2905 2906 Collective 2907 2908 Input Parameters: 2909 + comm - MPI communicator, set to `PETSC_COMM_SELF` 2910 . m - number of rows 2911 . n - number of columns 2912 - data - optional location of matrix data in column major order. Set data=NULL for PETSc 2913 to control all matrix memory allocation. 2914 2915 Output Parameter: 2916 . A - the matrix 2917 2918 Note: 2919 The data input variable is intended primarily for Fortran programmers 2920 who wish to allocate their own matrix memory space. Most users should 2921 set data=NULL. 2922 2923 Level: intermediate 2924 2925 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 2926 @*/ 2927 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A) { 2928 PetscFunctionBegin; 2929 PetscCall(MatCreate(comm, A)); 2930 PetscCall(MatSetSizes(*A, m, n, m, n)); 2931 PetscCall(MatSetType(*A, MATSEQDENSE)); 2932 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 2933 PetscFunctionReturn(0); 2934 } 2935 2936 /*@C 2937 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 2938 2939 Collective 2940 2941 Input Parameters: 2942 + B - the matrix 2943 - data - the array (or NULL) 2944 2945 Note: 2946 The data input variable is intended primarily for Fortran programmers 2947 who wish to allocate their own matrix memory space. Most users should 2948 need not call this routine. 2949 2950 Level: intermediate 2951 2952 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 2953 @*/ 2954 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) { 2955 PetscFunctionBegin; 2956 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2957 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) { 2962 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2963 2964 PetscFunctionBegin; 2965 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2966 B->preallocated = PETSC_TRUE; 2967 2968 PetscCall(PetscLayoutSetUp(B->rmap)); 2969 PetscCall(PetscLayoutSetUp(B->cmap)); 2970 2971 if (b->lda <= 0) b->lda = B->rmap->n; 2972 2973 if (!data) { /* petsc-allocated storage */ 2974 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 2975 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 2976 PetscCall(PetscLogObjectMemory((PetscObject)B, b->lda * B->cmap->n * sizeof(PetscScalar))); 2977 2978 b->user_alloc = PETSC_FALSE; 2979 } else { /* user-allocated storage */ 2980 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 2981 b->v = data; 2982 b->user_alloc = PETSC_TRUE; 2983 } 2984 B->assembled = PETSC_TRUE; 2985 PetscFunctionReturn(0); 2986 } 2987 2988 #if defined(PETSC_HAVE_ELEMENTAL) 2989 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 2990 Mat mat_elemental; 2991 const PetscScalar *array; 2992 PetscScalar *v_colwise; 2993 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 2994 2995 PetscFunctionBegin; 2996 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 2997 PetscCall(MatDenseGetArrayRead(A, &array)); 2998 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 2999 k = 0; 3000 for (j = 0; j < N; j++) { 3001 cols[j] = j; 3002 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 3003 } 3004 for (i = 0; i < M; i++) rows[i] = i; 3005 PetscCall(MatDenseRestoreArrayRead(A, &array)); 3006 3007 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 3008 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3009 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 3010 PetscCall(MatSetUp(mat_elemental)); 3011 3012 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 3013 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 3014 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 3015 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 3016 PetscCall(PetscFree3(v_colwise, rows, cols)); 3017 3018 if (reuse == MAT_INPLACE_MATRIX) { 3019 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3020 } else { 3021 *newmat = mat_elemental; 3022 } 3023 PetscFunctionReturn(0); 3024 } 3025 #endif 3026 3027 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) { 3028 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3029 PetscBool data; 3030 3031 PetscFunctionBegin; 3032 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3033 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3034 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3035 b->lda = lda; 3036 PetscFunctionReturn(0); 3037 } 3038 3039 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 3040 PetscFunctionBegin; 3041 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3042 PetscFunctionReturn(0); 3043 } 3044 3045 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { 3046 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3047 3048 PetscFunctionBegin; 3049 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3050 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3051 if (!a->cvec) { 3052 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); 3053 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 3054 } 3055 a->vecinuse = col + 1; 3056 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3057 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3058 *v = a->cvec; 3059 PetscFunctionReturn(0); 3060 } 3061 3062 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { 3063 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3064 3065 PetscFunctionBegin; 3066 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3067 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3068 a->vecinuse = 0; 3069 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3070 PetscCall(VecResetArray(a->cvec)); 3071 if (v) *v = NULL; 3072 PetscFunctionReturn(0); 3073 } 3074 3075 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { 3076 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3077 3078 PetscFunctionBegin; 3079 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3080 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3081 if (!a->cvec) { 3082 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); 3083 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 3084 } 3085 a->vecinuse = col + 1; 3086 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3087 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3088 PetscCall(VecLockReadPush(a->cvec)); 3089 *v = a->cvec; 3090 PetscFunctionReturn(0); 3091 } 3092 3093 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { 3094 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3095 3096 PetscFunctionBegin; 3097 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3098 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3099 a->vecinuse = 0; 3100 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3101 PetscCall(VecLockReadPop(a->cvec)); 3102 PetscCall(VecResetArray(a->cvec)); 3103 if (v) *v = NULL; 3104 PetscFunctionReturn(0); 3105 } 3106 3107 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { 3108 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3109 3110 PetscFunctionBegin; 3111 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3112 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3113 if (!a->cvec) { 3114 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); 3115 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 3116 } 3117 a->vecinuse = col + 1; 3118 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3119 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3120 *v = a->cvec; 3121 PetscFunctionReturn(0); 3122 } 3123 3124 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { 3125 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3126 3127 PetscFunctionBegin; 3128 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3129 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3130 a->vecinuse = 0; 3131 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3132 PetscCall(VecResetArray(a->cvec)); 3133 if (v) *v = NULL; 3134 PetscFunctionReturn(0); 3135 } 3136 3137 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 3138 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3139 3140 PetscFunctionBegin; 3141 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3142 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3143 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3144 if (!a->cmat) { 3145 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat)); 3146 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat)); 3147 } else { 3148 PetscCall(MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda)); 3149 } 3150 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3151 a->matinuse = cbegin + 1; 3152 *v = a->cmat; 3153 #if defined(PETSC_HAVE_CUDA) 3154 A->offloadmask = PETSC_OFFLOAD_CPU; 3155 #endif 3156 PetscFunctionReturn(0); 3157 } 3158 3159 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) { 3160 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3161 3162 PetscFunctionBegin; 3163 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3164 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3165 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3166 a->matinuse = 0; 3167 PetscCall(MatDenseResetArray(a->cmat)); 3168 if (v) *v = NULL; 3169 #if defined(PETSC_HAVE_CUDA) 3170 A->offloadmask = PETSC_OFFLOAD_CPU; 3171 #endif 3172 PetscFunctionReturn(0); 3173 } 3174 3175 /*MC 3176 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3177 3178 Options Database Keys: 3179 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3180 3181 Level: beginner 3182 3183 .seealso: `MATSEQDENSE`, `MatCreateSeqDense()` 3184 M*/ 3185 PetscErrorCode MatCreate_SeqDense(Mat B) { 3186 Mat_SeqDense *b; 3187 PetscMPIInt size; 3188 3189 PetscFunctionBegin; 3190 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3191 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3192 3193 PetscCall(PetscNewLog(B, &b)); 3194 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3195 B->data = (void *)b; 3196 3197 b->roworiented = PETSC_TRUE; 3198 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3205 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3206 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3207 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3208 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3209 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3210 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3211 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3212 #if defined(PETSC_HAVE_ELEMENTAL) 3213 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3214 #endif 3215 #if defined(PETSC_HAVE_SCALAPACK) 3216 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3217 #endif 3218 #if defined(PETSC_HAVE_CUDA) 3219 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3220 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3221 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3222 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3223 #endif 3224 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3225 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3226 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3227 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3228 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3229 3230 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3231 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3232 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3233 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3234 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3235 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3236 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3237 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3238 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3239 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3240 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3241 PetscFunctionReturn(0); 3242 } 3243 3244 /*@C 3245 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3246 3247 Not Collective 3248 3249 Input Parameters: 3250 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3251 - col - column index 3252 3253 Output Parameter: 3254 . vals - pointer to the data 3255 3256 Level: intermediate 3257 3258 Note: 3259 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3260 3261 .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3262 @*/ 3263 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals) { 3264 PetscFunctionBegin; 3265 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3266 PetscValidLogicalCollectiveInt(A, col, 2); 3267 PetscValidPointer(vals, 3); 3268 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3269 PetscFunctionReturn(0); 3270 } 3271 3272 /*@C 3273 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3274 3275 Not Collective 3276 3277 Input Parameters: 3278 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3279 - vals - pointer to the data (may be NULL) 3280 3281 Level: intermediate 3282 3283 .seealso: `MATDENSE`, `MatDenseGetColumn()` 3284 @*/ 3285 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals) { 3286 PetscFunctionBegin; 3287 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3288 PetscValidPointer(vals, 2); 3289 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3290 PetscFunctionReturn(0); 3291 } 3292 3293 /*@ 3294 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3295 3296 Collective 3297 3298 Input Parameters: 3299 + mat - the `Mat` object 3300 - col - the column index 3301 3302 Output Parameter: 3303 . v - the vector 3304 3305 Notes: 3306 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3307 3308 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3309 3310 Level: intermediate 3311 3312 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3313 @*/ 3314 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) { 3315 PetscFunctionBegin; 3316 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3317 PetscValidType(A, 1); 3318 PetscValidLogicalCollectiveInt(A, col, 2); 3319 PetscValidPointer(v, 3); 3320 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3321 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3322 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3323 PetscFunctionReturn(0); 3324 } 3325 3326 /*@ 3327 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec(). 3328 3329 Collective 3330 3331 Input Parameters: 3332 + mat - the Mat object 3333 . col - the column index 3334 - v - the Vec object (may be NULL) 3335 3336 Level: intermediate 3337 3338 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3339 @*/ 3340 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) { 3341 PetscFunctionBegin; 3342 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3343 PetscValidType(A, 1); 3344 PetscValidLogicalCollectiveInt(A, col, 2); 3345 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3346 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3347 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3348 PetscFunctionReturn(0); 3349 } 3350 3351 /*@ 3352 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec. 3353 3354 Collective 3355 3356 Input Parameters: 3357 + mat - the Mat object 3358 - col - the column index 3359 3360 Output Parameter: 3361 . v - the vector 3362 3363 Notes: 3364 The vector is owned by PETSc and users cannot modify it. 3365 3366 Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed. 3367 3368 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access. 3369 3370 Level: intermediate 3371 3372 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3373 @*/ 3374 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) { 3375 PetscFunctionBegin; 3376 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3377 PetscValidType(A, 1); 3378 PetscValidLogicalCollectiveInt(A, col, 2); 3379 PetscValidPointer(v, 3); 3380 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3381 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3382 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3383 PetscFunctionReturn(0); 3384 } 3385 3386 /*@ 3387 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead(). 3388 3389 Collective 3390 3391 Input Parameters: 3392 + mat - the Mat object 3393 . col - the column index 3394 - v - the Vec object (may be NULL) 3395 3396 Level: intermediate 3397 3398 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3399 @*/ 3400 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) { 3401 PetscFunctionBegin; 3402 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3403 PetscValidType(A, 1); 3404 PetscValidLogicalCollectiveInt(A, col, 2); 3405 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3406 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3407 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3408 PetscFunctionReturn(0); 3409 } 3410 3411 /*@ 3412 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec. 3413 3414 Collective 3415 3416 Input Parameters: 3417 + mat - the Mat object 3418 - col - the column index 3419 3420 Output Parameter: 3421 . v - the vector 3422 3423 Notes: 3424 The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed. 3425 3426 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access. 3427 3428 Level: intermediate 3429 3430 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3431 @*/ 3432 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) { 3433 PetscFunctionBegin; 3434 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3435 PetscValidType(A, 1); 3436 PetscValidLogicalCollectiveInt(A, col, 2); 3437 PetscValidPointer(v, 3); 3438 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3439 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3440 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3441 PetscFunctionReturn(0); 3442 } 3443 3444 /*@ 3445 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite(). 3446 3447 Collective 3448 3449 Input Parameters: 3450 + mat - the Mat object 3451 . col - the column index 3452 - v - the Vec object (may be NULL) 3453 3454 Level: intermediate 3455 3456 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3457 @*/ 3458 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) { 3459 PetscFunctionBegin; 3460 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3461 PetscValidType(A, 1); 3462 PetscValidLogicalCollectiveInt(A, col, 2); 3463 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3464 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3465 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3466 PetscFunctionReturn(0); 3467 } 3468 3469 /*@ 3470 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat. 3471 3472 Collective 3473 3474 Input Parameters: 3475 + mat - the Mat object 3476 . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0) 3477 . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M) 3478 . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0) 3479 - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N) 3480 3481 Output Parameter: 3482 . v - the matrix 3483 3484 Notes: 3485 The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed. 3486 3487 The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows. 3488 3489 Level: intermediate 3490 3491 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3492 @*/ 3493 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 3494 PetscFunctionBegin; 3495 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3496 PetscValidType(A, 1); 3497 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3498 PetscValidLogicalCollectiveInt(A, rend, 3); 3499 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3500 PetscValidLogicalCollectiveInt(A, cend, 5); 3501 PetscValidPointer(v, 6); 3502 if (rbegin == PETSC_DECIDE) rbegin = 0; 3503 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3504 if (cbegin == PETSC_DECIDE) cbegin = 0; 3505 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3506 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3507 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3508 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3509 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3510 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3511 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3512 PetscFunctionReturn(0); 3513 } 3514 3515 /*@ 3516 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix(). 3517 3518 Collective 3519 3520 Input Parameters: 3521 + mat - the Mat object 3522 - v - the Mat object (may be NULL) 3523 3524 Level: intermediate 3525 3526 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3527 @*/ 3528 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) { 3529 PetscFunctionBegin; 3530 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3531 PetscValidType(A, 1); 3532 PetscValidPointer(v, 2); 3533 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3534 PetscFunctionReturn(0); 3535 } 3536 3537 #include <petscblaslapack.h> 3538 #include <petsc/private/kernels/blockinvert.h> 3539 3540 PetscErrorCode MatSeqDenseInvert(Mat A) { 3541 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3542 PetscInt bs = A->rmap->n; 3543 MatScalar *values = a->v; 3544 const PetscReal shift = 0.0; 3545 PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE; 3546 3547 PetscFunctionBegin; 3548 /* factor and invert each block */ 3549 switch (bs) { 3550 case 1: values[0] = (PetscScalar)1.0 / (values[0] + shift); break; 3551 case 2: 3552 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3553 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3554 break; 3555 case 3: 3556 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3557 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3558 break; 3559 case 4: 3560 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3561 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3562 break; 3563 case 5: { 3564 PetscScalar work[25]; 3565 PetscInt ipvt[5]; 3566 3567 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3568 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3569 } break; 3570 case 6: 3571 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3572 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3573 break; 3574 case 7: 3575 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3576 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3577 break; 3578 default: { 3579 PetscInt *v_pivots, *IJ, j; 3580 PetscScalar *v_work; 3581 3582 PetscCall(PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ)); 3583 for (j = 0; j < bs; j++) IJ[j] = j; 3584 PetscCall(PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 3585 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3586 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 3587 } 3588 } 3589 PetscFunctionReturn(0); 3590 } 3591