1 /* 2 Defines the basic matrix operations for sequential dense. 3 */ 4 5 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <../src/mat/impls/aij/seq/aij.h> 8 9 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian) { 10 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 11 PetscInt j, k, n = A->rmap->n; 12 PetscScalar *v; 13 14 PetscFunctionBegin; 15 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix"); 16 PetscCall(MatDenseGetArray(A, &v)); 17 if (!hermitian) { 18 for (k = 0; k < n; k++) { 19 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j]; 20 } 21 } else { 22 for (k = 0; k < n; k++) { 23 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]); 24 } 25 } 26 PetscCall(MatDenseRestoreArray(A, &v)); 27 PetscFunctionReturn(0); 28 } 29 30 PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A) { 31 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 32 PetscBLASInt info, n; 33 34 PetscFunctionBegin; 35 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 36 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 37 if (A->factortype == MAT_FACTOR_LU) { 38 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 39 if (!mat->fwork) { 40 mat->lfwork = n; 41 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 42 } 43 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 44 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 45 PetscCall(PetscFPTrapPop()); 46 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 47 } else if (A->factortype == MAT_FACTOR_CHOLESKY) { 48 if (A->spd == PETSC_BOOL3_TRUE) { 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info)); 51 PetscCall(PetscFPTrapPop()); 52 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 53 #if defined(PETSC_USE_COMPLEX) 54 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 55 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 56 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 57 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 58 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 59 PetscCall(PetscFPTrapPop()); 60 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE)); 61 #endif 62 } else { /* symmetric case */ 63 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present"); 64 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present"); 65 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 66 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info)); 67 PetscCall(PetscFPTrapPop()); 68 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE)); 69 } 70 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 71 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 72 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve"); 73 74 A->ops->solve = NULL; 75 A->ops->matsolve = NULL; 76 A->ops->solvetranspose = NULL; 77 A->ops->matsolvetranspose = NULL; 78 A->ops->solveadd = NULL; 79 A->ops->solvetransposeadd = NULL; 80 A->factortype = MAT_FACTOR_NONE; 81 PetscCall(PetscFree(A->solvertype)); 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 86 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 87 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j; 88 PetscScalar *slot, *bb, *v; 89 const PetscScalar *xx; 90 91 PetscFunctionBegin; 92 if (PetscDefined(USE_DEBUG)) { 93 for (i = 0; i < N; i++) { 94 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 95 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 96 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n); 97 } 98 } 99 if (!N) PetscFunctionReturn(0); 100 101 /* fix right hand side if needed */ 102 if (x && b) { 103 Vec xt; 104 105 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 106 PetscCall(VecDuplicate(x, &xt)); 107 PetscCall(VecCopy(x, xt)); 108 PetscCall(VecScale(xt, -1.0)); 109 PetscCall(MatMultAdd(A, xt, b, b)); 110 PetscCall(VecDestroy(&xt)); 111 PetscCall(VecGetArrayRead(x, &xx)); 112 PetscCall(VecGetArray(b, &bb)); 113 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 114 PetscCall(VecRestoreArrayRead(x, &xx)); 115 PetscCall(VecRestoreArray(b, &bb)); 116 } 117 118 PetscCall(MatDenseGetArray(A, &v)); 119 for (i = 0; i < N; i++) { 120 slot = v + rows[i] * m; 121 PetscCall(PetscArrayzero(slot, r)); 122 } 123 for (i = 0; i < N; i++) { 124 slot = v + rows[i]; 125 for (j = 0; j < n; j++) { 126 *slot = 0.0; 127 slot += m; 128 } 129 } 130 if (diag != 0.0) { 131 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 132 for (i = 0; i < N; i++) { 133 slot = v + (m + 1) * rows[i]; 134 *slot = diag; 135 } 136 } 137 PetscCall(MatDenseRestoreArray(A, &v)); 138 PetscFunctionReturn(0); 139 } 140 141 PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C) { 142 Mat_SeqDense *c = (Mat_SeqDense *)(C->data); 143 144 PetscFunctionBegin; 145 if (c->ptapwork) { 146 PetscCall((*C->ops->matmultnumeric)(A, P, c->ptapwork)); 147 PetscCall((*C->ops->transposematmultnumeric)(P, c->ptapwork, C)); 148 } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first"); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C) { 153 Mat_SeqDense *c; 154 PetscBool cisdense; 155 156 PetscFunctionBegin; 157 PetscCall(MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N)); 158 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 159 if (!cisdense) { 160 PetscBool flg; 161 162 PetscCall(PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg)); 163 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 164 } 165 PetscCall(MatSetUp(C)); 166 c = (Mat_SeqDense *)C->data; 167 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork)); 168 PetscCall(MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N)); 169 PetscCall(MatSetType(c->ptapwork, ((PetscObject)C)->type_name)); 170 PetscCall(MatSetUp(c->ptapwork)); 171 PetscFunctionReturn(0); 172 } 173 174 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 175 Mat B = NULL; 176 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 177 Mat_SeqDense *b; 178 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i; 179 const MatScalar *av; 180 PetscBool isseqdense; 181 182 PetscFunctionBegin; 183 if (reuse == MAT_REUSE_MATRIX) { 184 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense)); 185 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)(*newmat))->type_name); 186 } 187 if (reuse != MAT_REUSE_MATRIX) { 188 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 189 PetscCall(MatSetSizes(B, m, n, m, n)); 190 PetscCall(MatSetType(B, MATSEQDENSE)); 191 PetscCall(MatSeqDenseSetPreallocation(B, NULL)); 192 b = (Mat_SeqDense *)(B->data); 193 } else { 194 b = (Mat_SeqDense *)((*newmat)->data); 195 PetscCall(PetscArrayzero(b->v, m * n)); 196 } 197 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 198 for (i = 0; i < m; i++) { 199 PetscInt j; 200 for (j = 0; j < ai[1] - ai[0]; j++) { 201 b->v[*aj * m + i] = *av; 202 aj++; 203 av++; 204 } 205 ai++; 206 } 207 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 208 209 if (reuse == MAT_INPLACE_MATRIX) { 210 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 211 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 212 PetscCall(MatHeaderReplace(A, &B)); 213 } else { 214 if (B) *newmat = B; 215 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY)); 216 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY)); 217 } 218 PetscFunctionReturn(0); 219 } 220 221 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 222 Mat B = NULL; 223 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 224 PetscInt i, j; 225 PetscInt *rows, *nnz; 226 MatScalar *aa = a->v, *vals; 227 228 PetscFunctionBegin; 229 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals)); 230 if (reuse != MAT_REUSE_MATRIX) { 231 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 232 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 233 PetscCall(MatSetType(B, MATSEQAIJ)); 234 for (j = 0; j < A->cmap->n; j++) { 235 for (i = 0; i < A->rmap->n; i++) 236 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i]; 237 aa += a->lda; 238 } 239 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz)); 240 } else B = *newmat; 241 aa = a->v; 242 for (j = 0; j < A->cmap->n; j++) { 243 PetscInt numRows = 0; 244 for (i = 0; i < A->rmap->n; i++) 245 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) { 246 rows[numRows] = i; 247 vals[numRows++] = aa[i]; 248 } 249 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES)); 250 aa += a->lda; 251 } 252 PetscCall(PetscFree3(rows, nnz, vals)); 253 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 254 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 255 256 if (reuse == MAT_INPLACE_MATRIX) { 257 PetscCall(MatHeaderReplace(A, &B)); 258 } else if (reuse != MAT_REUSE_MATRIX) *newmat = B; 259 PetscFunctionReturn(0); 260 } 261 262 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { 263 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data; 264 const PetscScalar *xv; 265 PetscScalar *yv; 266 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1; 267 268 PetscFunctionBegin; 269 PetscCall(MatDenseGetArrayRead(X, &xv)); 270 PetscCall(MatDenseGetArray(Y, &yv)); 271 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N)); 272 PetscCall(PetscBLASIntCast(X->rmap->n, &m)); 273 PetscCall(PetscBLASIntCast(x->lda, &ldax)); 274 PetscCall(PetscBLASIntCast(y->lda, &lday)); 275 if (ldax > m || lday > m) { 276 PetscInt j; 277 278 for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one)); 279 } else { 280 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one)); 281 } 282 PetscCall(MatDenseRestoreArrayRead(X, &xv)); 283 PetscCall(MatDenseRestoreArray(Y, &yv)); 284 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0))); 285 PetscFunctionReturn(0); 286 } 287 288 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info) { 289 PetscLogDouble N = A->rmap->n * A->cmap->n; 290 291 PetscFunctionBegin; 292 info->block_size = 1.0; 293 info->nz_allocated = N; 294 info->nz_used = N; 295 info->nz_unneeded = 0; 296 info->assemblies = A->num_ass; 297 info->mallocs = 0; 298 info->memory = 0; /* REVIEW ME */ 299 info->fill_ratio_given = 0; 300 info->fill_ratio_needed = 0; 301 info->factor_mallocs = 0; 302 PetscFunctionReturn(0); 303 } 304 305 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha) { 306 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 307 PetscScalar *v; 308 PetscBLASInt one = 1, j, nz, lda = 0; 309 310 PetscFunctionBegin; 311 PetscCall(MatDenseGetArray(A, &v)); 312 PetscCall(PetscBLASIntCast(a->lda, &lda)); 313 if (lda > A->rmap->n) { 314 PetscCall(PetscBLASIntCast(A->rmap->n, &nz)); 315 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one)); 316 } else { 317 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz)); 318 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one)); 319 } 320 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n)); 321 PetscCall(MatDenseRestoreArray(A, &v)); 322 PetscFunctionReturn(0); 323 } 324 325 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha) { 326 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 327 PetscScalar *v; 328 PetscInt j, k; 329 330 PetscFunctionBegin; 331 PetscCall(MatDenseGetArray(A, &v)); 332 k = PetscMin(A->rmap->n, A->cmap->n); 333 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha; 334 PetscCall(PetscLogFlops(k)); 335 PetscCall(MatDenseRestoreArray(A, &v)); 336 PetscFunctionReturn(0); 337 } 338 339 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { 340 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 341 PetscInt i, j, m = A->rmap->n, N = a->lda; 342 const PetscScalar *v; 343 344 PetscFunctionBegin; 345 *fl = PETSC_FALSE; 346 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 347 PetscCall(MatDenseGetArrayRead(A, &v)); 348 for (i = 0; i < m; i++) { 349 for (j = i; j < m; j++) { 350 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore; 351 } 352 } 353 *fl = PETSC_TRUE; 354 restore: 355 PetscCall(MatDenseRestoreArrayRead(A, &v)); 356 PetscFunctionReturn(0); 357 } 358 359 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl) { 360 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 361 PetscInt i, j, m = A->rmap->n, N = a->lda; 362 const PetscScalar *v; 363 364 PetscFunctionBegin; 365 *fl = PETSC_FALSE; 366 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(0); 367 PetscCall(MatDenseGetArrayRead(A, &v)); 368 for (i = 0; i < m; i++) { 369 for (j = i; j < m; j++) { 370 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore; 371 } 372 } 373 *fl = PETSC_TRUE; 374 restore: 375 PetscCall(MatDenseRestoreArrayRead(A, &v)); 376 PetscFunctionReturn(0); 377 } 378 379 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues) { 380 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 381 PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda; 382 PetscBool isdensecpu; 383 384 PetscFunctionBegin; 385 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap)); 386 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap)); 387 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */ 388 PetscCall(MatDenseSetLDA(newi, lda)); 389 } 390 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu)); 391 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL)); 392 if (cpvalues == MAT_COPY_VALUES) { 393 const PetscScalar *av; 394 PetscScalar *v; 395 396 PetscCall(MatDenseGetArrayRead(A, &av)); 397 PetscCall(MatDenseGetArrayWrite(newi, &v)); 398 PetscCall(MatDenseGetLDA(newi, &nlda)); 399 m = A->rmap->n; 400 if (lda > m || nlda > m) { 401 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(v + j * nlda, av + j * lda, m)); 402 } else { 403 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n)); 404 } 405 PetscCall(MatDenseRestoreArrayWrite(newi, &v)); 406 PetscCall(MatDenseRestoreArrayRead(A, &av)); 407 } 408 PetscFunctionReturn(0); 409 } 410 411 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { 412 PetscFunctionBegin; 413 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat)); 414 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 415 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name)); 416 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues)); 417 PetscFunctionReturn(0); 418 } 419 420 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { 421 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 422 PetscBLASInt info; 423 424 PetscFunctionBegin; 425 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 426 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 427 PetscCall(PetscFPTrapPop()); 428 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %d", (int)info); 429 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 430 PetscFunctionReturn(0); 431 } 432 433 static PetscErrorCode MatConjugate_SeqDense(Mat); 434 435 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T) { 436 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 437 PetscBLASInt info; 438 439 PetscFunctionBegin; 440 if (A->spd == PETSC_BOOL3_TRUE) { 441 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 442 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 443 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info)); 444 PetscCall(PetscFPTrapPop()); 445 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %d", (int)info); 446 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A)); 447 #if defined(PETSC_USE_COMPLEX) 448 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 449 if (T) PetscCall(MatConjugate_SeqDense(A)); 450 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 451 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 452 PetscCall(PetscFPTrapPop()); 453 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %d", (int)info); 454 if (T) PetscCall(MatConjugate_SeqDense(A)); 455 #endif 456 } else { /* symmetric case */ 457 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 458 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 459 PetscCall(PetscFPTrapPop()); 460 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %d", (int)info); 461 } 462 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 463 PetscFunctionReturn(0); 464 } 465 466 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { 467 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 468 PetscBLASInt info; 469 char trans; 470 471 PetscFunctionBegin; 472 if (PetscDefined(USE_COMPLEX)) { 473 trans = 'C'; 474 } else { 475 trans = 'T'; 476 } 477 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 478 { /* lwork depends on the number of right-hand sides */ 479 PetscBLASInt nlfwork, lfwork = -1; 480 PetscScalar fwork; 481 482 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 483 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 484 if (nlfwork > mat->lfwork) { 485 mat->lfwork = nlfwork; 486 PetscCall(PetscFree(mat->fwork)); 487 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 488 } 489 } 490 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 491 PetscCall(PetscFPTrapPop()); 492 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 493 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 494 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 495 PetscCall(PetscFPTrapPop()); 496 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 497 for (PetscInt j = 0; j < nrhs; j++) { 498 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.; 499 } 500 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 501 PetscFunctionReturn(0); 502 } 503 504 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k) { 505 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 506 PetscBLASInt info; 507 508 PetscFunctionBegin; 509 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) { 510 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 511 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info)); 512 PetscCall(PetscFPTrapPop()); 513 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %d", (int)info); 514 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 515 { /* lwork depends on the number of right-hand sides */ 516 PetscBLASInt nlfwork, lfwork = -1; 517 PetscScalar fwork; 518 519 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info)); 520 nlfwork = (PetscBLASInt)PetscRealPart(fwork); 521 if (nlfwork > mat->lfwork) { 522 mat->lfwork = nlfwork; 523 PetscCall(PetscFree(mat->fwork)); 524 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 525 } 526 } 527 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 528 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info)); 529 PetscCall(PetscFPTrapPop()); 530 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %d", (int)info); 531 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A)); 532 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve"); 533 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)))); 534 PetscFunctionReturn(0); 535 } 536 537 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { 538 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 539 PetscScalar *y; 540 PetscBLASInt m = 0, k = 0; 541 542 PetscFunctionBegin; 543 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 544 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 545 if (k < m) { 546 PetscCall(VecCopy(xx, mat->qrrhs)); 547 PetscCall(VecGetArray(mat->qrrhs, &y)); 548 } else { 549 PetscCall(VecCopy(xx, yy)); 550 PetscCall(VecGetArray(yy, &y)); 551 } 552 *_y = y; 553 *_k = k; 554 *_m = m; 555 PetscFunctionReturn(0); 556 } 557 558 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k) { 559 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 560 PetscScalar *y = NULL; 561 PetscBLASInt m, k; 562 563 PetscFunctionBegin; 564 y = *_y; 565 *_y = NULL; 566 k = *_k; 567 m = *_m; 568 if (k < m) { 569 PetscScalar *yv; 570 PetscCall(VecGetArray(yy, &yv)); 571 PetscCall(PetscArraycpy(yv, y, k)); 572 PetscCall(VecRestoreArray(yy, &yv)); 573 PetscCall(VecRestoreArray(mat->qrrhs, &y)); 574 } else { 575 PetscCall(VecRestoreArray(yy, &y)); 576 } 577 PetscFunctionReturn(0); 578 } 579 580 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy) { 581 PetscScalar *y = NULL; 582 PetscBLASInt m = 0, k = 0; 583 584 PetscFunctionBegin; 585 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 586 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE)); 587 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 588 PetscFunctionReturn(0); 589 } 590 591 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy) { 592 PetscScalar *y = NULL; 593 PetscBLASInt m = 0, k = 0; 594 595 PetscFunctionBegin; 596 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 597 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE)); 598 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 599 PetscFunctionReturn(0); 600 } 601 602 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { 603 PetscScalar *y = NULL; 604 PetscBLASInt m = 0, k = 0; 605 606 PetscFunctionBegin; 607 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 608 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE)); 609 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 610 PetscFunctionReturn(0); 611 } 612 613 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy) { 614 PetscScalar *y = NULL; 615 PetscBLASInt m = 0, k = 0; 616 617 PetscFunctionBegin; 618 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 619 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE)); 620 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 621 PetscFunctionReturn(0); 622 } 623 624 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy) { 625 PetscScalar *y = NULL; 626 PetscBLASInt m = 0, k = 0; 627 628 PetscFunctionBegin; 629 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 630 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 631 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 632 PetscFunctionReturn(0); 633 } 634 635 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy) { 636 PetscScalar *y = NULL; 637 PetscBLASInt m = 0, k = 0; 638 639 PetscFunctionBegin; 640 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k)); 641 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k)); 642 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k)); 643 PetscFunctionReturn(0); 644 } 645 646 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { 647 const PetscScalar *b; 648 PetscScalar *y; 649 PetscInt n, _ldb, _ldx; 650 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0; 651 652 PetscFunctionBegin; 653 *_ldy = 0; 654 *_m = 0; 655 *_nrhs = 0; 656 *_k = 0; 657 *_y = NULL; 658 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 659 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 660 PetscCall(MatGetSize(B, NULL, &n)); 661 PetscCall(PetscBLASIntCast(n, &nrhs)); 662 PetscCall(MatDenseGetLDA(B, &_ldb)); 663 PetscCall(PetscBLASIntCast(_ldb, &ldb)); 664 PetscCall(MatDenseGetLDA(X, &_ldx)); 665 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 666 if (ldx < m) { 667 PetscCall(MatDenseGetArrayRead(B, &b)); 668 PetscCall(PetscMalloc1(nrhs * m, &y)); 669 if (ldb == m) { 670 PetscCall(PetscArraycpy(y, b, ldb * nrhs)); 671 } else { 672 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m)); 673 } 674 ldy = m; 675 PetscCall(MatDenseRestoreArrayRead(B, &b)); 676 } else { 677 if (ldb == ldx) { 678 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 679 PetscCall(MatDenseGetArray(X, &y)); 680 } else { 681 PetscCall(MatDenseGetArray(X, &y)); 682 PetscCall(MatDenseGetArrayRead(B, &b)); 683 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m)); 684 PetscCall(MatDenseRestoreArrayRead(B, &b)); 685 } 686 ldy = ldx; 687 } 688 *_y = y; 689 *_ldy = ldy; 690 *_k = k; 691 *_m = m; 692 *_nrhs = nrhs; 693 PetscFunctionReturn(0); 694 } 695 696 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k) { 697 PetscScalar *y; 698 PetscInt _ldx; 699 PetscBLASInt k, ldy, nrhs, ldx = 0; 700 701 PetscFunctionBegin; 702 y = *_y; 703 *_y = NULL; 704 k = *_k; 705 ldy = *_ldy; 706 nrhs = *_nrhs; 707 PetscCall(MatDenseGetLDA(X, &_ldx)); 708 PetscCall(PetscBLASIntCast(_ldx, &ldx)); 709 if (ldx != ldy) { 710 PetscScalar *xv; 711 PetscCall(MatDenseGetArray(X, &xv)); 712 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k)); 713 PetscCall(MatDenseRestoreArray(X, &xv)); 714 PetscCall(PetscFree(y)); 715 } else { 716 PetscCall(MatDenseRestoreArray(X, &y)); 717 } 718 PetscFunctionReturn(0); 719 } 720 721 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X) { 722 PetscScalar *y; 723 PetscBLASInt m, k, ldy, nrhs; 724 725 PetscFunctionBegin; 726 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 727 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 728 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 729 PetscFunctionReturn(0); 730 } 731 732 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X) { 733 PetscScalar *y; 734 PetscBLASInt m, k, ldy, nrhs; 735 736 PetscFunctionBegin; 737 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 738 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 739 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 740 PetscFunctionReturn(0); 741 } 742 743 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X) { 744 PetscScalar *y; 745 PetscBLASInt m, k, ldy, nrhs; 746 747 PetscFunctionBegin; 748 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 749 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE)); 750 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 751 PetscFunctionReturn(0); 752 } 753 754 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X) { 755 PetscScalar *y; 756 PetscBLASInt m, k, ldy, nrhs; 757 758 PetscFunctionBegin; 759 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 760 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE)); 761 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 762 PetscFunctionReturn(0); 763 } 764 765 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X) { 766 PetscScalar *y; 767 PetscBLASInt m, k, ldy, nrhs; 768 769 PetscFunctionBegin; 770 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 771 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 772 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 773 PetscFunctionReturn(0); 774 } 775 776 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X) { 777 PetscScalar *y; 778 PetscBLASInt m, k, ldy, nrhs; 779 780 PetscFunctionBegin; 781 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k)); 782 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k)); 783 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k)); 784 PetscFunctionReturn(0); 785 } 786 787 /* ---------------------------------------------------------------*/ 788 /* COMMENT: I have chosen to hide row permutation in the pivots, 789 rather than put it in the Mat->row slot.*/ 790 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo) { 791 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 792 PetscBLASInt n, m, info; 793 794 PetscFunctionBegin; 795 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 796 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 797 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 798 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 799 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 800 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info)); 801 PetscCall(PetscFPTrapPop()); 802 803 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %d", (int)info); 804 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %d", (int)info); 805 806 A->ops->solve = MatSolve_SeqDense_LU; 807 A->ops->matsolve = MatMatSolve_SeqDense_LU; 808 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU; 809 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU; 810 A->factortype = MAT_FACTOR_LU; 811 812 PetscCall(PetscFree(A->solvertype)); 813 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 814 815 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3)); 816 PetscFunctionReturn(0); 817 } 818 819 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 820 MatFactorInfo info; 821 822 PetscFunctionBegin; 823 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 824 PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info); 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) { 829 PetscFunctionBegin; 830 fact->preallocated = PETSC_TRUE; 831 fact->assembled = PETSC_TRUE; 832 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense; 833 PetscFunctionReturn(0); 834 } 835 836 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */ 837 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo) { 838 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 839 PetscBLASInt info, n; 840 841 PetscFunctionBegin; 842 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 843 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 844 if (A->spd == PETSC_BOOL3_TRUE) { 845 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 846 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info)); 847 PetscCall(PetscFPTrapPop()); 848 #if defined(PETSC_USE_COMPLEX) 849 } else if (A->hermitian == PETSC_BOOL3_TRUE) { 850 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 851 if (!mat->fwork) { 852 PetscScalar dummy; 853 854 mat->lfwork = -1; 855 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 856 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 857 PetscCall(PetscFPTrapPop()); 858 mat->lfwork = (PetscInt)PetscRealPart(dummy); 859 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 860 } 861 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 862 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 863 PetscCall(PetscFPTrapPop()); 864 #endif 865 } else { /* symmetric case */ 866 if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); } 867 if (!mat->fwork) { 868 PetscScalar dummy; 869 870 mat->lfwork = -1; 871 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 872 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); 873 PetscCall(PetscFPTrapPop()); 874 mat->lfwork = (PetscInt)PetscRealPart(dummy); 875 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 876 } 877 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 878 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info)); 879 PetscCall(PetscFPTrapPop()); 880 } 881 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscInt_FMT, (PetscInt)info - 1); 882 883 A->ops->solve = MatSolve_SeqDense_Cholesky; 884 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky; 885 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky; 886 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky; 887 A->factortype = MAT_FACTOR_CHOLESKY; 888 889 PetscCall(PetscFree(A->solvertype)); 890 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 891 892 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0)); 893 PetscFunctionReturn(0); 894 } 895 896 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 897 MatFactorInfo info; 898 899 PetscFunctionBegin; 900 info.fill = 1.0; 901 902 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 903 PetscUseTypeMethod(fact, choleskyfactor, NULL, &info); 904 PetscFunctionReturn(0); 905 } 906 907 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { 908 PetscFunctionBegin; 909 fact->assembled = PETSC_TRUE; 910 fact->preallocated = PETSC_TRUE; 911 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense; 912 PetscFunctionReturn(0); 913 } 914 915 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo) { 916 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 917 PetscBLASInt n, m, info, min, max; 918 919 PetscFunctionBegin; 920 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 921 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 922 max = PetscMax(m, n); 923 min = PetscMin(m, n); 924 if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); } 925 if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); } 926 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &(mat->qrrhs))); 927 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 928 if (!mat->fwork) { 929 PetscScalar dummy; 930 931 mat->lfwork = -1; 932 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 933 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info)); 934 PetscCall(PetscFPTrapPop()); 935 mat->lfwork = (PetscInt)PetscRealPart(dummy); 936 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); 937 } 938 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 939 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info)); 940 PetscCall(PetscFPTrapPop()); 941 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %d", (int)info); 942 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n 943 mat->rank = min; 944 945 A->ops->solve = MatSolve_SeqDense_QR; 946 A->ops->matsolve = MatMatSolve_SeqDense_QR; 947 A->factortype = MAT_FACTOR_QR; 948 if (m == n) { 949 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR; 950 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR; 951 } 952 953 PetscCall(PetscFree(A->solvertype)); 954 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype)); 955 956 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0))); 957 PetscFunctionReturn(0); 958 } 959 960 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy) { 961 MatFactorInfo info; 962 963 PetscFunctionBegin; 964 info.fill = 1.0; 965 966 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES)); 967 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info)); 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info) { 972 PetscFunctionBegin; 973 fact->assembled = PETSC_TRUE; 974 fact->preallocated = PETSC_TRUE; 975 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense)); 976 PetscFunctionReturn(0); 977 } 978 979 /* uses LAPACK */ 980 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact) { 981 PetscFunctionBegin; 982 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact)); 983 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n)); 984 PetscCall(MatSetType(*fact, MATDENSE)); 985 (*fact)->trivialsymbolic = PETSC_TRUE; 986 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 987 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense; 988 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense; 989 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 990 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense; 991 } else if (ftype == MAT_FACTOR_QR) { 992 PetscCall(PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense)); 993 } 994 (*fact)->factortype = ftype; 995 996 PetscCall(PetscFree((*fact)->solvertype)); 997 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype)); 998 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU])); 999 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU])); 1000 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY])); 1001 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC])); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 /* ------------------------------------------------------------------*/ 1006 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx) { 1007 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1008 PetscScalar *x, *v = mat->v, zero = 0.0, xt; 1009 const PetscScalar *b; 1010 PetscInt m = A->rmap->n, i; 1011 PetscBLASInt o = 1, bm = 0; 1012 1013 PetscFunctionBegin; 1014 #if defined(PETSC_HAVE_CUDA) 1015 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1016 #endif 1017 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */ 1018 PetscCall(PetscBLASIntCast(m, &bm)); 1019 if (flag & SOR_ZERO_INITIAL_GUESS) { 1020 /* this is a hack fix, should have another version without the second BLASdotu */ 1021 PetscCall(VecSet(xx, zero)); 1022 } 1023 PetscCall(VecGetArray(xx, &x)); 1024 PetscCall(VecGetArrayRead(bb, &b)); 1025 its = its * lits; 1026 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 1027 while (its--) { 1028 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 1029 for (i = 0; i < m; i++) { 1030 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1031 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1032 } 1033 } 1034 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1035 for (i = m - 1; i >= 0; i--) { 1036 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o)); 1037 x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift); 1038 } 1039 } 1040 } 1041 PetscCall(VecRestoreArrayRead(bb, &b)); 1042 PetscCall(VecRestoreArray(xx, &x)); 1043 PetscFunctionReturn(0); 1044 } 1045 1046 /* -----------------------------------------------------------------*/ 1047 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy) { 1048 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1049 const PetscScalar *v = mat->v, *x; 1050 PetscScalar *y; 1051 PetscBLASInt m, n, _One = 1; 1052 PetscScalar _DOne = 1.0, _DZero = 0.0; 1053 1054 PetscFunctionBegin; 1055 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1056 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1057 PetscCall(VecGetArrayRead(xx, &x)); 1058 PetscCall(VecGetArrayWrite(yy, &y)); 1059 if (!A->rmap->n || !A->cmap->n) { 1060 PetscBLASInt i; 1061 for (i = 0; i < n; i++) y[i] = 0.0; 1062 } else { 1063 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One)); 1064 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n)); 1065 } 1066 PetscCall(VecRestoreArrayRead(xx, &x)); 1067 PetscCall(VecRestoreArrayWrite(yy, &y)); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy) { 1072 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1073 PetscScalar *y, _DOne = 1.0, _DZero = 0.0; 1074 PetscBLASInt m, n, _One = 1; 1075 const PetscScalar *v = mat->v, *x; 1076 1077 PetscFunctionBegin; 1078 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1079 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1080 PetscCall(VecGetArrayRead(xx, &x)); 1081 PetscCall(VecGetArrayWrite(yy, &y)); 1082 if (!A->rmap->n || !A->cmap->n) { 1083 PetscBLASInt i; 1084 for (i = 0; i < m; i++) y[i] = 0.0; 1085 } else { 1086 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One)); 1087 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n)); 1088 } 1089 PetscCall(VecRestoreArrayRead(xx, &x)); 1090 PetscCall(VecRestoreArrayWrite(yy, &y)); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { 1095 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1096 const PetscScalar *v = mat->v, *x; 1097 PetscScalar *y, _DOne = 1.0; 1098 PetscBLASInt m, n, _One = 1; 1099 1100 PetscFunctionBegin; 1101 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1102 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1103 PetscCall(VecCopy(zz, yy)); 1104 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1105 PetscCall(VecGetArrayRead(xx, &x)); 1106 PetscCall(VecGetArray(yy, &y)); 1107 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1108 PetscCall(VecRestoreArrayRead(xx, &x)); 1109 PetscCall(VecRestoreArray(yy, &y)); 1110 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy) { 1115 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1116 const PetscScalar *v = mat->v, *x; 1117 PetscScalar *y; 1118 PetscBLASInt m, n, _One = 1; 1119 PetscScalar _DOne = 1.0; 1120 1121 PetscFunctionBegin; 1122 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 1123 PetscCall(PetscBLASIntCast(A->cmap->n, &n)); 1124 PetscCall(VecCopy(zz, yy)); 1125 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0); 1126 PetscCall(VecGetArrayRead(xx, &x)); 1127 PetscCall(VecGetArray(yy, &y)); 1128 PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One)); 1129 PetscCall(VecRestoreArrayRead(xx, &x)); 1130 PetscCall(VecRestoreArray(yy, &y)); 1131 PetscCall(PetscLogFlops(2.0 * A->rmap->n * A->cmap->n)); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 /* -----------------------------------------------------------------*/ 1136 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { 1137 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1138 PetscInt i; 1139 1140 PetscFunctionBegin; 1141 *ncols = A->cmap->n; 1142 if (cols) { 1143 PetscCall(PetscMalloc1(A->cmap->n, cols)); 1144 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i; 1145 } 1146 if (vals) { 1147 const PetscScalar *v; 1148 1149 PetscCall(MatDenseGetArrayRead(A, &v)); 1150 PetscCall(PetscMalloc1(A->cmap->n, vals)); 1151 v += row; 1152 for (i = 0; i < A->cmap->n; i++) { 1153 (*vals)[i] = *v; 1154 v += mat->lda; 1155 } 1156 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1157 } 1158 PetscFunctionReturn(0); 1159 } 1160 1161 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals) { 1162 PetscFunctionBegin; 1163 if (ncols) *ncols = 0; 1164 if (cols) PetscCall(PetscFree(*cols)); 1165 if (vals) PetscCall(PetscFree(*vals)); 1166 PetscFunctionReturn(0); 1167 } 1168 /* ----------------------------------------------------------------*/ 1169 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv) { 1170 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1171 PetscScalar *av; 1172 PetscInt i, j, idx = 0; 1173 #if defined(PETSC_HAVE_CUDA) 1174 PetscOffloadMask oldf; 1175 #endif 1176 1177 PetscFunctionBegin; 1178 PetscCall(MatDenseGetArray(A, &av)); 1179 if (!mat->roworiented) { 1180 if (addv == INSERT_VALUES) { 1181 for (j = 0; j < n; j++) { 1182 if (indexn[j] < 0) { 1183 idx += m; 1184 continue; 1185 } 1186 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1187 for (i = 0; i < m; i++) { 1188 if (indexm[i] < 0) { 1189 idx++; 1190 continue; 1191 } 1192 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1193 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1194 } 1195 } 1196 } else { 1197 for (j = 0; j < n; j++) { 1198 if (indexn[j] < 0) { 1199 idx += m; 1200 continue; 1201 } 1202 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1203 for (i = 0; i < m; i++) { 1204 if (indexm[i] < 0) { 1205 idx++; 1206 continue; 1207 } 1208 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1209 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1210 } 1211 } 1212 } 1213 } else { 1214 if (addv == INSERT_VALUES) { 1215 for (i = 0; i < m; i++) { 1216 if (indexm[i] < 0) { 1217 idx += n; 1218 continue; 1219 } 1220 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1221 for (j = 0; j < n; j++) { 1222 if (indexn[j] < 0) { 1223 idx++; 1224 continue; 1225 } 1226 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1227 av[indexn[j] * mat->lda + indexm[i]] = v[idx++]; 1228 } 1229 } 1230 } else { 1231 for (i = 0; i < m; i++) { 1232 if (indexm[i] < 0) { 1233 idx += n; 1234 continue; 1235 } 1236 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1); 1237 for (j = 0; j < n; j++) { 1238 if (indexn[j] < 0) { 1239 idx++; 1240 continue; 1241 } 1242 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1); 1243 av[indexn[j] * mat->lda + indexm[i]] += v[idx++]; 1244 } 1245 } 1246 } 1247 } 1248 /* hack to prevent unneeded copy to the GPU while returning the array */ 1249 #if defined(PETSC_HAVE_CUDA) 1250 oldf = A->offloadmask; 1251 A->offloadmask = PETSC_OFFLOAD_GPU; 1252 #endif 1253 PetscCall(MatDenseRestoreArray(A, &av)); 1254 #if defined(PETSC_HAVE_CUDA) 1255 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU); 1256 #endif 1257 PetscFunctionReturn(0); 1258 } 1259 1260 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[]) { 1261 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1262 const PetscScalar *vv; 1263 PetscInt i, j; 1264 1265 PetscFunctionBegin; 1266 PetscCall(MatDenseGetArrayRead(A, &vv)); 1267 /* row-oriented output */ 1268 for (i = 0; i < m; i++) { 1269 if (indexm[i] < 0) { 1270 v += n; 1271 continue; 1272 } 1273 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n); 1274 for (j = 0; j < n; j++) { 1275 if (indexn[j] < 0) { 1276 v++; 1277 continue; 1278 } 1279 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n); 1280 *v++ = vv[indexn[j] * mat->lda + indexm[i]]; 1281 } 1282 } 1283 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 /* -----------------------------------------------------------------*/ 1288 1289 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer) { 1290 PetscBool skipHeader; 1291 PetscViewerFormat format; 1292 PetscInt header[4], M, N, m, lda, i, j, k; 1293 const PetscScalar *v; 1294 PetscScalar *vwork; 1295 1296 PetscFunctionBegin; 1297 PetscCall(PetscViewerSetUp(viewer)); 1298 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1299 PetscCall(PetscViewerGetFormat(viewer, &format)); 1300 if (skipHeader) format = PETSC_VIEWER_NATIVE; 1301 1302 PetscCall(MatGetSize(mat, &M, &N)); 1303 1304 /* write matrix header */ 1305 header[0] = MAT_FILE_CLASSID; 1306 header[1] = M; 1307 header[2] = N; 1308 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N; 1309 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1310 1311 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1312 if (format != PETSC_VIEWER_NATIVE) { 1313 PetscInt nnz = m * N, *iwork; 1314 /* store row lengths for each row */ 1315 PetscCall(PetscMalloc1(nnz, &iwork)); 1316 for (i = 0; i < m; i++) iwork[i] = N; 1317 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1318 /* store column indices (zero start index) */ 1319 for (k = 0, i = 0; i < m; i++) 1320 for (j = 0; j < N; j++, k++) iwork[k] = j; 1321 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1322 PetscCall(PetscFree(iwork)); 1323 } 1324 /* store matrix values as a dense matrix in row major order */ 1325 PetscCall(PetscMalloc1(m * N, &vwork)); 1326 PetscCall(MatDenseGetArrayRead(mat, &v)); 1327 PetscCall(MatDenseGetLDA(mat, &lda)); 1328 for (k = 0, i = 0; i < m; i++) 1329 for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j]; 1330 PetscCall(MatDenseRestoreArrayRead(mat, &v)); 1331 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1332 PetscCall(PetscFree(vwork)); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer) { 1337 PetscBool skipHeader; 1338 PetscInt header[4], M, N, m, nz, lda, i, j, k; 1339 PetscInt rows, cols; 1340 PetscScalar *v, *vwork; 1341 1342 PetscFunctionBegin; 1343 PetscCall(PetscViewerSetUp(viewer)); 1344 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader)); 1345 1346 if (!skipHeader) { 1347 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 1348 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 1349 M = header[1]; 1350 N = header[2]; 1351 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 1352 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 1353 nz = header[3]; 1354 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz); 1355 } else { 1356 PetscCall(MatGetSize(mat, &M, &N)); 1357 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix"); 1358 nz = MATRIX_BINARY_FORMAT_DENSE; 1359 } 1360 1361 /* setup global sizes if not set */ 1362 if (mat->rmap->N < 0) mat->rmap->N = M; 1363 if (mat->cmap->N < 0) mat->cmap->N = N; 1364 PetscCall(MatSetUp(mat)); 1365 /* check if global sizes are correct */ 1366 PetscCall(MatGetSize(mat, &rows, &cols)); 1367 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 1368 1369 PetscCall(MatGetSize(mat, NULL, &N)); 1370 PetscCall(MatGetLocalSize(mat, &m, NULL)); 1371 PetscCall(MatDenseGetArray(mat, &v)); 1372 PetscCall(MatDenseGetLDA(mat, &lda)); 1373 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */ 1374 PetscInt nnz = m * N; 1375 /* read in matrix values */ 1376 PetscCall(PetscMalloc1(nnz, &vwork)); 1377 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1378 /* store values in column major order */ 1379 for (j = 0; j < N; j++) 1380 for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j]; 1381 PetscCall(PetscFree(vwork)); 1382 } else { /* matrix in file is sparse format */ 1383 PetscInt nnz = 0, *rlens, *icols; 1384 /* read in row lengths */ 1385 PetscCall(PetscMalloc1(m, &rlens)); 1386 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1387 for (i = 0; i < m; i++) nnz += rlens[i]; 1388 /* read in column indices and values */ 1389 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork)); 1390 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1391 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1392 /* store values in column major order */ 1393 for (k = 0, i = 0; i < m; i++) 1394 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k]; 1395 PetscCall(PetscFree(rlens)); 1396 PetscCall(PetscFree2(icols, vwork)); 1397 } 1398 PetscCall(MatDenseRestoreArray(mat, &v)); 1399 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1400 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer) { 1405 PetscBool isbinary, ishdf5; 1406 1407 PetscFunctionBegin; 1408 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1409 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1410 /* force binary viewer to load .info file if it has not yet done so */ 1411 PetscCall(PetscViewerSetUp(viewer)); 1412 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1413 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1414 if (isbinary) { 1415 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1416 } else if (ishdf5) { 1417 #if defined(PETSC_HAVE_HDF5) 1418 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1419 #else 1420 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 1421 #endif 1422 } else { 1423 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer) { 1429 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1430 PetscInt i, j; 1431 const char *name; 1432 PetscScalar *v, *av; 1433 PetscViewerFormat format; 1434 #if defined(PETSC_USE_COMPLEX) 1435 PetscBool allreal = PETSC_TRUE; 1436 #endif 1437 1438 PetscFunctionBegin; 1439 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av)); 1440 PetscCall(PetscViewerGetFormat(viewer, &format)); 1441 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 PetscFunctionReturn(0); /* do nothing for now */ 1443 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1444 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1445 for (i = 0; i < A->rmap->n; i++) { 1446 v = av + i; 1447 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i)); 1448 for (j = 0; j < A->cmap->n; j++) { 1449 #if defined(PETSC_USE_COMPLEX) 1450 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) { 1451 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1452 } else if (PetscRealPart(*v)) { 1453 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v))); 1454 } 1455 #else 1456 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v)); 1457 #endif 1458 v += a->lda; 1459 } 1460 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1461 } 1462 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1463 } else { 1464 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1465 #if defined(PETSC_USE_COMPLEX) 1466 /* determine if matrix has all real values */ 1467 for (j = 0; j < A->cmap->n; j++) { 1468 v = av + j * a->lda; 1469 for (i = 0; i < A->rmap->n; i++) { 1470 if (PetscImaginaryPart(v[i])) { 1471 allreal = PETSC_FALSE; 1472 break; 1473 } 1474 } 1475 } 1476 #endif 1477 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1478 PetscCall(PetscObjectGetName((PetscObject)A, &name)); 1479 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n)); 1480 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n)); 1481 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name)); 1482 } 1483 1484 for (i = 0; i < A->rmap->n; i++) { 1485 v = av + i; 1486 for (j = 0; j < A->cmap->n; j++) { 1487 #if defined(PETSC_USE_COMPLEX) 1488 if (allreal) { 1489 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v))); 1490 } else { 1491 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v))); 1492 } 1493 #else 1494 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v)); 1495 #endif 1496 v += a->lda; 1497 } 1498 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1499 } 1500 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n")); 1501 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1502 } 1503 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av)); 1504 PetscCall(PetscViewerFlush(viewer)); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #include <petscdraw.h> 1509 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa) { 1510 Mat A = (Mat)Aa; 1511 PetscInt m = A->rmap->n, n = A->cmap->n, i, j; 1512 int color = PETSC_DRAW_WHITE; 1513 const PetscScalar *v; 1514 PetscViewer viewer; 1515 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1516 PetscViewerFormat format; 1517 1518 PetscFunctionBegin; 1519 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1520 PetscCall(PetscViewerGetFormat(viewer, &format)); 1521 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1522 1523 /* Loop over matrix elements drawing boxes */ 1524 PetscCall(MatDenseGetArrayRead(A, &v)); 1525 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1526 PetscDrawCollectiveBegin(draw); 1527 /* Blue for negative and Red for positive */ 1528 for (j = 0; j < n; j++) { 1529 x_l = j; 1530 x_r = x_l + 1.0; 1531 for (i = 0; i < m; i++) { 1532 y_l = m - i - 1.0; 1533 y_r = y_l + 1.0; 1534 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED; 1535 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE; 1536 else continue; 1537 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1538 } 1539 } 1540 PetscDrawCollectiveEnd(draw); 1541 } else { 1542 /* use contour shading to indicate magnitude of values */ 1543 /* first determine max of all nonzero values */ 1544 PetscReal minv = 0.0, maxv = 0.0; 1545 PetscDraw popup; 1546 1547 for (i = 0; i < m * n; i++) { 1548 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]); 1549 } 1550 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1551 PetscCall(PetscDrawGetPopup(draw, &popup)); 1552 PetscCall(PetscDrawScalePopup(popup, minv, maxv)); 1553 1554 PetscDrawCollectiveBegin(draw); 1555 for (j = 0; j < n; j++) { 1556 x_l = j; 1557 x_r = x_l + 1.0; 1558 for (i = 0; i < m; i++) { 1559 y_l = m - i - 1.0; 1560 y_r = y_l + 1.0; 1561 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv); 1562 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color)); 1563 } 1564 } 1565 PetscDrawCollectiveEnd(draw); 1566 } 1567 PetscCall(MatDenseRestoreArrayRead(A, &v)); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer) { 1572 PetscDraw draw; 1573 PetscBool isnull; 1574 PetscReal xr, yr, xl, yl, h, w; 1575 1576 PetscFunctionBegin; 1577 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1578 PetscCall(PetscDrawIsNull(draw, &isnull)); 1579 if (isnull) PetscFunctionReturn(0); 1580 1581 xr = A->cmap->n; 1582 yr = A->rmap->n; 1583 h = yr / 10.0; 1584 w = xr / 10.0; 1585 xr += w; 1586 yr += h; 1587 xl = -w; 1588 yl = -h; 1589 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1590 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1591 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A)); 1592 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1593 PetscCall(PetscDrawSave(draw)); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer) { 1598 PetscBool iascii, isbinary, isdraw; 1599 1600 PetscFunctionBegin; 1601 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1602 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1603 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1604 if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer)); 1605 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer)); 1606 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer)); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array) { 1611 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1612 1613 PetscFunctionBegin; 1614 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1615 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1616 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreArray() first"); 1617 a->unplacedarray = a->v; 1618 a->unplaced_user_alloc = a->user_alloc; 1619 a->v = (PetscScalar *)array; 1620 a->user_alloc = PETSC_TRUE; 1621 #if defined(PETSC_HAVE_CUDA) 1622 A->offloadmask = PETSC_OFFLOAD_CPU; 1623 #endif 1624 PetscFunctionReturn(0); 1625 } 1626 1627 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A) { 1628 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1629 1630 PetscFunctionBegin; 1631 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1632 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1633 a->v = a->unplacedarray; 1634 a->user_alloc = a->unplaced_user_alloc; 1635 a->unplacedarray = NULL; 1636 #if defined(PETSC_HAVE_CUDA) 1637 A->offloadmask = PETSC_OFFLOAD_CPU; 1638 #endif 1639 PetscFunctionReturn(0); 1640 } 1641 1642 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array) { 1643 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 1644 1645 PetscFunctionBegin; 1646 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1647 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1648 if (!a->user_alloc) PetscCall(PetscFree(a->v)); 1649 a->v = (PetscScalar *)array; 1650 a->user_alloc = PETSC_FALSE; 1651 #if defined(PETSC_HAVE_CUDA) 1652 A->offloadmask = PETSC_OFFLOAD_CPU; 1653 #endif 1654 PetscFunctionReturn(0); 1655 } 1656 1657 PetscErrorCode MatDestroy_SeqDense(Mat mat) { 1658 Mat_SeqDense *l = (Mat_SeqDense *)mat->data; 1659 1660 PetscFunctionBegin; 1661 #if defined(PETSC_USE_LOG) 1662 PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n); 1663 #endif 1664 PetscCall(VecDestroy(&(l->qrrhs))); 1665 PetscCall(PetscFree(l->tau)); 1666 PetscCall(PetscFree(l->pivots)); 1667 PetscCall(PetscFree(l->fwork)); 1668 PetscCall(MatDestroy(&l->ptapwork)); 1669 if (!l->user_alloc) PetscCall(PetscFree(l->v)); 1670 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray)); 1671 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1672 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1673 PetscCall(VecDestroy(&l->cvec)); 1674 PetscCall(MatDestroy(&l->cmat)); 1675 PetscCall(PetscFree(mat->data)); 1676 1677 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1678 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL)); 1679 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL)); 1680 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL)); 1681 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 1682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 1683 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 1684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 1685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 1686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 1687 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 1688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 1689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 1690 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 1691 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 1692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL)); 1693 #if defined(PETSC_HAVE_ELEMENTAL) 1694 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL)); 1695 #endif 1696 #if defined(PETSC_HAVE_SCALAPACK) 1697 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL)); 1698 #endif 1699 #if defined(PETSC_HAVE_CUDA) 1700 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL)); 1701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL)); 1702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL)); 1703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL)); 1704 #endif 1705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL)); 1706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL)); 1707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL)); 1708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL)); 1709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL)); 1710 1711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 1712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 1713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 1714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 1715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 1716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 1717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 1718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 1719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 1720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 1721 PetscFunctionReturn(0); 1722 } 1723 1724 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout) { 1725 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1726 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n; 1727 PetscScalar *v, tmp; 1728 1729 PetscFunctionBegin; 1730 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1731 if (reuse == MAT_INPLACE_MATRIX) { 1732 if (m == n) { /* in place transpose */ 1733 PetscCall(MatDenseGetArray(A, &v)); 1734 for (j = 0; j < m; j++) { 1735 for (k = 0; k < j; k++) { 1736 tmp = v[j + k * M]; 1737 v[j + k * M] = v[k + j * M]; 1738 v[k + j * M] = tmp; 1739 } 1740 } 1741 PetscCall(MatDenseRestoreArray(A, &v)); 1742 } else { /* reuse memory, temporary allocates new memory */ 1743 PetscScalar *v2; 1744 PetscLayout tmplayout; 1745 1746 PetscCall(PetscMalloc1((size_t)m * n, &v2)); 1747 PetscCall(MatDenseGetArray(A, &v)); 1748 for (j = 0; j < n; j++) { 1749 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M]; 1750 } 1751 PetscCall(PetscArraycpy(v, v2, (size_t)m * n)); 1752 PetscCall(PetscFree(v2)); 1753 PetscCall(MatDenseRestoreArray(A, &v)); 1754 /* cleanup size dependent quantities */ 1755 PetscCall(VecDestroy(&mat->cvec)); 1756 PetscCall(MatDestroy(&mat->cmat)); 1757 PetscCall(PetscFree(mat->pivots)); 1758 PetscCall(PetscFree(mat->fwork)); 1759 PetscCall(MatDestroy(&mat->ptapwork)); 1760 /* swap row/col layouts */ 1761 mat->lda = n; 1762 tmplayout = A->rmap; 1763 A->rmap = A->cmap; 1764 A->cmap = tmplayout; 1765 } 1766 } else { /* out-of-place transpose */ 1767 Mat tmat; 1768 Mat_SeqDense *tmatd; 1769 PetscScalar *v2; 1770 PetscInt M2; 1771 1772 if (reuse == MAT_INITIAL_MATRIX) { 1773 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat)); 1774 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n)); 1775 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name)); 1776 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL)); 1777 } else tmat = *matout; 1778 1779 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v)); 1780 PetscCall(MatDenseGetArray(tmat, &v2)); 1781 tmatd = (Mat_SeqDense *)tmat->data; 1782 M2 = tmatd->lda; 1783 for (j = 0; j < n; j++) { 1784 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M]; 1785 } 1786 PetscCall(MatDenseRestoreArray(tmat, &v2)); 1787 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v)); 1788 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 1789 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 1790 *matout = tmat; 1791 } 1792 PetscFunctionReturn(0); 1793 } 1794 1795 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg) { 1796 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data; 1797 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data; 1798 PetscInt i; 1799 const PetscScalar *v1, *v2; 1800 1801 PetscFunctionBegin; 1802 if (A1->rmap->n != A2->rmap->n) { 1803 *flg = PETSC_FALSE; 1804 PetscFunctionReturn(0); 1805 } 1806 if (A1->cmap->n != A2->cmap->n) { 1807 *flg = PETSC_FALSE; 1808 PetscFunctionReturn(0); 1809 } 1810 PetscCall(MatDenseGetArrayRead(A1, &v1)); 1811 PetscCall(MatDenseGetArrayRead(A2, &v2)); 1812 for (i = 0; i < A1->cmap->n; i++) { 1813 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg)); 1814 if (*flg == PETSC_FALSE) PetscFunctionReturn(0); 1815 v1 += mat1->lda; 1816 v2 += mat2->lda; 1817 } 1818 PetscCall(MatDenseRestoreArrayRead(A1, &v1)); 1819 PetscCall(MatDenseRestoreArrayRead(A2, &v2)); 1820 *flg = PETSC_TRUE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v) { 1825 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1826 PetscInt i, n, len; 1827 PetscScalar *x; 1828 const PetscScalar *vv; 1829 1830 PetscFunctionBegin; 1831 PetscCall(VecGetSize(v, &n)); 1832 PetscCall(VecGetArray(v, &x)); 1833 len = PetscMin(A->rmap->n, A->cmap->n); 1834 PetscCall(MatDenseGetArrayRead(A, &vv)); 1835 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 1836 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i]; 1837 PetscCall(MatDenseRestoreArrayRead(A, &vv)); 1838 PetscCall(VecRestoreArray(v, &x)); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr) { 1843 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1844 const PetscScalar *l, *r; 1845 PetscScalar x, *v, *vv; 1846 PetscInt i, j, m = A->rmap->n, n = A->cmap->n; 1847 1848 PetscFunctionBegin; 1849 PetscCall(MatDenseGetArray(A, &vv)); 1850 if (ll) { 1851 PetscCall(VecGetSize(ll, &m)); 1852 PetscCall(VecGetArrayRead(ll, &l)); 1853 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size"); 1854 for (i = 0; i < m; i++) { 1855 x = l[i]; 1856 v = vv + i; 1857 for (j = 0; j < n; j++) { 1858 (*v) *= x; 1859 v += mat->lda; 1860 } 1861 } 1862 PetscCall(VecRestoreArrayRead(ll, &l)); 1863 PetscCall(PetscLogFlops(1.0 * n * m)); 1864 } 1865 if (rr) { 1866 PetscCall(VecGetSize(rr, &n)); 1867 PetscCall(VecGetArrayRead(rr, &r)); 1868 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size"); 1869 for (i = 0; i < n; i++) { 1870 x = r[i]; 1871 v = vv + i * mat->lda; 1872 for (j = 0; j < m; j++) (*v++) *= x; 1873 } 1874 PetscCall(VecRestoreArrayRead(rr, &r)); 1875 PetscCall(PetscLogFlops(1.0 * n * m)); 1876 } 1877 PetscCall(MatDenseRestoreArray(A, &vv)); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm) { 1882 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 1883 PetscScalar *v, *vv; 1884 PetscReal sum = 0.0; 1885 PetscInt lda, m = A->rmap->n, i, j; 1886 1887 PetscFunctionBegin; 1888 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv)); 1889 PetscCall(MatDenseGetLDA(A, &lda)); 1890 v = vv; 1891 if (type == NORM_FROBENIUS) { 1892 if (lda > m) { 1893 for (j = 0; j < A->cmap->n; j++) { 1894 v = vv + j * lda; 1895 for (i = 0; i < m; i++) { 1896 sum += PetscRealPart(PetscConj(*v) * (*v)); 1897 v++; 1898 } 1899 } 1900 } else { 1901 #if defined(PETSC_USE_REAL___FP16) 1902 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n; 1903 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one)); 1904 } 1905 #else 1906 for (i = 0; i < A->cmap->n * A->rmap->n; i++) { 1907 sum += PetscRealPart(PetscConj(*v) * (*v)); 1908 v++; 1909 } 1910 } 1911 *nrm = PetscSqrtReal(sum); 1912 #endif 1913 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n)); 1914 } else if (type == NORM_1) { 1915 *nrm = 0.0; 1916 for (j = 0; j < A->cmap->n; j++) { 1917 v = vv + j * mat->lda; 1918 sum = 0.0; 1919 for (i = 0; i < A->rmap->n; i++) { 1920 sum += PetscAbsScalar(*v); 1921 v++; 1922 } 1923 if (sum > *nrm) *nrm = sum; 1924 } 1925 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1926 } else if (type == NORM_INFINITY) { 1927 *nrm = 0.0; 1928 for (j = 0; j < A->rmap->n; j++) { 1929 v = vv + j; 1930 sum = 0.0; 1931 for (i = 0; i < A->cmap->n; i++) { 1932 sum += PetscAbsScalar(*v); 1933 v += mat->lda; 1934 } 1935 if (sum > *nrm) *nrm = sum; 1936 } 1937 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n)); 1938 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm"); 1939 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv)); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg) { 1944 Mat_SeqDense *aij = (Mat_SeqDense *)A->data; 1945 1946 PetscFunctionBegin; 1947 switch (op) { 1948 case MAT_ROW_ORIENTED: aij->roworiented = flg; break; 1949 case MAT_NEW_NONZERO_LOCATIONS: 1950 case MAT_NEW_NONZERO_LOCATION_ERR: 1951 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1952 case MAT_FORCE_DIAGONAL_ENTRIES: 1953 case MAT_KEEP_NONZERO_PATTERN: 1954 case MAT_IGNORE_OFF_PROC_ENTRIES: 1955 case MAT_USE_HASH_TABLE: 1956 case MAT_IGNORE_ZERO_ENTRIES: 1957 case MAT_IGNORE_LOWER_TRIANGULAR: 1958 case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 1959 case MAT_SPD: 1960 case MAT_SYMMETRIC: 1961 case MAT_STRUCTURALLY_SYMMETRIC: 1962 case MAT_HERMITIAN: 1963 case MAT_SYMMETRY_ETERNAL: 1964 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1965 case MAT_SPD_ETERNAL: break; 1966 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 1967 } 1968 PetscFunctionReturn(0); 1969 } 1970 1971 PetscErrorCode MatZeroEntries_SeqDense(Mat A) { 1972 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 1973 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j; 1974 PetscScalar *v; 1975 1976 PetscFunctionBegin; 1977 PetscCall(MatDenseGetArrayWrite(A, &v)); 1978 if (lda > m) { 1979 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m)); 1980 } else { 1981 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n))); 1982 } 1983 PetscCall(MatDenseRestoreArrayWrite(A, &v)); 1984 PetscFunctionReturn(0); 1985 } 1986 1987 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 1988 Mat_SeqDense *l = (Mat_SeqDense *)A->data; 1989 PetscInt m = l->lda, n = A->cmap->n, i, j; 1990 PetscScalar *slot, *bb, *v; 1991 const PetscScalar *xx; 1992 1993 PetscFunctionBegin; 1994 if (PetscDefined(USE_DEBUG)) { 1995 for (i = 0; i < N; i++) { 1996 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed"); 1997 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n); 1998 } 1999 } 2000 if (!N) PetscFunctionReturn(0); 2001 2002 /* fix right hand side if needed */ 2003 if (x && b) { 2004 PetscCall(VecGetArrayRead(x, &xx)); 2005 PetscCall(VecGetArray(b, &bb)); 2006 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]]; 2007 PetscCall(VecRestoreArrayRead(x, &xx)); 2008 PetscCall(VecRestoreArray(b, &bb)); 2009 } 2010 2011 PetscCall(MatDenseGetArray(A, &v)); 2012 for (i = 0; i < N; i++) { 2013 slot = v + rows[i]; 2014 for (j = 0; j < n; j++) { 2015 *slot = 0.0; 2016 slot += m; 2017 } 2018 } 2019 if (diag != 0.0) { 2020 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices"); 2021 for (i = 0; i < N; i++) { 2022 slot = v + (m + 1) * rows[i]; 2023 *slot = diag; 2024 } 2025 } 2026 PetscCall(MatDenseRestoreArray(A, &v)); 2027 PetscFunctionReturn(0); 2028 } 2029 2030 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda) { 2031 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2032 2033 PetscFunctionBegin; 2034 *lda = mat->lda; 2035 PetscFunctionReturn(0); 2036 } 2037 2038 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array) { 2039 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2040 2041 PetscFunctionBegin; 2042 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2043 *array = mat->v; 2044 PetscFunctionReturn(0); 2045 } 2046 2047 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array) { 2048 PetscFunctionBegin; 2049 if (array) *array = NULL; 2050 PetscFunctionReturn(0); 2051 } 2052 2053 /*@ 2054 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()` 2055 2056 Not collective 2057 2058 Input Parameter: 2059 . mat - a `MATDENSE` or `MATDENSECUDA` matrix 2060 2061 Output Parameter: 2062 . lda - the leading dimension 2063 2064 Level: intermediate 2065 2066 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()` 2067 @*/ 2068 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda) { 2069 PetscFunctionBegin; 2070 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2071 PetscValidIntPointer(lda, 2); 2072 MatCheckPreallocated(A, 1); 2073 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda)); 2074 PetscFunctionReturn(0); 2075 } 2076 2077 /*@ 2078 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix 2079 2080 Not collective 2081 2082 Input Parameters: 2083 + mat - a `MATDENSE` or `MATDENSECUDA` matrix 2084 - lda - the leading dimension 2085 2086 Level: intermediate 2087 2088 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()` 2089 @*/ 2090 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda) { 2091 PetscFunctionBegin; 2092 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2093 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda)); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 /*@C 2098 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored 2099 2100 Logically Collective on A 2101 2102 Input Parameter: 2103 . mat - a dense matrix 2104 2105 Output Parameter: 2106 . array - pointer to the data 2107 2108 Level: intermediate 2109 2110 .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2111 @*/ 2112 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array) { 2113 PetscFunctionBegin; 2114 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2115 PetscValidPointer(array, 2); 2116 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array)); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 /*@C 2121 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()` 2122 2123 Logically Collective on A 2124 2125 Input Parameters: 2126 + mat - a dense matrix 2127 - array - pointer to the data (may be NULL) 2128 2129 Level: intermediate 2130 2131 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2132 @*/ 2133 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array) { 2134 PetscFunctionBegin; 2135 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2136 PetscValidPointer(array, 2); 2137 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array)); 2138 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2139 #if defined(PETSC_HAVE_CUDA) 2140 A->offloadmask = PETSC_OFFLOAD_CPU; 2141 #endif 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*@C 2146 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored 2147 2148 Not Collective 2149 2150 Input Parameter: 2151 . mat - a dense matrix 2152 2153 Output Parameter: 2154 . array - pointer to the data 2155 2156 Level: intermediate 2157 2158 .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2159 @*/ 2160 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array) { 2161 PetscFunctionBegin; 2162 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2163 PetscValidPointer(array, 2); 2164 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 /*@C 2169 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()` 2170 2171 Not Collective 2172 2173 Input Parameters: 2174 + mat - a dense matrix 2175 - array - pointer to the data (may be NULL) 2176 2177 Level: intermediate 2178 2179 .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()` 2180 @*/ 2181 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array) { 2182 PetscFunctionBegin; 2183 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2184 PetscValidPointer(array, 2); 2185 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array)); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 /*@C 2190 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored 2191 2192 Not Collective 2193 2194 Input Parameter: 2195 . mat - a dense matrix 2196 2197 Output Parameter: 2198 . array - pointer to the data 2199 2200 Level: intermediate 2201 2202 .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2203 @*/ 2204 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array) { 2205 PetscFunctionBegin; 2206 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2207 PetscValidPointer(array, 2); 2208 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 /*@C 2213 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()` 2214 2215 Not Collective 2216 2217 Input Parameters: 2218 + mat - a dense matrix 2219 - array - pointer to the data (may be NULL) 2220 2221 Level: intermediate 2222 2223 .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()` 2224 @*/ 2225 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array) { 2226 PetscFunctionBegin; 2227 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2228 PetscValidPointer(array, 2); 2229 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array)); 2230 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2231 #if defined(PETSC_HAVE_CUDA) 2232 A->offloadmask = PETSC_OFFLOAD_CPU; 2233 #endif 2234 PetscFunctionReturn(0); 2235 } 2236 2237 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) { 2238 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2239 PetscInt i, j, nrows, ncols, ldb; 2240 const PetscInt *irow, *icol; 2241 PetscScalar *av, *bv, *v = mat->v; 2242 Mat newmat; 2243 2244 PetscFunctionBegin; 2245 PetscCall(ISGetIndices(isrow, &irow)); 2246 PetscCall(ISGetIndices(iscol, &icol)); 2247 PetscCall(ISGetLocalSize(isrow, &nrows)); 2248 PetscCall(ISGetLocalSize(iscol, &ncols)); 2249 2250 /* Check submatrixcall */ 2251 if (scall == MAT_REUSE_MATRIX) { 2252 PetscInt n_cols, n_rows; 2253 PetscCall(MatGetSize(*B, &n_rows, &n_cols)); 2254 if (n_rows != nrows || n_cols != ncols) { 2255 /* resize the result matrix to match number of requested rows/columns */ 2256 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols)); 2257 } 2258 newmat = *B; 2259 } else { 2260 /* Create and fill new matrix */ 2261 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2262 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols)); 2263 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2264 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL)); 2265 } 2266 2267 /* Now extract the data pointers and do the copy,column at a time */ 2268 PetscCall(MatDenseGetArray(newmat, &bv)); 2269 PetscCall(MatDenseGetLDA(newmat, &ldb)); 2270 for (i = 0; i < ncols; i++) { 2271 av = v + mat->lda * icol[i]; 2272 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]]; 2273 bv += ldb; 2274 } 2275 PetscCall(MatDenseRestoreArray(newmat, &bv)); 2276 2277 /* Assemble the matrices so that the correct flags are set */ 2278 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 2279 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 2280 2281 /* Free work space */ 2282 PetscCall(ISRestoreIndices(isrow, &irow)); 2283 PetscCall(ISRestoreIndices(iscol, &icol)); 2284 *B = newmat; 2285 PetscFunctionReturn(0); 2286 } 2287 2288 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) { 2289 PetscInt i; 2290 2291 PetscFunctionBegin; 2292 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B)); 2293 2294 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i])); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode) { 2299 PetscFunctionBegin; 2300 PetscFunctionReturn(0); 2301 } 2302 2303 static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode) { 2304 PetscFunctionBegin; 2305 PetscFunctionReturn(0); 2306 } 2307 2308 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str) { 2309 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data; 2310 const PetscScalar *va; 2311 PetscScalar *vb; 2312 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j; 2313 2314 PetscFunctionBegin; 2315 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2316 if (A->ops->copy != B->ops->copy) { 2317 PetscCall(MatCopy_Basic(A, B, str)); 2318 PetscFunctionReturn(0); 2319 } 2320 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)"); 2321 PetscCall(MatDenseGetArrayRead(A, &va)); 2322 PetscCall(MatDenseGetArray(B, &vb)); 2323 if (lda1 > m || lda2 > m) { 2324 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m)); 2325 } else { 2326 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n)); 2327 } 2328 PetscCall(MatDenseRestoreArray(B, &vb)); 2329 PetscCall(MatDenseRestoreArrayRead(A, &va)); 2330 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2331 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2332 PetscFunctionReturn(0); 2333 } 2334 2335 PetscErrorCode MatSetUp_SeqDense(Mat A) { 2336 PetscFunctionBegin; 2337 PetscCall(PetscLayoutSetUp(A->rmap)); 2338 PetscCall(PetscLayoutSetUp(A->cmap)); 2339 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 static PetscErrorCode MatConjugate_SeqDense(Mat A) { 2344 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2345 PetscInt i, j; 2346 PetscInt min = PetscMin(A->rmap->n, A->cmap->n); 2347 PetscScalar *aa; 2348 2349 PetscFunctionBegin; 2350 PetscCall(MatDenseGetArray(A, &aa)); 2351 for (j = 0; j < A->cmap->n; j++) { 2352 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]); 2353 } 2354 PetscCall(MatDenseRestoreArray(A, &aa)); 2355 if (mat->tau) 2356 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]); 2357 PetscFunctionReturn(0); 2358 } 2359 2360 static PetscErrorCode MatRealPart_SeqDense(Mat A) { 2361 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2362 PetscInt i, j; 2363 PetscScalar *aa; 2364 2365 PetscFunctionBegin; 2366 PetscCall(MatDenseGetArray(A, &aa)); 2367 for (j = 0; j < A->cmap->n; j++) { 2368 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]); 2369 } 2370 PetscCall(MatDenseRestoreArray(A, &aa)); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A) { 2375 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 2376 PetscInt i, j; 2377 PetscScalar *aa; 2378 2379 PetscFunctionBegin; 2380 PetscCall(MatDenseGetArray(A, &aa)); 2381 for (j = 0; j < A->cmap->n; j++) { 2382 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]); 2383 } 2384 PetscCall(MatDenseRestoreArray(A, &aa)); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 /* ----------------------------------------------------------------*/ 2389 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2390 PetscInt m = A->rmap->n, n = B->cmap->n; 2391 PetscBool cisdense; 2392 2393 PetscFunctionBegin; 2394 PetscCall(MatSetSizes(C, m, n, m, n)); 2395 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2396 if (!cisdense) { 2397 PetscBool flg; 2398 2399 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2400 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2401 } 2402 PetscCall(MatSetUp(C)); 2403 PetscFunctionReturn(0); 2404 } 2405 2406 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2407 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data; 2408 PetscBLASInt m, n, k; 2409 const PetscScalar *av, *bv; 2410 PetscScalar *cv; 2411 PetscScalar _DOne = 1.0, _DZero = 0.0; 2412 2413 PetscFunctionBegin; 2414 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2415 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2416 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2417 if (!m || !n || !k) PetscFunctionReturn(0); 2418 PetscCall(MatDenseGetArrayRead(A, &av)); 2419 PetscCall(MatDenseGetArrayRead(B, &bv)); 2420 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2421 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2422 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2423 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2424 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2425 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2430 PetscInt m = A->rmap->n, n = B->rmap->n; 2431 PetscBool cisdense; 2432 2433 PetscFunctionBegin; 2434 PetscCall(MatSetSizes(C, m, n, m, n)); 2435 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2436 if (!cisdense) { 2437 PetscBool flg; 2438 2439 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2440 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2441 } 2442 PetscCall(MatSetUp(C)); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2447 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2448 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2449 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2450 const PetscScalar *av, *bv; 2451 PetscScalar *cv; 2452 PetscBLASInt m, n, k; 2453 PetscScalar _DOne = 1.0, _DZero = 0.0; 2454 2455 PetscFunctionBegin; 2456 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2457 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2458 PetscCall(PetscBLASIntCast(A->cmap->n, &k)); 2459 if (!m || !n || !k) PetscFunctionReturn(0); 2460 PetscCall(MatDenseGetArrayRead(A, &av)); 2461 PetscCall(MatDenseGetArrayRead(B, &bv)); 2462 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2463 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2464 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2465 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2466 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2467 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 2472 PetscInt m = A->cmap->n, n = B->cmap->n; 2473 PetscBool cisdense; 2474 2475 PetscFunctionBegin; 2476 PetscCall(MatSetSizes(C, m, n, m, n)); 2477 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "")); 2478 if (!cisdense) { 2479 PetscBool flg; 2480 2481 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg)); 2482 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE)); 2483 } 2484 PetscCall(MatSetUp(C)); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C) { 2489 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2490 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2491 Mat_SeqDense *c = (Mat_SeqDense *)C->data; 2492 const PetscScalar *av, *bv; 2493 PetscScalar *cv; 2494 PetscBLASInt m, n, k; 2495 PetscScalar _DOne = 1.0, _DZero = 0.0; 2496 2497 PetscFunctionBegin; 2498 PetscCall(PetscBLASIntCast(C->rmap->n, &m)); 2499 PetscCall(PetscBLASIntCast(C->cmap->n, &n)); 2500 PetscCall(PetscBLASIntCast(A->rmap->n, &k)); 2501 if (!m || !n || !k) PetscFunctionReturn(0); 2502 PetscCall(MatDenseGetArrayRead(A, &av)); 2503 PetscCall(MatDenseGetArrayRead(B, &bv)); 2504 PetscCall(MatDenseGetArrayWrite(C, &cv)); 2505 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda)); 2506 PetscCall(MatDenseRestoreArrayRead(A, &av)); 2507 PetscCall(MatDenseRestoreArrayRead(B, &bv)); 2508 PetscCall(MatDenseRestoreArrayWrite(C, &cv)); 2509 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1))); 2510 PetscFunctionReturn(0); 2511 } 2512 2513 /* ----------------------------------------------- */ 2514 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C) { 2515 PetscFunctionBegin; 2516 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense; 2517 C->ops->productsymbolic = MatProductSymbolic_AB; 2518 PetscFunctionReturn(0); 2519 } 2520 2521 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C) { 2522 PetscFunctionBegin; 2523 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense; 2524 C->ops->productsymbolic = MatProductSymbolic_AtB; 2525 PetscFunctionReturn(0); 2526 } 2527 2528 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C) { 2529 PetscFunctionBegin; 2530 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense; 2531 C->ops->productsymbolic = MatProductSymbolic_ABt; 2532 PetscFunctionReturn(0); 2533 } 2534 2535 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C) { 2536 Mat_Product *product = C->product; 2537 2538 PetscFunctionBegin; 2539 switch (product->type) { 2540 case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqDense_AB(C)); break; 2541 case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqDense_AtB(C)); break; 2542 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqDense_ABt(C)); break; 2543 default: break; 2544 } 2545 PetscFunctionReturn(0); 2546 } 2547 /* ----------------------------------------------- */ 2548 2549 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2550 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2551 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2552 PetscScalar *x; 2553 const PetscScalar *aa; 2554 2555 PetscFunctionBegin; 2556 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2557 PetscCall(VecGetArray(v, &x)); 2558 PetscCall(VecGetLocalSize(v, &p)); 2559 PetscCall(MatDenseGetArrayRead(A, &aa)); 2560 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2561 for (i = 0; i < m; i++) { 2562 x[i] = aa[i]; 2563 if (idx) idx[i] = 0; 2564 for (j = 1; j < n; j++) { 2565 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) { 2566 x[i] = aa[i + a->lda * j]; 2567 if (idx) idx[i] = j; 2568 } 2569 } 2570 } 2571 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2572 PetscCall(VecRestoreArray(v, &x)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2577 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2578 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2579 PetscScalar *x; 2580 PetscReal atmp; 2581 const PetscScalar *aa; 2582 2583 PetscFunctionBegin; 2584 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2585 PetscCall(VecGetArray(v, &x)); 2586 PetscCall(VecGetLocalSize(v, &p)); 2587 PetscCall(MatDenseGetArrayRead(A, &aa)); 2588 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2589 for (i = 0; i < m; i++) { 2590 x[i] = PetscAbsScalar(aa[i]); 2591 for (j = 1; j < n; j++) { 2592 atmp = PetscAbsScalar(aa[i + a->lda * j]); 2593 if (PetscAbsScalar(x[i]) < atmp) { 2594 x[i] = atmp; 2595 if (idx) idx[i] = j; 2596 } 2597 } 2598 } 2599 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2600 PetscCall(VecRestoreArray(v, &x)); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[]) { 2605 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2606 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p; 2607 PetscScalar *x; 2608 const PetscScalar *aa; 2609 2610 PetscFunctionBegin; 2611 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2612 PetscCall(MatDenseGetArrayRead(A, &aa)); 2613 PetscCall(VecGetArray(v, &x)); 2614 PetscCall(VecGetLocalSize(v, &p)); 2615 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2616 for (i = 0; i < m; i++) { 2617 x[i] = aa[i]; 2618 if (idx) idx[i] = 0; 2619 for (j = 1; j < n; j++) { 2620 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) { 2621 x[i] = aa[i + a->lda * j]; 2622 if (idx) idx[i] = j; 2623 } 2624 } 2625 } 2626 PetscCall(VecRestoreArray(v, &x)); 2627 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col) { 2632 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2633 PetscScalar *x; 2634 const PetscScalar *aa; 2635 2636 PetscFunctionBegin; 2637 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2638 PetscCall(MatDenseGetArrayRead(A, &aa)); 2639 PetscCall(VecGetArray(v, &x)); 2640 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n)); 2641 PetscCall(VecRestoreArray(v, &x)); 2642 PetscCall(MatDenseRestoreArrayRead(A, &aa)); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions) { 2647 PetscInt i, j, m, n; 2648 const PetscScalar *a; 2649 2650 PetscFunctionBegin; 2651 PetscCall(MatGetSize(A, &m, &n)); 2652 PetscCall(PetscArrayzero(reductions, n)); 2653 PetscCall(MatDenseGetArrayRead(A, &a)); 2654 if (type == NORM_2) { 2655 for (i = 0; i < n; i++) { 2656 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]); 2657 a += m; 2658 } 2659 } else if (type == NORM_1) { 2660 for (i = 0; i < n; i++) { 2661 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]); 2662 a += m; 2663 } 2664 } else if (type == NORM_INFINITY) { 2665 for (i = 0; i < n; i++) { 2666 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]); 2667 a += m; 2668 } 2669 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2670 for (i = 0; i < n; i++) { 2671 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]); 2672 a += m; 2673 } 2674 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2675 for (i = 0; i < n; i++) { 2676 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]); 2677 a += m; 2678 } 2679 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2680 PetscCall(MatDenseRestoreArrayRead(A, &a)); 2681 if (type == NORM_2) { 2682 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2683 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2684 for (i = 0; i < n; i++) reductions[i] /= m; 2685 } 2686 PetscFunctionReturn(0); 2687 } 2688 2689 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx) { 2690 PetscScalar *a; 2691 PetscInt lda, m, n, i, j; 2692 2693 PetscFunctionBegin; 2694 PetscCall(MatGetSize(x, &m, &n)); 2695 PetscCall(MatDenseGetLDA(x, &lda)); 2696 PetscCall(MatDenseGetArrayWrite(x, &a)); 2697 for (j = 0; j < n; j++) { 2698 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i)); 2699 } 2700 PetscCall(MatDenseRestoreArrayWrite(x, &a)); 2701 PetscFunctionReturn(0); 2702 } 2703 2704 static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d) { 2705 PetscFunctionBegin; 2706 *missing = PETSC_FALSE; 2707 PetscFunctionReturn(0); 2708 } 2709 2710 /* vals is not const */ 2711 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals) { 2712 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 2713 PetscScalar *v; 2714 2715 PetscFunctionBegin; 2716 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2717 PetscCall(MatDenseGetArray(A, &v)); 2718 *vals = v + col * a->lda; 2719 PetscCall(MatDenseRestoreArray(A, &v)); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals) { 2724 PetscFunctionBegin; 2725 if (vals) *vals = NULL; /* user cannot accidentally use the array later */ 2726 PetscFunctionReturn(0); 2727 } 2728 2729 /* -------------------------------------------------------------------*/ 2730 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense, 2731 MatGetRow_SeqDense, 2732 MatRestoreRow_SeqDense, 2733 MatMult_SeqDense, 2734 /* 4*/ MatMultAdd_SeqDense, 2735 MatMultTranspose_SeqDense, 2736 MatMultTransposeAdd_SeqDense, 2737 NULL, 2738 NULL, 2739 NULL, 2740 /* 10*/ NULL, 2741 MatLUFactor_SeqDense, 2742 MatCholeskyFactor_SeqDense, 2743 MatSOR_SeqDense, 2744 MatTranspose_SeqDense, 2745 /* 15*/ MatGetInfo_SeqDense, 2746 MatEqual_SeqDense, 2747 MatGetDiagonal_SeqDense, 2748 MatDiagonalScale_SeqDense, 2749 MatNorm_SeqDense, 2750 /* 20*/ MatAssemblyBegin_SeqDense, 2751 MatAssemblyEnd_SeqDense, 2752 MatSetOption_SeqDense, 2753 MatZeroEntries_SeqDense, 2754 /* 24*/ MatZeroRows_SeqDense, 2755 NULL, 2756 NULL, 2757 NULL, 2758 NULL, 2759 /* 29*/ MatSetUp_SeqDense, 2760 NULL, 2761 NULL, 2762 NULL, 2763 NULL, 2764 /* 34*/ MatDuplicate_SeqDense, 2765 NULL, 2766 NULL, 2767 NULL, 2768 NULL, 2769 /* 39*/ MatAXPY_SeqDense, 2770 MatCreateSubMatrices_SeqDense, 2771 NULL, 2772 MatGetValues_SeqDense, 2773 MatCopy_SeqDense, 2774 /* 44*/ MatGetRowMax_SeqDense, 2775 MatScale_SeqDense, 2776 MatShift_SeqDense, 2777 NULL, 2778 MatZeroRowsColumns_SeqDense, 2779 /* 49*/ MatSetRandom_SeqDense, 2780 NULL, 2781 NULL, 2782 NULL, 2783 NULL, 2784 /* 54*/ NULL, 2785 NULL, 2786 NULL, 2787 NULL, 2788 NULL, 2789 /* 59*/ MatCreateSubMatrix_SeqDense, 2790 MatDestroy_SeqDense, 2791 MatView_SeqDense, 2792 NULL, 2793 NULL, 2794 /* 64*/ NULL, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /* 69*/ MatGetRowMaxAbs_SeqDense, 2800 NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 /* 74*/ NULL, 2805 NULL, 2806 NULL, 2807 NULL, 2808 NULL, 2809 /* 79*/ NULL, 2810 NULL, 2811 NULL, 2812 NULL, 2813 /* 83*/ MatLoad_SeqDense, 2814 MatIsSymmetric_SeqDense, 2815 MatIsHermitian_SeqDense, 2816 NULL, 2817 NULL, 2818 NULL, 2819 /* 89*/ NULL, 2820 NULL, 2821 MatMatMultNumeric_SeqDense_SeqDense, 2822 NULL, 2823 NULL, 2824 /* 94*/ NULL, 2825 NULL, 2826 NULL, 2827 MatMatTransposeMultNumeric_SeqDense_SeqDense, 2828 NULL, 2829 /* 99*/ MatProductSetFromOptions_SeqDense, 2830 NULL, 2831 NULL, 2832 MatConjugate_SeqDense, 2833 NULL, 2834 /*104*/ NULL, 2835 MatRealPart_SeqDense, 2836 MatImaginaryPart_SeqDense, 2837 NULL, 2838 NULL, 2839 /*109*/ NULL, 2840 NULL, 2841 MatGetRowMin_SeqDense, 2842 MatGetColumnVector_SeqDense, 2843 MatMissingDiagonal_SeqDense, 2844 /*114*/ NULL, 2845 NULL, 2846 NULL, 2847 NULL, 2848 NULL, 2849 /*119*/ NULL, 2850 NULL, 2851 NULL, 2852 NULL, 2853 NULL, 2854 /*124*/ NULL, 2855 MatGetColumnReductions_SeqDense, 2856 NULL, 2857 NULL, 2858 NULL, 2859 /*129*/ NULL, 2860 NULL, 2861 NULL, 2862 MatTransposeMatMultNumeric_SeqDense_SeqDense, 2863 NULL, 2864 /*134*/ NULL, 2865 NULL, 2866 NULL, 2867 NULL, 2868 NULL, 2869 /*139*/ NULL, 2870 NULL, 2871 NULL, 2872 NULL, 2873 NULL, 2874 MatCreateMPIMatConcatenateSeqMat_SeqDense, 2875 /*145*/ NULL, 2876 NULL, 2877 NULL, 2878 NULL, 2879 NULL, 2880 /*150*/ NULL}; 2881 2882 /*@C 2883 MatCreateSeqDense - Creates a `MATSEQDENSE` that 2884 is stored in column major order (the usual Fortran 77 manner). Many 2885 of the matrix operations use the BLAS and LAPACK routines. 2886 2887 Collective 2888 2889 Input Parameters: 2890 + comm - MPI communicator, set to `PETSC_COMM_SELF` 2891 . m - number of rows 2892 . n - number of columns 2893 - data - optional location of matrix data in column major order. Set data=NULL for PETSc 2894 to control all matrix memory allocation. 2895 2896 Output Parameter: 2897 . A - the matrix 2898 2899 Note: 2900 The data input variable is intended primarily for Fortran programmers 2901 who wish to allocate their own matrix memory space. Most users should 2902 set data=NULL. 2903 2904 Level: intermediate 2905 2906 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 2907 @*/ 2908 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A) { 2909 PetscFunctionBegin; 2910 PetscCall(MatCreate(comm, A)); 2911 PetscCall(MatSetSizes(*A, m, n, m, n)); 2912 PetscCall(MatSetType(*A, MATSEQDENSE)); 2913 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 2914 PetscFunctionReturn(0); 2915 } 2916 2917 /*@C 2918 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix 2919 2920 Collective 2921 2922 Input Parameters: 2923 + B - the matrix 2924 - data - the array (or NULL) 2925 2926 Note: 2927 The data input variable is intended primarily for Fortran programmers 2928 who wish to allocate their own matrix memory space. Most users should 2929 need not call this routine. 2930 2931 Level: intermediate 2932 2933 .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()` 2934 @*/ 2935 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[]) { 2936 PetscFunctionBegin; 2937 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2938 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data)); 2939 PetscFunctionReturn(0); 2940 } 2941 2942 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data) { 2943 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 2944 2945 PetscFunctionBegin; 2946 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2947 B->preallocated = PETSC_TRUE; 2948 2949 PetscCall(PetscLayoutSetUp(B->rmap)); 2950 PetscCall(PetscLayoutSetUp(B->cmap)); 2951 2952 if (b->lda <= 0) b->lda = B->rmap->n; 2953 2954 if (!data) { /* petsc-allocated storage */ 2955 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 2956 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v)); 2957 2958 b->user_alloc = PETSC_FALSE; 2959 } else { /* user-allocated storage */ 2960 if (!b->user_alloc) PetscCall(PetscFree(b->v)); 2961 b->v = data; 2962 b->user_alloc = PETSC_TRUE; 2963 } 2964 B->assembled = PETSC_TRUE; 2965 PetscFunctionReturn(0); 2966 } 2967 2968 #if defined(PETSC_HAVE_ELEMENTAL) 2969 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 2970 Mat mat_elemental; 2971 const PetscScalar *array; 2972 PetscScalar *v_colwise; 2973 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols; 2974 2975 PetscFunctionBegin; 2976 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols)); 2977 PetscCall(MatDenseGetArrayRead(A, &array)); 2978 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */ 2979 k = 0; 2980 for (j = 0; j < N; j++) { 2981 cols[j] = j; 2982 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++]; 2983 } 2984 for (i = 0; i < M; i++) rows[i] = i; 2985 PetscCall(MatDenseRestoreArrayRead(A, &array)); 2986 2987 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 2988 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2989 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 2990 PetscCall(MatSetUp(mat_elemental)); 2991 2992 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 2993 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES)); 2994 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 2995 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 2996 PetscCall(PetscFree3(v_colwise, rows, cols)); 2997 2998 if (reuse == MAT_INPLACE_MATRIX) { 2999 PetscCall(MatHeaderReplace(A, &mat_elemental)); 3000 } else { 3001 *newmat = mat_elemental; 3002 } 3003 PetscFunctionReturn(0); 3004 } 3005 #endif 3006 3007 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda) { 3008 Mat_SeqDense *b = (Mat_SeqDense *)B->data; 3009 PetscBool data; 3010 3011 PetscFunctionBegin; 3012 data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE); 3013 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage"); 3014 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n); 3015 b->lda = lda; 3016 PetscFunctionReturn(0); 3017 } 3018 3019 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 3020 PetscFunctionBegin; 3021 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat)); 3022 PetscFunctionReturn(0); 3023 } 3024 3025 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { 3026 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3027 3028 PetscFunctionBegin; 3029 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3030 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3031 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3032 a->vecinuse = col + 1; 3033 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse)); 3034 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3035 *v = a->cvec; 3036 PetscFunctionReturn(0); 3037 } 3038 3039 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v) { 3040 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3041 3042 PetscFunctionBegin; 3043 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3044 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3045 a->vecinuse = 0; 3046 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse)); 3047 PetscCall(VecResetArray(a->cvec)); 3048 if (v) *v = NULL; 3049 PetscFunctionReturn(0); 3050 } 3051 3052 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { 3053 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3054 3055 PetscFunctionBegin; 3056 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3057 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3058 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3059 a->vecinuse = col + 1; 3060 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse)); 3061 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3062 PetscCall(VecLockReadPush(a->cvec)); 3063 *v = a->cvec; 3064 PetscFunctionReturn(0); 3065 } 3066 3067 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v) { 3068 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3069 3070 PetscFunctionBegin; 3071 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3072 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3073 a->vecinuse = 0; 3074 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse)); 3075 PetscCall(VecLockReadPop(a->cvec)); 3076 PetscCall(VecResetArray(a->cvec)); 3077 if (v) *v = NULL; 3078 PetscFunctionReturn(0); 3079 } 3080 3081 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { 3082 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3083 3084 PetscFunctionBegin; 3085 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3086 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3087 if (!a->cvec) { PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec)); } 3088 a->vecinuse = col + 1; 3089 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3090 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda)); 3091 *v = a->cvec; 3092 PetscFunctionReturn(0); 3093 } 3094 3095 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v) { 3096 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3097 3098 PetscFunctionBegin; 3099 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 3100 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 3101 a->vecinuse = 0; 3102 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse)); 3103 PetscCall(VecResetArray(a->cvec)); 3104 if (v) *v = NULL; 3105 PetscFunctionReturn(0); 3106 } 3107 3108 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 3109 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3110 3111 PetscFunctionBegin; 3112 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 3113 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 3114 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat)); 3115 if (!a->cmat) { 3116 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat)); 3117 } else { 3118 PetscCall(MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda)); 3119 } 3120 PetscCall(MatDenseSetLDA(a->cmat, a->lda)); 3121 a->matinuse = cbegin + 1; 3122 *v = a->cmat; 3123 #if defined(PETSC_HAVE_CUDA) 3124 A->offloadmask = PETSC_OFFLOAD_CPU; 3125 #endif 3126 PetscFunctionReturn(0); 3127 } 3128 3129 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v) { 3130 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3131 3132 PetscFunctionBegin; 3133 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 3134 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix"); 3135 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 3136 a->matinuse = 0; 3137 PetscCall(MatDenseResetArray(a->cmat)); 3138 if (v) *v = NULL; 3139 #if defined(PETSC_HAVE_CUDA) 3140 A->offloadmask = PETSC_OFFLOAD_CPU; 3141 #endif 3142 PetscFunctionReturn(0); 3143 } 3144 3145 /*MC 3146 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices. 3147 3148 Options Database Keys: 3149 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()` 3150 3151 Level: beginner 3152 3153 .seealso: `MATSEQDENSE`, `MatCreateSeqDense()` 3154 M*/ 3155 PetscErrorCode MatCreate_SeqDense(Mat B) { 3156 Mat_SeqDense *b; 3157 PetscMPIInt size; 3158 3159 PetscFunctionBegin; 3160 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3161 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3162 3163 PetscCall(PetscNew(&b)); 3164 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3165 B->data = (void *)b; 3166 3167 b->roworiented = PETSC_TRUE; 3168 3169 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense)); 3170 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense)); 3171 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense)); 3172 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense)); 3173 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense)); 3174 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense)); 3175 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense)); 3176 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense)); 3177 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense)); 3178 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense)); 3179 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense)); 3180 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense)); 3181 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ)); 3182 #if defined(PETSC_HAVE_ELEMENTAL) 3183 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental)); 3184 #endif 3185 #if defined(PETSC_HAVE_SCALAPACK) 3186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 3187 #endif 3188 #if defined(PETSC_HAVE_CUDA) 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense)); 3193 #endif 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense)); 3199 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense)); 3202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense)); 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense)); 3204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense)); 3205 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense)); 3206 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense)); 3207 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense)); 3208 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense)); 3209 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense)); 3210 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE)); 3211 PetscFunctionReturn(0); 3212 } 3213 3214 /*@C 3215 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding. 3216 3217 Not Collective 3218 3219 Input Parameters: 3220 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3221 - col - column index 3222 3223 Output Parameter: 3224 . vals - pointer to the data 3225 3226 Level: intermediate 3227 3228 Note: 3229 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec` 3230 3231 .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()` 3232 @*/ 3233 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals) { 3234 PetscFunctionBegin; 3235 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3236 PetscValidLogicalCollectiveInt(A, col, 2); 3237 PetscValidPointer(vals, 3); 3238 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals)); 3239 PetscFunctionReturn(0); 3240 } 3241 3242 /*@C 3243 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`. 3244 3245 Not Collective 3246 3247 Input Parameters: 3248 + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix 3249 - vals - pointer to the data (may be NULL) 3250 3251 Level: intermediate 3252 3253 .seealso: `MATDENSE`, `MatDenseGetColumn()` 3254 @*/ 3255 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals) { 3256 PetscFunctionBegin; 3257 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3258 PetscValidPointer(vals, 2); 3259 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals)); 3260 PetscFunctionReturn(0); 3261 } 3262 3263 /*@ 3264 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`. 3265 3266 Collective 3267 3268 Input Parameters: 3269 + mat - the `Mat` object 3270 - col - the column index 3271 3272 Output Parameter: 3273 . v - the vector 3274 3275 Notes: 3276 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed. 3277 3278 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access. 3279 3280 Level: intermediate 3281 3282 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()` 3283 @*/ 3284 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v) { 3285 PetscFunctionBegin; 3286 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3287 PetscValidType(A, 1); 3288 PetscValidLogicalCollectiveInt(A, col, 2); 3289 PetscValidPointer(v, 3); 3290 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3291 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3292 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3293 PetscFunctionReturn(0); 3294 } 3295 3296 /*@ 3297 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec(). 3298 3299 Collective 3300 3301 Input Parameters: 3302 + mat - the Mat object 3303 . col - the column index 3304 - v - the Vec object (may be NULL) 3305 3306 Level: intermediate 3307 3308 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3309 @*/ 3310 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v) { 3311 PetscFunctionBegin; 3312 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3313 PetscValidType(A, 1); 3314 PetscValidLogicalCollectiveInt(A, col, 2); 3315 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3316 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3317 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v)); 3318 PetscFunctionReturn(0); 3319 } 3320 3321 /*@ 3322 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec. 3323 3324 Collective 3325 3326 Input Parameters: 3327 + mat - the Mat object 3328 - col - the column index 3329 3330 Output Parameter: 3331 . v - the vector 3332 3333 Notes: 3334 The vector is owned by PETSc and users cannot modify it. 3335 3336 Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed. 3337 3338 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access. 3339 3340 Level: intermediate 3341 3342 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3343 @*/ 3344 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v) { 3345 PetscFunctionBegin; 3346 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3347 PetscValidType(A, 1); 3348 PetscValidLogicalCollectiveInt(A, col, 2); 3349 PetscValidPointer(v, 3); 3350 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3351 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3352 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3353 PetscFunctionReturn(0); 3354 } 3355 3356 /*@ 3357 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead(). 3358 3359 Collective 3360 3361 Input Parameters: 3362 + mat - the Mat object 3363 . col - the column index 3364 - v - the Vec object (may be NULL) 3365 3366 Level: intermediate 3367 3368 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()` 3369 @*/ 3370 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v) { 3371 PetscFunctionBegin; 3372 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3373 PetscValidType(A, 1); 3374 PetscValidLogicalCollectiveInt(A, col, 2); 3375 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3376 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3377 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v)); 3378 PetscFunctionReturn(0); 3379 } 3380 3381 /*@ 3382 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec. 3383 3384 Collective 3385 3386 Input Parameters: 3387 + mat - the Mat object 3388 - col - the column index 3389 3390 Output Parameter: 3391 . v - the vector 3392 3393 Notes: 3394 The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed. 3395 3396 Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access. 3397 3398 Level: intermediate 3399 3400 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()` 3401 @*/ 3402 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v) { 3403 PetscFunctionBegin; 3404 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3405 PetscValidType(A, 1); 3406 PetscValidLogicalCollectiveInt(A, col, 2); 3407 PetscValidPointer(v, 3); 3408 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3409 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3410 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3411 PetscFunctionReturn(0); 3412 } 3413 3414 /*@ 3415 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite(). 3416 3417 Collective 3418 3419 Input Parameters: 3420 + mat - the Mat object 3421 . col - the column index 3422 - v - the Vec object (may be NULL) 3423 3424 Level: intermediate 3425 3426 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()` 3427 @*/ 3428 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v) { 3429 PetscFunctionBegin; 3430 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3431 PetscValidType(A, 1); 3432 PetscValidLogicalCollectiveInt(A, col, 2); 3433 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3434 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N); 3435 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v)); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 /*@ 3440 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat. 3441 3442 Collective 3443 3444 Input Parameters: 3445 + mat - the Mat object 3446 . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0) 3447 . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M) 3448 . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0) 3449 - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N) 3450 3451 Output Parameter: 3452 . v - the matrix 3453 3454 Notes: 3455 The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed. 3456 3457 The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows. 3458 3459 Level: intermediate 3460 3461 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()` 3462 @*/ 3463 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 3464 PetscFunctionBegin; 3465 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3466 PetscValidType(A, 1); 3467 PetscValidLogicalCollectiveInt(A, rbegin, 2); 3468 PetscValidLogicalCollectiveInt(A, rend, 3); 3469 PetscValidLogicalCollectiveInt(A, cbegin, 4); 3470 PetscValidLogicalCollectiveInt(A, cend, 5); 3471 PetscValidPointer(v, 6); 3472 if (rbegin == PETSC_DECIDE) rbegin = 0; 3473 if (rend == PETSC_DECIDE) rend = A->rmap->N; 3474 if (cbegin == PETSC_DECIDE) cbegin = 0; 3475 if (cend == PETSC_DECIDE) cend = A->cmap->N; 3476 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated"); 3477 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N); 3478 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N); 3479 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N); 3480 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N); 3481 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v)); 3482 PetscFunctionReturn(0); 3483 } 3484 3485 /*@ 3486 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix(). 3487 3488 Collective 3489 3490 Input Parameters: 3491 + mat - the Mat object 3492 - v - the Mat object (may be NULL) 3493 3494 Level: intermediate 3495 3496 .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()` 3497 @*/ 3498 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v) { 3499 PetscFunctionBegin; 3500 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3501 PetscValidType(A, 1); 3502 PetscValidPointer(v, 2); 3503 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v)); 3504 PetscFunctionReturn(0); 3505 } 3506 3507 #include <petscblaslapack.h> 3508 #include <petsc/private/kernels/blockinvert.h> 3509 3510 PetscErrorCode MatSeqDenseInvert(Mat A) { 3511 Mat_SeqDense *a = (Mat_SeqDense *)A->data; 3512 PetscInt bs = A->rmap->n; 3513 MatScalar *values = a->v; 3514 const PetscReal shift = 0.0; 3515 PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE; 3516 3517 PetscFunctionBegin; 3518 /* factor and invert each block */ 3519 switch (bs) { 3520 case 1: values[0] = (PetscScalar)1.0 / (values[0] + shift); break; 3521 case 2: 3522 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected)); 3523 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3524 break; 3525 case 3: 3526 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected)); 3527 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3528 break; 3529 case 4: 3530 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected)); 3531 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3532 break; 3533 case 5: { 3534 PetscScalar work[25]; 3535 PetscInt ipvt[5]; 3536 3537 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 3538 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3539 } break; 3540 case 6: 3541 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected)); 3542 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3543 break; 3544 case 7: 3545 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected)); 3546 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3547 break; 3548 default: { 3549 PetscInt *v_pivots, *IJ, j; 3550 PetscScalar *v_work; 3551 3552 PetscCall(PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ)); 3553 for (j = 0; j < bs; j++) IJ[j] = j; 3554 PetscCall(PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 3555 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3556 PetscCall(PetscFree3(v_work, v_pivots, IJ)); 3557 } 3558 } 3559 PetscFunctionReturn(0); 3560 } 3561