1 /*
2 Defines the basic matrix operations for sequential dense.
3 Portions of this code are under:
4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6
7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
8 #include <../src/mat/impls/dense/mpi/mpidense.h>
9 #include <petscblaslapack.h>
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <petsc/private/vecimpl.h>
12
MatSeqDenseSymmetrize_Private(Mat A,PetscBool hermitian)13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian)
14 {
15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
16 PetscInt j, k, n = A->rmap->n;
17 PetscScalar *v;
18
19 PetscFunctionBegin;
20 PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix");
21 PetscCall(MatDenseGetArray(A, &v));
22 if (!hermitian) {
23 for (k = 0; k < n; k++) {
24 for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j];
25 }
26 } else {
27 for (k = 0; k < n; k++) {
28 for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]);
29 }
30 }
31 PetscCall(MatDenseRestoreArray(A, &v));
32 PetscFunctionReturn(PETSC_SUCCESS);
33 }
34
MatSeqDenseInvertFactors_Private(Mat A)35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A)
36 {
37 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
38 PetscBLASInt info, n;
39
40 PetscFunctionBegin;
41 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
42 PetscCall(PetscBLASIntCast(A->cmap->n, &n));
43 if (A->factortype == MAT_FACTOR_LU) {
44 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
45 if (!mat->fwork) {
46 mat->lfwork = n;
47 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
48 }
49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
50 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
51 PetscCall(PetscFPTrapPop());
52 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
53 } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
54 if (A->spd == PETSC_BOOL3_TRUE) {
55 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
56 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info));
57 PetscCall(PetscFPTrapPop());
58 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE));
59 #if defined(PETSC_USE_COMPLEX)
60 } else if (A->hermitian == PETSC_BOOL3_TRUE) {
61 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
62 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present");
63 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
64 PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
65 PetscCall(PetscFPTrapPop());
66 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE));
67 #endif
68 } else { /* symmetric case */
69 PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
70 PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present");
71 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
72 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
73 PetscCall(PetscFPTrapPop());
74 PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE));
75 }
76 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1);
77 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
78 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve");
79
80 A->ops->solve = NULL;
81 A->ops->matsolve = NULL;
82 A->ops->solvetranspose = NULL;
83 A->ops->matsolvetranspose = NULL;
84 A->ops->solveadd = NULL;
85 A->ops->solvetransposeadd = NULL;
86 A->factortype = MAT_FACTOR_NONE;
87 PetscCall(PetscFree(A->solvertype));
88 PetscFunctionReturn(PETSC_SUCCESS);
89 }
90
MatZeroRowsColumns_SeqDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
92 {
93 Mat_SeqDense *l = (Mat_SeqDense *)A->data;
94 PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j;
95 PetscScalar *slot, *bb, *v;
96 const PetscScalar *xx;
97
98 PetscFunctionBegin;
99 if (PetscDefined(USE_DEBUG)) {
100 for (i = 0; i < N; i++) {
101 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed");
102 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n);
103 PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n);
104 }
105 }
106 if (!N) PetscFunctionReturn(PETSC_SUCCESS);
107
108 /* fix right-hand side if needed */
109 if (x && b) {
110 Vec xt;
111
112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
113 PetscCall(VecDuplicate(x, &xt));
114 PetscCall(VecCopy(x, xt));
115 PetscCall(VecScale(xt, -1.0));
116 PetscCall(MatMultAdd(A, xt, b, b));
117 PetscCall(VecDestroy(&xt));
118 PetscCall(VecGetArrayRead(x, &xx));
119 PetscCall(VecGetArray(b, &bb));
120 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
121 PetscCall(VecRestoreArrayRead(x, &xx));
122 PetscCall(VecRestoreArray(b, &bb));
123 }
124
125 PetscCall(MatDenseGetArray(A, &v));
126 for (i = 0; i < N; i++) {
127 slot = v + rows[i] * m;
128 PetscCall(PetscArrayzero(slot, r));
129 }
130 for (i = 0; i < N; i++) {
131 slot = v + rows[i];
132 for (j = 0; j < n; j++) {
133 *slot = 0.0;
134 slot += m;
135 }
136 }
137 if (diag != 0.0) {
138 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
139 for (i = 0; i < N; i++) {
140 slot = v + (m + 1) * rows[i];
141 *slot = diag;
142 }
143 }
144 PetscCall(MatDenseRestoreArray(A, &v));
145 PetscFunctionReturn(PETSC_SUCCESS);
146 }
147
MatConvert_SeqAIJ_SeqDense(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
149 {
150 Mat B = NULL;
151 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
152 Mat_SeqDense *b;
153 PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i;
154 const MatScalar *av;
155 PetscBool isseqdense;
156
157 PetscFunctionBegin;
158 if (reuse == MAT_REUSE_MATRIX) {
159 PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense));
160 PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name);
161 }
162 if (reuse != MAT_REUSE_MATRIX) {
163 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
164 PetscCall(MatSetSizes(B, m, n, m, n));
165 PetscCall(MatSetType(B, MATSEQDENSE));
166 PetscCall(MatSeqDenseSetPreallocation(B, NULL));
167 b = (Mat_SeqDense *)B->data;
168 } else {
169 b = (Mat_SeqDense *)((*newmat)->data);
170 for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m));
171 }
172 PetscCall(MatSeqAIJGetArrayRead(A, &av));
173 for (i = 0; i < m; i++) {
174 PetscInt j;
175 for (j = 0; j < ai[1] - ai[0]; j++) {
176 b->v[*aj * b->lda + i] = *av;
177 aj++;
178 av++;
179 }
180 ai++;
181 }
182 PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
183
184 if (reuse == MAT_INPLACE_MATRIX) {
185 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
186 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
187 PetscCall(MatHeaderReplace(A, &B));
188 } else {
189 if (B) *newmat = B;
190 PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
191 PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));
192 }
193 PetscFunctionReturn(PETSC_SUCCESS);
194 }
195
MatConvert_SeqDense_SeqAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
197 {
198 Mat B = NULL;
199 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
200 PetscInt i, j;
201 PetscInt *rows, *nnz;
202 MatScalar *aa = a->v, *vals;
203
204 PetscFunctionBegin;
205 PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals));
206 if (reuse != MAT_REUSE_MATRIX) {
207 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
208 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
209 PetscCall(MatSetType(B, MATSEQAIJ));
210 for (j = 0; j < A->cmap->n; j++) {
211 for (i = 0; i < A->rmap->n; i++)
212 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i];
213 aa += a->lda;
214 }
215 PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz));
216 } else B = *newmat;
217 aa = a->v;
218 for (j = 0; j < A->cmap->n; j++) {
219 PetscInt numRows = 0;
220 for (i = 0; i < A->rmap->n; i++)
221 if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) {
222 rows[numRows] = i;
223 vals[numRows++] = aa[i];
224 }
225 PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES));
226 aa += a->lda;
227 }
228 PetscCall(PetscFree3(rows, nnz, vals));
229 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
230 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
231
232 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &B));
233 else if (reuse != MAT_REUSE_MATRIX) *newmat = B;
234 PetscFunctionReturn(PETSC_SUCCESS);
235 }
236
MatAXPY_SeqDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)237 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
238 {
239 Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data;
240 const PetscScalar *xv;
241 PetscScalar *yv;
242 PetscBLASInt N, m, ldax = 0, lday = 0, one = 1;
243
244 PetscFunctionBegin;
245 PetscCall(MatDenseGetArrayRead(X, &xv));
246 PetscCall(MatDenseGetArray(Y, &yv));
247 PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N));
248 PetscCall(PetscBLASIntCast(X->rmap->n, &m));
249 PetscCall(PetscBLASIntCast(x->lda, &ldax));
250 PetscCall(PetscBLASIntCast(y->lda, &lday));
251 if (ldax > m || lday > m) {
252 for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one));
253 } else {
254 PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one));
255 }
256 PetscCall(MatDenseRestoreArrayRead(X, &xv));
257 PetscCall(MatDenseRestoreArray(Y, &yv));
258 PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0)));
259 PetscFunctionReturn(PETSC_SUCCESS);
260 }
261
MatGetInfo_SeqDense(Mat A,MatInfoType flag,MatInfo * info)262 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info)
263 {
264 PetscLogDouble N = A->rmap->n * A->cmap->n;
265
266 PetscFunctionBegin;
267 info->block_size = 1.0;
268 info->nz_allocated = N;
269 info->nz_used = N;
270 info->nz_unneeded = 0;
271 info->assemblies = A->num_ass;
272 info->mallocs = 0;
273 info->memory = 0; /* REVIEW ME */
274 info->fill_ratio_given = 0;
275 info->fill_ratio_needed = 0;
276 info->factor_mallocs = 0;
277 PetscFunctionReturn(PETSC_SUCCESS);
278 }
279
MatScale_SeqDense(Mat A,PetscScalar alpha)280 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha)
281 {
282 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
283 PetscScalar *v;
284 PetscBLASInt one = 1, j, nz, lda = 0;
285
286 PetscFunctionBegin;
287 PetscCall(MatDenseGetArray(A, &v));
288 PetscCall(PetscBLASIntCast(a->lda, &lda));
289 if (lda > A->rmap->n) {
290 PetscCall(PetscBLASIntCast(A->rmap->n, &nz));
291 for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one));
292 } else {
293 PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz));
294 PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one));
295 }
296 PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n));
297 PetscCall(MatDenseRestoreArray(A, &v));
298 PetscFunctionReturn(PETSC_SUCCESS);
299 }
300
MatShift_SeqDense(Mat A,PetscScalar alpha)301 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha)
302 {
303 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
304 PetscScalar *v;
305 PetscInt j, k;
306
307 PetscFunctionBegin;
308 PetscCall(MatDenseGetArray(A, &v));
309 k = PetscMin(A->rmap->n, A->cmap->n);
310 for (j = 0; j < k; j++) v[j + j * a->lda] += alpha;
311 PetscCall(PetscLogFlops(k));
312 PetscCall(MatDenseRestoreArray(A, &v));
313 PetscFunctionReturn(PETSC_SUCCESS);
314 }
315
MatIsHermitian_SeqDense(Mat A,PetscReal rtol,PetscBool * fl)316 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
317 {
318 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
319 PetscInt i, j, m = A->rmap->n, N = a->lda;
320 const PetscScalar *v;
321
322 PetscFunctionBegin;
323 *fl = PETSC_FALSE;
324 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
325 PetscCall(MatDenseGetArrayRead(A, &v));
326 for (i = 0; i < m; i++) {
327 for (j = i; j < m; j++) {
328 if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore;
329 }
330 }
331 *fl = PETSC_TRUE;
332 restore:
333 PetscCall(MatDenseRestoreArrayRead(A, &v));
334 PetscFunctionReturn(PETSC_SUCCESS);
335 }
336
MatIsSymmetric_SeqDense(Mat A,PetscReal rtol,PetscBool * fl)337 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
338 {
339 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
340 PetscInt i, j, m = A->rmap->n, N = a->lda;
341 const PetscScalar *v;
342
343 PetscFunctionBegin;
344 *fl = PETSC_FALSE;
345 if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
346 PetscCall(MatDenseGetArrayRead(A, &v));
347 for (i = 0; i < m; i++) {
348 for (j = i; j < m; j++) {
349 if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore;
350 }
351 }
352 *fl = PETSC_TRUE;
353 restore:
354 PetscCall(MatDenseRestoreArrayRead(A, &v));
355 PetscFunctionReturn(PETSC_SUCCESS);
356 }
357
MatDuplicateNoCreate_SeqDense(Mat newi,Mat A,MatDuplicateOption cpvalues)358 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues)
359 {
360 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
361 PetscInt lda = mat->lda, j, m, nlda = lda;
362 PetscBool isdensecpu;
363
364 PetscFunctionBegin;
365 PetscCall(PetscLayoutReference(A->rmap, &newi->rmap));
366 PetscCall(PetscLayoutReference(A->cmap, &newi->cmap));
367 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */
368 PetscCall(MatDenseSetLDA(newi, lda));
369 }
370 PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu));
371 if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL));
372 if (cpvalues == MAT_COPY_VALUES) {
373 const PetscScalar *av;
374 PetscScalar *v;
375
376 PetscCall(MatDenseGetArrayRead(A, &av));
377 PetscCall(MatDenseGetArrayWrite(newi, &v));
378 PetscCall(MatDenseGetLDA(newi, &nlda));
379 m = A->rmap->n;
380 if (lda > m || nlda > m) {
381 for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m));
382 } else {
383 PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n));
384 }
385 PetscCall(MatDenseRestoreArrayWrite(newi, &v));
386 PetscCall(MatDenseRestoreArrayRead(A, &av));
387 PetscCall(MatPropagateSymmetryOptions(A, newi));
388 }
389 PetscFunctionReturn(PETSC_SUCCESS);
390 }
391
MatDuplicate_SeqDense(Mat A,MatDuplicateOption cpvalues,Mat * newmat)392 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
393 {
394 PetscFunctionBegin;
395 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat));
396 PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
397 PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name));
398 PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues));
399 PetscFunctionReturn(PETSC_SUCCESS);
400 }
401
MatSolve_SeqDense_Internal_LU(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k,PetscBool T)402 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
403 {
404 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
405 PetscBLASInt info;
406
407 PetscFunctionBegin;
408 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
409 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
410 PetscCall(PetscFPTrapPop());
411 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info);
412 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
413 PetscFunctionReturn(PETSC_SUCCESS);
414 }
415
MatSolve_SeqDense_Internal_Cholesky(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k,PetscBool T)416 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
417 {
418 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
419 PetscBLASInt info;
420
421 PetscFunctionBegin;
422 if (A->spd == PETSC_BOOL3_TRUE) {
423 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A));
424 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
425 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info));
426 PetscCall(PetscFPTrapPop());
427 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info);
428 if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A));
429 #if defined(PETSC_USE_COMPLEX)
430 } else if (A->hermitian == PETSC_BOOL3_TRUE) {
431 if (T) PetscCall(MatConjugate_SeqDense(A));
432 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
433 PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
434 PetscCall(PetscFPTrapPop());
435 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info);
436 if (T) PetscCall(MatConjugate_SeqDense(A));
437 #endif
438 } else { /* symmetric case */
439 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
440 PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
441 PetscCall(PetscFPTrapPop());
442 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info);
443 }
444 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
445 PetscFunctionReturn(PETSC_SUCCESS);
446 }
447
MatSolve_SeqDense_Internal_QR(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k)448 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
449 {
450 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
451 PetscBLASInt info;
452 char trans;
453
454 PetscFunctionBegin;
455 if (PetscDefined(USE_COMPLEX)) {
456 trans = 'C';
457 } else {
458 trans = 'T';
459 }
460 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
461 { /* lwork depends on the number of right-hand sides */
462 PetscBLASInt nlfwork, lfwork = -1;
463 PetscScalar fwork;
464
465 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
466 nlfwork = (PetscBLASInt)PetscRealPart(fwork);
467 if (nlfwork > mat->lfwork) {
468 mat->lfwork = nlfwork;
469 PetscCall(PetscFree(mat->fwork));
470 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
471 }
472 }
473 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
474 PetscCall(PetscFPTrapPop());
475 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info);
476 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
477 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
478 PetscCall(PetscFPTrapPop());
479 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info);
480 for (PetscInt j = 0; j < nrhs; j++) {
481 for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.;
482 }
483 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank))));
484 PetscFunctionReturn(PETSC_SUCCESS);
485 }
486
MatSolveTranspose_SeqDense_Internal_QR(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k)487 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
488 {
489 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
490 PetscBLASInt info;
491
492 PetscFunctionBegin;
493 if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) {
494 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
495 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
496 PetscCall(PetscFPTrapPop());
497 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info);
498 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A));
499 { /* lwork depends on the number of right-hand sides */
500 PetscBLASInt nlfwork, lfwork = -1;
501 PetscScalar fwork;
502
503 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
504 nlfwork = (PetscBLASInt)PetscRealPart(fwork);
505 if (nlfwork > mat->lfwork) {
506 mat->lfwork = nlfwork;
507 PetscCall(PetscFree(mat->fwork));
508 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
509 }
510 }
511 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
512 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
513 PetscCall(PetscFPTrapPop());
514 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info);
515 if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A));
516 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve");
517 PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank))));
518 PetscFunctionReturn(PETSC_SUCCESS);
519 }
520
MatSolve_SeqDense_SetUp(Mat A,Vec xx,Vec yy,PetscScalar ** _y,PetscBLASInt * _m,PetscBLASInt * _k)521 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
522 {
523 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
524 PetscScalar *y;
525 PetscBLASInt m = 0, k = 0;
526
527 PetscFunctionBegin;
528 PetscCall(PetscBLASIntCast(A->rmap->n, &m));
529 PetscCall(PetscBLASIntCast(A->cmap->n, &k));
530 if (k < m) {
531 PetscCall(VecCopy(xx, mat->qrrhs));
532 PetscCall(VecGetArray(mat->qrrhs, &y));
533 } else {
534 PetscCall(VecCopy(xx, yy));
535 PetscCall(VecGetArray(yy, &y));
536 }
537 *_y = y;
538 *_k = k;
539 *_m = m;
540 PetscFunctionReturn(PETSC_SUCCESS);
541 }
542
MatSolve_SeqDense_TearDown(Mat A,Vec xx,Vec yy,PetscScalar ** _y,PetscBLASInt * _m,PetscBLASInt * _k)543 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
544 {
545 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
546 PetscScalar *y = NULL;
547 PetscBLASInt m, k;
548
549 PetscFunctionBegin;
550 y = *_y;
551 *_y = NULL;
552 k = *_k;
553 m = *_m;
554 if (k < m) {
555 PetscScalar *yv;
556 PetscCall(VecGetArray(yy, &yv));
557 PetscCall(PetscArraycpy(yv, y, k));
558 PetscCall(VecRestoreArray(yy, &yv));
559 PetscCall(VecRestoreArray(mat->qrrhs, &y));
560 } else {
561 PetscCall(VecRestoreArray(yy, &y));
562 }
563 PetscFunctionReturn(PETSC_SUCCESS);
564 }
565
MatSolve_SeqDense_LU(Mat A,Vec xx,Vec yy)566 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy)
567 {
568 PetscScalar *y = NULL;
569 PetscBLASInt m = 0, k = 0;
570
571 PetscFunctionBegin;
572 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
573 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE));
574 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
575 PetscFunctionReturn(PETSC_SUCCESS);
576 }
577
MatSolveTranspose_SeqDense_LU(Mat A,Vec xx,Vec yy)578 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy)
579 {
580 PetscScalar *y = NULL;
581 PetscBLASInt m = 0, k = 0;
582
583 PetscFunctionBegin;
584 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
585 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE));
586 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
587 PetscFunctionReturn(PETSC_SUCCESS);
588 }
589
MatSolve_SeqDense_Cholesky(Mat A,Vec xx,Vec yy)590 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
591 {
592 PetscScalar *y = NULL;
593 PetscBLASInt m = 0, k = 0;
594
595 PetscFunctionBegin;
596 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
597 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE));
598 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
599 PetscFunctionReturn(PETSC_SUCCESS);
600 }
601
MatSolveTranspose_SeqDense_Cholesky(Mat A,Vec xx,Vec yy)602 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
603 {
604 PetscScalar *y = NULL;
605 PetscBLASInt m = 0, k = 0;
606
607 PetscFunctionBegin;
608 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
609 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE));
610 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
611 PetscFunctionReturn(PETSC_SUCCESS);
612 }
613
MatSolve_SeqDense_QR(Mat A,Vec xx,Vec yy)614 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy)
615 {
616 PetscScalar *y = NULL;
617 PetscBLASInt m = 0, k = 0;
618
619 PetscFunctionBegin;
620 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
621 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k));
622 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
623 PetscFunctionReturn(PETSC_SUCCESS);
624 }
625
MatSolveTranspose_SeqDense_QR(Mat A,Vec xx,Vec yy)626 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy)
627 {
628 PetscScalar *y = NULL;
629 PetscBLASInt m = 0, k = 0;
630
631 PetscFunctionBegin;
632 PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
633 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k));
634 PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
635 PetscFunctionReturn(PETSC_SUCCESS);
636 }
637
MatMatSolve_SeqDense_SetUp(Mat A,Mat B,Mat X,PetscScalar ** _y,PetscBLASInt * _ldy,PetscBLASInt * _m,PetscBLASInt * _nrhs,PetscBLASInt * _k)638 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
639 {
640 const PetscScalar *b;
641 PetscScalar *y;
642 PetscInt n, _ldb, _ldx;
643 PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0;
644
645 PetscFunctionBegin;
646 *_ldy = 0;
647 *_m = 0;
648 *_nrhs = 0;
649 *_k = 0;
650 *_y = NULL;
651 PetscCall(PetscBLASIntCast(A->rmap->n, &m));
652 PetscCall(PetscBLASIntCast(A->cmap->n, &k));
653 PetscCall(MatGetSize(B, NULL, &n));
654 PetscCall(PetscBLASIntCast(n, &nrhs));
655 PetscCall(MatDenseGetLDA(B, &_ldb));
656 PetscCall(PetscBLASIntCast(_ldb, &ldb));
657 PetscCall(MatDenseGetLDA(X, &_ldx));
658 PetscCall(PetscBLASIntCast(_ldx, &ldx));
659 if (ldx < m) {
660 PetscCall(MatDenseGetArrayRead(B, &b));
661 PetscCall(PetscMalloc1(nrhs * m, &y));
662 if (ldb == m) {
663 PetscCall(PetscArraycpy(y, b, ldb * nrhs));
664 } else {
665 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m));
666 }
667 ldy = m;
668 PetscCall(MatDenseRestoreArrayRead(B, &b));
669 } else {
670 if (ldb == ldx) {
671 PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
672 PetscCall(MatDenseGetArray(X, &y));
673 } else {
674 PetscCall(MatDenseGetArray(X, &y));
675 PetscCall(MatDenseGetArrayRead(B, &b));
676 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m));
677 PetscCall(MatDenseRestoreArrayRead(B, &b));
678 }
679 ldy = ldx;
680 }
681 *_y = y;
682 *_ldy = ldy;
683 *_k = k;
684 *_m = m;
685 *_nrhs = nrhs;
686 PetscFunctionReturn(PETSC_SUCCESS);
687 }
688
MatMatSolve_SeqDense_TearDown(Mat A,Mat B,Mat X,PetscScalar ** _y,PetscBLASInt * _ldy,PetscBLASInt * _m,PetscBLASInt * _nrhs,PetscBLASInt * _k)689 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
690 {
691 PetscScalar *y;
692 PetscInt _ldx;
693 PetscBLASInt k, ldy, nrhs, ldx = 0;
694
695 PetscFunctionBegin;
696 y = *_y;
697 *_y = NULL;
698 k = *_k;
699 ldy = *_ldy;
700 nrhs = *_nrhs;
701 PetscCall(MatDenseGetLDA(X, &_ldx));
702 PetscCall(PetscBLASIntCast(_ldx, &ldx));
703 if (ldx != ldy) {
704 PetscScalar *xv;
705 PetscCall(MatDenseGetArray(X, &xv));
706 for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k));
707 PetscCall(MatDenseRestoreArray(X, &xv));
708 PetscCall(PetscFree(y));
709 } else {
710 PetscCall(MatDenseRestoreArray(X, &y));
711 }
712 PetscFunctionReturn(PETSC_SUCCESS);
713 }
714
MatMatSolve_SeqDense_LU(Mat A,Mat B,Mat X)715 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X)
716 {
717 PetscScalar *y;
718 PetscBLASInt m, k, ldy, nrhs;
719
720 PetscFunctionBegin;
721 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
722 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE));
723 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
724 PetscFunctionReturn(PETSC_SUCCESS);
725 }
726
MatMatSolveTranspose_SeqDense_LU(Mat A,Mat B,Mat X)727 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X)
728 {
729 PetscScalar *y;
730 PetscBLASInt m, k, ldy, nrhs;
731
732 PetscFunctionBegin;
733 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
734 PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE));
735 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
736 PetscFunctionReturn(PETSC_SUCCESS);
737 }
738
MatMatSolve_SeqDense_Cholesky(Mat A,Mat B,Mat X)739 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X)
740 {
741 PetscScalar *y;
742 PetscBLASInt m, k, ldy, nrhs;
743
744 PetscFunctionBegin;
745 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
746 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE));
747 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
748 PetscFunctionReturn(PETSC_SUCCESS);
749 }
750
MatMatSolveTranspose_SeqDense_Cholesky(Mat A,Mat B,Mat X)751 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X)
752 {
753 PetscScalar *y;
754 PetscBLASInt m, k, ldy, nrhs;
755
756 PetscFunctionBegin;
757 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
758 PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE));
759 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
760 PetscFunctionReturn(PETSC_SUCCESS);
761 }
762
MatMatSolve_SeqDense_QR(Mat A,Mat B,Mat X)763 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X)
764 {
765 PetscScalar *y;
766 PetscBLASInt m, k, ldy, nrhs;
767
768 PetscFunctionBegin;
769 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
770 PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k));
771 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
772 PetscFunctionReturn(PETSC_SUCCESS);
773 }
774
MatMatSolveTranspose_SeqDense_QR(Mat A,Mat B,Mat X)775 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X)
776 {
777 PetscScalar *y;
778 PetscBLASInt m, k, ldy, nrhs;
779
780 PetscFunctionBegin;
781 PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
782 PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k));
783 PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
784 PetscFunctionReturn(PETSC_SUCCESS);
785 }
786
787 /* COMMENT: I have chosen to hide row permutation in the pivots,
788 rather than put it in the Mat->row slot.*/
MatLUFactor_SeqDense(Mat A,IS row,IS col,PETSC_UNUSED const MatFactorInfo * minfo)789 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *minfo)
790 {
791 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
792 PetscBLASInt n, m, info;
793
794 PetscFunctionBegin;
795 PetscCall(PetscBLASIntCast(A->cmap->n, &n));
796 PetscCall(PetscBLASIntCast(A->rmap->n, &m));
797 if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots));
798 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
799 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
800 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info));
801 PetscCall(PetscFPTrapPop());
802
803 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info);
804 PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info);
805
806 A->ops->solve = MatSolve_SeqDense_LU;
807 A->ops->matsolve = MatMatSolve_SeqDense_LU;
808 A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU;
809 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU;
810 A->factortype = MAT_FACTOR_LU;
811
812 PetscCall(PetscFree(A->solvertype));
813 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));
814
815 PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3));
816 PetscFunctionReturn(PETSC_SUCCESS);
817 }
818
MatLUFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo * info)819 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info)
820 {
821 PetscFunctionBegin;
822 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
823 PetscUseTypeMethod(fact, lufactor, NULL, NULL, info);
824 PetscFunctionReturn(PETSC_SUCCESS);
825 }
826
MatLUFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,IS col,PETSC_UNUSED const MatFactorInfo * info)827 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *info)
828 {
829 PetscFunctionBegin;
830 fact->preallocated = PETSC_TRUE;
831 fact->assembled = PETSC_TRUE;
832 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense;
833 PetscFunctionReturn(PETSC_SUCCESS);
834 }
835
836 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */
MatCholeskyFactor_SeqDense(Mat A,IS perm,PETSC_UNUSED const MatFactorInfo * minfo)837 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, PETSC_UNUSED const MatFactorInfo *minfo)
838 {
839 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
840 PetscBLASInt info, n;
841
842 PetscFunctionBegin;
843 PetscCall(PetscBLASIntCast(A->cmap->n, &n));
844 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
845 if (A->spd == PETSC_BOOL3_TRUE) {
846 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
847 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info));
848 PetscCall(PetscFPTrapPop());
849 #if defined(PETSC_USE_COMPLEX)
850 } else if (A->hermitian == PETSC_BOOL3_TRUE) {
851 if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots));
852 if (!mat->fwork) {
853 PetscScalar dummy;
854
855 mat->lfwork = -1;
856 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
857 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
858 PetscCall(PetscFPTrapPop());
859 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
860 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
861 }
862 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
863 PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
864 PetscCall(PetscFPTrapPop());
865 #endif
866 } else { /* symmetric case */
867 if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots));
868 if (!mat->fwork) {
869 PetscScalar dummy;
870
871 mat->lfwork = -1;
872 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
873 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
874 PetscCall(PetscFPTrapPop());
875 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
876 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
877 }
878 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
879 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
880 PetscCall(PetscFPTrapPop());
881 }
882 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1);
883
884 A->ops->solve = MatSolve_SeqDense_Cholesky;
885 A->ops->matsolve = MatMatSolve_SeqDense_Cholesky;
886 A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky;
887 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky;
888 A->factortype = MAT_FACTOR_CHOLESKY;
889
890 PetscCall(PetscFree(A->solvertype));
891 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));
892
893 PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
894 PetscFunctionReturn(PETSC_SUCCESS);
895 }
896
MatCholeskyFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo * info)897 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info)
898 {
899 PetscFunctionBegin;
900 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
901 PetscUseTypeMethod(fact, choleskyfactor, NULL, info);
902 PetscFunctionReturn(PETSC_SUCCESS);
903 }
904
MatCholeskyFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const MatFactorInfo * info)905 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
906 {
907 PetscFunctionBegin;
908 fact->assembled = PETSC_TRUE;
909 fact->preallocated = PETSC_TRUE;
910 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense;
911 PetscFunctionReturn(PETSC_SUCCESS);
912 }
913
MatQRFactor_SeqDense(Mat A,IS col,PETSC_UNUSED const MatFactorInfo * minfo)914 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, PETSC_UNUSED const MatFactorInfo *minfo)
915 {
916 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
917 PetscBLASInt n, m, info, min, max;
918
919 PetscFunctionBegin;
920 PetscCall(PetscBLASIntCast(A->cmap->n, &n));
921 PetscCall(PetscBLASIntCast(A->rmap->n, &m));
922 max = PetscMax(m, n);
923 min = PetscMin(m, n);
924 if (!mat->tau) PetscCall(PetscMalloc1(min, &mat->tau));
925 if (!mat->pivots) PetscCall(PetscMalloc1(n, &mat->pivots));
926 if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs));
927 if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
928 if (!mat->fwork) {
929 PetscScalar dummy;
930
931 mat->lfwork = -1;
932 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
933 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info));
934 PetscCall(PetscFPTrapPop());
935 PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
936 PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
937 }
938 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
939 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info));
940 PetscCall(PetscFPTrapPop());
941 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info);
942 // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n
943 mat->rank = min;
944
945 A->ops->solve = MatSolve_SeqDense_QR;
946 A->ops->matsolve = MatMatSolve_SeqDense_QR;
947 A->factortype = MAT_FACTOR_QR;
948 if (m == n) {
949 A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR;
950 A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR;
951 }
952
953 PetscCall(PetscFree(A->solvertype));
954 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));
955
956 PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0)));
957 PetscFunctionReturn(PETSC_SUCCESS);
958 }
959
MatQRFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo * info)960 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info)
961 {
962 PetscFunctionBegin;
963 PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
964 PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, info));
965 PetscFunctionReturn(PETSC_SUCCESS);
966 }
967
MatQRFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const MatFactorInfo * info)968 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
969 {
970 PetscFunctionBegin;
971 fact->assembled = PETSC_TRUE;
972 fact->preallocated = PETSC_TRUE;
973 PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense));
974 PetscFunctionReturn(PETSC_SUCCESS);
975 }
976
977 /* uses LAPACK */
MatGetFactor_seqdense_petsc(Mat A,MatFactorType ftype,Mat * fact)978 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact)
979 {
980 PetscFunctionBegin;
981 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact));
982 PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
983 PetscCall(MatSetType(*fact, MATDENSE));
984 (*fact)->trivialsymbolic = PETSC_TRUE;
985 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
986 (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense;
987 (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense;
988 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
989 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense;
990 } else if (ftype == MAT_FACTOR_QR) {
991 PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense));
992 }
993 (*fact)->factortype = ftype;
994
995 PetscCall(PetscFree((*fact)->solvertype));
996 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype));
997 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU]));
998 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU]));
999 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY]));
1000 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC]));
1001 PetscFunctionReturn(PETSC_SUCCESS);
1002 }
1003
MatSOR_SeqDense(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)1004 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx)
1005 {
1006 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1007 PetscScalar *x, *v = mat->v, zero = 0.0, xt;
1008 const PetscScalar *b;
1009 PetscInt m = A->rmap->n, i;
1010 PetscBLASInt o = 1, bm = 0;
1011
1012 PetscFunctionBegin;
1013 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1014 PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1015 #endif
1016 if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */
1017 PetscCall(PetscBLASIntCast(m, &bm));
1018 if (flag & SOR_ZERO_INITIAL_GUESS) {
1019 /* this is a hack fix, should have another version without the second BLASdotu */
1020 PetscCall(VecSet(xx, zero));
1021 }
1022 PetscCall(VecGetArray(xx, &x));
1023 PetscCall(VecGetArrayRead(bb, &b));
1024 its = its * lits;
1025 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
1026 while (its--) {
1027 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1028 for (i = 0; i < m; i++) {
1029 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1030 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift);
1031 }
1032 }
1033 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1034 for (i = m - 1; i >= 0; i--) {
1035 PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1036 x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift);
1037 }
1038 }
1039 }
1040 PetscCall(VecRestoreArrayRead(bb, &b));
1041 PetscCall(VecRestoreArray(xx, &x));
1042 PetscFunctionReturn(PETSC_SUCCESS);
1043 }
1044
MatMultColumnRangeKernel_SeqDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end,PetscBool trans,PetscBool herm)1045 PETSC_INTERN PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm)
1046 {
1047 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1048 PetscScalar *y, _DOne = 1.0, _DZero = 0.0;
1049 PetscBLASInt m, n, _One = 1;
1050 const PetscScalar *v = mat->v, *x;
1051
1052 PetscFunctionBegin;
1053 PetscCall(PetscBLASIntCast(A->rmap->n, &m));
1054 PetscCall(PetscBLASIntCast(c_end - c_start, &n));
1055 PetscCall(VecGetArrayRead(xx, &x));
1056 PetscCall(VecGetArrayWrite(yy, &y));
1057 if (!m || !n) {
1058 PetscBLASInt i;
1059 if (trans)
1060 for (i = 0; i < n; i++) y[i] = 0.0;
1061 else
1062 for (i = 0; i < m; i++) y[i] = 0.0;
1063 } else {
1064 if (trans) {
1065 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One));
1066 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One));
1067 } else {
1068 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One));
1069 }
1070 PetscCall(PetscLogFlops(2.0 * m * n - n));
1071 }
1072 PetscCall(VecRestoreArrayRead(xx, &x));
1073 PetscCall(VecRestoreArrayWrite(yy, &y));
1074 PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076
MatMultHermitianTransposeColumnRange_SeqDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end)1077 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
1078 {
1079 PetscFunctionBegin;
1080 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE));
1081 PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083
MatMult_SeqDense(Mat A,Vec xx,Vec yy)1084 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy)
1085 {
1086 PetscFunctionBegin;
1087 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE));
1088 PetscFunctionReturn(PETSC_SUCCESS);
1089 }
1090
MatMultTranspose_SeqDense(Mat A,Vec xx,Vec yy)1091 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1092 {
1093 PetscFunctionBegin;
1094 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE));
1095 PetscFunctionReturn(PETSC_SUCCESS);
1096 }
1097
MatMultHermitianTranspose_SeqDense(Mat A,Vec xx,Vec yy)1098 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1099 {
1100 PetscFunctionBegin;
1101 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE));
1102 PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104
MatMultAddColumnRangeKernel_SeqDense(Mat A,Vec xx,Vec zz,Vec yy,PetscInt c_start,PetscInt c_end,PetscBool trans,PetscBool herm)1105 PETSC_INTERN PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm)
1106 {
1107 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1108 const PetscScalar *v = mat->v, *x;
1109 PetscScalar *y, _DOne = 1.0;
1110 PetscBLASInt m, n, _One = 1;
1111
1112 PetscFunctionBegin;
1113 PetscCall(PetscBLASIntCast(A->rmap->n, &m));
1114 PetscCall(PetscBLASIntCast(c_end - c_start, &n));
1115 PetscCall(VecCopy(zz, yy));
1116 if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS);
1117 PetscCall(VecGetArray(yy, &y));
1118 PetscCall(VecGetArrayRead(xx, &x));
1119 if (trans) {
1120 if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One));
1121 else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One));
1122 } else {
1123 PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One));
1124 }
1125 PetscCall(VecRestoreArrayRead(xx, &x));
1126 PetscCall(VecRestoreArray(yy, &y));
1127 PetscCall(PetscLogFlops(2.0 * m * n));
1128 PetscFunctionReturn(PETSC_SUCCESS);
1129 }
1130
MatMultColumnRange_SeqDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end)1131 PetscErrorCode MatMultColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
1132 {
1133 PetscFunctionBegin;
1134 PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE));
1135 PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137
MatMultAddColumnRange_SeqDense(Mat A,Vec xx,Vec zz,Vec yy,PetscInt c_start,PetscInt c_end)1138 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end)
1139 {
1140 PetscFunctionBegin;
1141 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE));
1142 PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144
MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A,Vec xx,Vec zz,Vec yy,PetscInt c_start,PetscInt c_end)1145 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end)
1146 {
1147 PetscFunctionBegin;
1148 PetscMPIInt rank;
1149 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
1150 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE));
1151 PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153
MatMultAdd_SeqDense(Mat A,Vec xx,Vec zz,Vec yy)1154 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1155 {
1156 PetscFunctionBegin;
1157 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE));
1158 PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160
MatMultTransposeAdd_SeqDense(Mat A,Vec xx,Vec zz,Vec yy)1161 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1162 {
1163 PetscFunctionBegin;
1164 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE));
1165 PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167
MatMultHermitianTransposeAdd_SeqDense(Mat A,Vec xx,Vec zz,Vec yy)1168 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1169 {
1170 PetscFunctionBegin;
1171 PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE));
1172 PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174
MatGetRow_SeqDense(Mat A,PetscInt row,PetscInt * ncols,PetscInt ** cols,PetscScalar ** vals)1175 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1176 {
1177 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1178 PetscInt i;
1179
1180 PetscFunctionBegin;
1181 if (ncols) *ncols = A->cmap->n;
1182 if (cols) {
1183 PetscCall(PetscMalloc1(A->cmap->n, cols));
1184 for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i;
1185 }
1186 if (vals) {
1187 const PetscScalar *v;
1188
1189 PetscCall(MatDenseGetArrayRead(A, &v));
1190 PetscCall(PetscMalloc1(A->cmap->n, vals));
1191 v += row;
1192 for (i = 0; i < A->cmap->n; i++) {
1193 (*vals)[i] = *v;
1194 v += mat->lda;
1195 }
1196 PetscCall(MatDenseRestoreArrayRead(A, &v));
1197 }
1198 PetscFunctionReturn(PETSC_SUCCESS);
1199 }
1200
MatRestoreRow_SeqDense(Mat A,PetscInt row,PetscInt * ncols,PetscInt ** cols,PetscScalar ** vals)1201 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1202 {
1203 PetscFunctionBegin;
1204 if (cols) PetscCall(PetscFree(*cols));
1205 if (vals) PetscCall(PetscFree(*vals));
1206 PetscFunctionReturn(PETSC_SUCCESS);
1207 }
1208
MatSetValues_SeqDense(Mat A,PetscInt m,const PetscInt indexm[],PetscInt n,const PetscInt indexn[],const PetscScalar v[],InsertMode addv)1209 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv)
1210 {
1211 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1212 PetscScalar *av;
1213 PetscInt i, j, idx = 0;
1214 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1215 PetscOffloadMask oldf;
1216 #endif
1217
1218 PetscFunctionBegin;
1219 PetscCall(MatDenseGetArray(A, &av));
1220 if (!mat->roworiented) {
1221 if (addv == INSERT_VALUES) {
1222 for (j = 0; j < n; j++) {
1223 if (indexn[j] < 0) {
1224 idx += m;
1225 continue;
1226 }
1227 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1228 for (i = 0; i < m; i++) {
1229 if (indexm[i] < 0) {
1230 idx++;
1231 continue;
1232 }
1233 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1234 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0);
1235 }
1236 }
1237 } else {
1238 for (j = 0; j < n; j++) {
1239 if (indexn[j] < 0) {
1240 idx += m;
1241 continue;
1242 }
1243 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1244 for (i = 0; i < m; i++) {
1245 if (indexm[i] < 0) {
1246 idx++;
1247 continue;
1248 }
1249 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1250 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0);
1251 }
1252 }
1253 }
1254 } else {
1255 if (addv == INSERT_VALUES) {
1256 for (i = 0; i < m; i++) {
1257 if (indexm[i] < 0) {
1258 idx += n;
1259 continue;
1260 }
1261 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1262 for (j = 0; j < n; j++) {
1263 if (indexn[j] < 0) {
1264 idx++;
1265 continue;
1266 }
1267 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1268 av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0);
1269 }
1270 }
1271 } else {
1272 for (i = 0; i < m; i++) {
1273 if (indexm[i] < 0) {
1274 idx += n;
1275 continue;
1276 }
1277 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1278 for (j = 0; j < n; j++) {
1279 if (indexn[j] < 0) {
1280 idx++;
1281 continue;
1282 }
1283 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1284 av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0);
1285 }
1286 }
1287 }
1288 }
1289 /* hack to prevent unneeded copy to the GPU while returning the array */
1290 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1291 oldf = A->offloadmask;
1292 A->offloadmask = PETSC_OFFLOAD_GPU;
1293 #endif
1294 PetscCall(MatDenseRestoreArray(A, &av));
1295 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1296 A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU);
1297 #endif
1298 PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300
MatGetValues_SeqDense(Mat A,PetscInt m,const PetscInt indexm[],PetscInt n,const PetscInt indexn[],PetscScalar v[])1301 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[])
1302 {
1303 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1304 const PetscScalar *vv;
1305 PetscInt i, j;
1306
1307 PetscFunctionBegin;
1308 PetscCall(MatDenseGetArrayRead(A, &vv));
1309 /* row-oriented output */
1310 for (i = 0; i < m; i++) {
1311 if (indexm[i] < 0) {
1312 v += n;
1313 continue;
1314 }
1315 PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n);
1316 for (j = 0; j < n; j++) {
1317 if (indexn[j] < 0) {
1318 v++;
1319 continue;
1320 }
1321 PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n);
1322 *v++ = vv[indexn[j] * mat->lda + indexm[i]];
1323 }
1324 }
1325 PetscCall(MatDenseRestoreArrayRead(A, &vv));
1326 PetscFunctionReturn(PETSC_SUCCESS);
1327 }
1328
MatView_Dense_Binary(Mat mat,PetscViewer viewer)1329 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer)
1330 {
1331 PetscBool skipHeader;
1332 PetscViewerFormat format;
1333 PetscInt header[4], M, N, m, lda, i, j;
1334 PetscCount k;
1335 const PetscScalar *v;
1336 PetscScalar *vwork;
1337
1338 PetscFunctionBegin;
1339 PetscCall(PetscViewerSetUp(viewer));
1340 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader));
1341 PetscCall(PetscViewerGetFormat(viewer, &format));
1342 if (skipHeader) format = PETSC_VIEWER_NATIVE;
1343
1344 PetscCall(MatGetSize(mat, &M, &N));
1345
1346 /* write matrix header */
1347 header[0] = MAT_FILE_CLASSID;
1348 header[1] = M;
1349 header[2] = N;
1350 header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N;
1351 if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1352
1353 PetscCall(MatGetLocalSize(mat, &m, NULL));
1354 if (format != PETSC_VIEWER_NATIVE) {
1355 PetscInt nnz = m * N, *iwork;
1356 /* store row lengths for each row */
1357 PetscCall(PetscMalloc1(nnz, &iwork));
1358 for (i = 0; i < m; i++) iwork[i] = N;
1359 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1360 /* store column indices (zero start index) */
1361 for (k = 0, i = 0; i < m; i++)
1362 for (j = 0; j < N; j++, k++) iwork[k] = j;
1363 PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1364 PetscCall(PetscFree(iwork));
1365 }
1366 /* store matrix values as a dense matrix in row major order */
1367 PetscCall(PetscMalloc1(m * N, &vwork));
1368 PetscCall(MatDenseGetArrayRead(mat, &v));
1369 PetscCall(MatDenseGetLDA(mat, &lda));
1370 for (k = 0, i = 0; i < m; i++)
1371 for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j];
1372 PetscCall(MatDenseRestoreArrayRead(mat, &v));
1373 PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1374 PetscCall(PetscFree(vwork));
1375 PetscFunctionReturn(PETSC_SUCCESS);
1376 }
1377
MatLoad_Dense_Binary(Mat mat,PetscViewer viewer)1378 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer)
1379 {
1380 PetscBool skipHeader;
1381 PetscInt header[4], M, N, m, nz, lda, i, j, k;
1382 PetscInt rows, cols;
1383 PetscScalar *v, *vwork;
1384
1385 PetscFunctionBegin;
1386 PetscCall(PetscViewerSetUp(viewer));
1387 PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader));
1388
1389 if (!skipHeader) {
1390 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
1391 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
1392 M = header[1];
1393 N = header[2];
1394 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
1395 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
1396 nz = header[3];
1397 PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz);
1398 } else {
1399 PetscCall(MatGetSize(mat, &M, &N));
1400 PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix");
1401 nz = MATRIX_BINARY_FORMAT_DENSE;
1402 }
1403
1404 /* setup global sizes if not set */
1405 if (mat->rmap->N < 0) mat->rmap->N = M;
1406 if (mat->cmap->N < 0) mat->cmap->N = N;
1407 PetscCall(MatSetUp(mat));
1408 /* check if global sizes are correct */
1409 PetscCall(MatGetSize(mat, &rows, &cols));
1410 PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
1411
1412 PetscCall(MatGetSize(mat, NULL, &N));
1413 PetscCall(MatGetLocalSize(mat, &m, NULL));
1414 PetscCall(MatDenseGetArray(mat, &v));
1415 PetscCall(MatDenseGetLDA(mat, &lda));
1416 if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */
1417 PetscCount nnz = (size_t)m * N;
1418 /* read in matrix values */
1419 PetscCall(PetscMalloc1(nnz, &vwork));
1420 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1421 /* store values in column major order */
1422 for (j = 0; j < N; j++)
1423 for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j];
1424 PetscCall(PetscFree(vwork));
1425 } else { /* matrix in file is sparse format */
1426 PetscInt nnz = 0, *rlens, *icols;
1427 /* read in row lengths */
1428 PetscCall(PetscMalloc1(m, &rlens));
1429 PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1430 for (i = 0; i < m; i++) nnz += rlens[i];
1431 /* read in column indices and values */
1432 PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork));
1433 PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1434 PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1435 /* store values in column major order */
1436 for (k = 0, i = 0; i < m; i++)
1437 for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k];
1438 PetscCall(PetscFree(rlens));
1439 PetscCall(PetscFree2(icols, vwork));
1440 }
1441 PetscCall(MatDenseRestoreArray(mat, &v));
1442 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
1443 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
1444 PetscFunctionReturn(PETSC_SUCCESS);
1445 }
1446
MatLoad_SeqDense(Mat newMat,PetscViewer viewer)1447 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer)
1448 {
1449 PetscBool isbinary, ishdf5;
1450
1451 PetscFunctionBegin;
1452 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1453 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1454 /* force binary viewer to load .info file if it has not yet done so */
1455 PetscCall(PetscViewerSetUp(viewer));
1456 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1457 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1458 if (isbinary) {
1459 PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1460 } else if (ishdf5) {
1461 #if defined(PETSC_HAVE_HDF5)
1462 PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
1463 #else
1464 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
1465 #endif
1466 } else {
1467 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1468 }
1469 PetscFunctionReturn(PETSC_SUCCESS);
1470 }
1471
MatView_SeqDense_ASCII(Mat A,PetscViewer viewer)1472 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer)
1473 {
1474 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1475 PetscInt i, j;
1476 const char *name;
1477 PetscScalar *v, *av;
1478 PetscViewerFormat format;
1479 #if defined(PETSC_USE_COMPLEX)
1480 PetscBool allreal = PETSC_TRUE;
1481 #endif
1482
1483 PetscFunctionBegin;
1484 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av));
1485 PetscCall(PetscViewerGetFormat(viewer, &format));
1486 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1487 PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */
1488 } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1489 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1490 for (i = 0; i < A->rmap->n; i++) {
1491 v = av + i;
1492 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1493 for (j = 0; j < A->cmap->n; j++) {
1494 #if defined(PETSC_USE_COMPLEX)
1495 if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) {
1496 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v)));
1497 } else if (PetscRealPart(*v)) {
1498 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v)));
1499 }
1500 #else
1501 if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v));
1502 #endif
1503 v += a->lda;
1504 }
1505 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1506 }
1507 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1508 } else {
1509 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1510 #if defined(PETSC_USE_COMPLEX)
1511 /* determine if matrix has all real values */
1512 for (j = 0; j < A->cmap->n; j++) {
1513 v = av + j * a->lda;
1514 for (i = 0; i < A->rmap->n; i++) {
1515 if (PetscImaginaryPart(v[i])) {
1516 allreal = PETSC_FALSE;
1517 break;
1518 }
1519 }
1520 }
1521 #endif
1522 if (format == PETSC_VIEWER_ASCII_MATLAB) {
1523 PetscCall(PetscObjectGetName((PetscObject)A, &name));
1524 PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n));
1525 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n));
1526 PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name));
1527 }
1528
1529 for (i = 0; i < A->rmap->n; i++) {
1530 v = av + i;
1531 for (j = 0; j < A->cmap->n; j++) {
1532 #if defined(PETSC_USE_COMPLEX)
1533 if (allreal) {
1534 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v)));
1535 } else {
1536 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v)));
1537 }
1538 #else
1539 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v));
1540 #endif
1541 v += a->lda;
1542 }
1543 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1544 }
1545 if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n"));
1546 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1547 }
1548 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av));
1549 PetscCall(PetscViewerFlush(viewer));
1550 PetscFunctionReturn(PETSC_SUCCESS);
1551 }
1552
1553 #include <petscdraw.h>
MatView_SeqDense_Draw_Zoom(PetscDraw draw,void * Aa)1554 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa)
1555 {
1556 Mat A = (Mat)Aa;
1557 PetscInt m = A->rmap->n, n = A->cmap->n, i, j;
1558 int color = PETSC_DRAW_WHITE;
1559 const PetscScalar *v;
1560 PetscViewer viewer;
1561 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1562 PetscViewerFormat format;
1563
1564 PetscFunctionBegin;
1565 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1566 PetscCall(PetscViewerGetFormat(viewer, &format));
1567 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1568
1569 /* Loop over matrix elements drawing boxes */
1570 PetscCall(MatDenseGetArrayRead(A, &v));
1571 if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1572 PetscDrawCollectiveBegin(draw);
1573 /* Blue for negative and Red for positive */
1574 for (j = 0; j < n; j++) {
1575 x_l = j;
1576 x_r = x_l + 1.0;
1577 for (i = 0; i < m; i++) {
1578 y_l = m - i - 1.0;
1579 y_r = y_l + 1.0;
1580 if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED;
1581 else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE;
1582 else continue;
1583 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1584 }
1585 }
1586 PetscDrawCollectiveEnd(draw);
1587 } else {
1588 /* use contour shading to indicate magnitude of values */
1589 /* first determine max of all nonzero values */
1590 PetscReal minv = 0.0, maxv = 0.0;
1591 PetscDraw popup;
1592
1593 for (i = 0; i < m * n; i++) {
1594 if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]);
1595 }
1596 if (minv >= maxv) maxv = minv + PETSC_SMALL;
1597 PetscCall(PetscDrawGetPopup(draw, &popup));
1598 PetscCall(PetscDrawScalePopup(popup, minv, maxv));
1599
1600 PetscDrawCollectiveBegin(draw);
1601 for (j = 0; j < n; j++) {
1602 x_l = j;
1603 x_r = x_l + 1.0;
1604 for (i = 0; i < m; i++) {
1605 y_l = m - i - 1.0;
1606 y_r = y_l + 1.0;
1607 color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv);
1608 PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1609 }
1610 }
1611 PetscDrawCollectiveEnd(draw);
1612 }
1613 PetscCall(MatDenseRestoreArrayRead(A, &v));
1614 PetscFunctionReturn(PETSC_SUCCESS);
1615 }
1616
MatView_SeqDense_Draw(Mat A,PetscViewer viewer)1617 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer)
1618 {
1619 PetscDraw draw;
1620 PetscBool isnull;
1621 PetscReal xr, yr, xl, yl, h, w;
1622
1623 PetscFunctionBegin;
1624 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1625 PetscCall(PetscDrawIsNull(draw, &isnull));
1626 if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1627
1628 xr = A->cmap->n;
1629 yr = A->rmap->n;
1630 h = yr / 10.0;
1631 w = xr / 10.0;
1632 xr += w;
1633 yr += h;
1634 xl = -w;
1635 yl = -h;
1636 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
1637 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
1638 PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A));
1639 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
1640 PetscCall(PetscDrawSave(draw));
1641 PetscFunctionReturn(PETSC_SUCCESS);
1642 }
1643
MatView_SeqDense(Mat A,PetscViewer viewer)1644 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer)
1645 {
1646 PetscBool isascii, isbinary, isdraw;
1647
1648 PetscFunctionBegin;
1649 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1650 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1651 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1652 if (isascii) PetscCall(MatView_SeqDense_ASCII(A, viewer));
1653 else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer));
1654 else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer));
1655 PetscFunctionReturn(PETSC_SUCCESS);
1656 }
1657
MatDensePlaceArray_SeqDense(Mat A,const PetscScalar * array)1658 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array)
1659 {
1660 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1661
1662 PetscFunctionBegin;
1663 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1664 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1665 PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first");
1666 a->unplacedarray = a->v;
1667 a->unplaced_user_alloc = a->user_alloc;
1668 a->v = (PetscScalar *)array;
1669 a->user_alloc = PETSC_TRUE;
1670 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1671 A->offloadmask = PETSC_OFFLOAD_CPU;
1672 #endif
1673 PetscFunctionReturn(PETSC_SUCCESS);
1674 }
1675
MatDenseResetArray_SeqDense(Mat A)1676 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A)
1677 {
1678 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1679
1680 PetscFunctionBegin;
1681 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1682 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1683 a->v = a->unplacedarray;
1684 a->user_alloc = a->unplaced_user_alloc;
1685 a->unplacedarray = NULL;
1686 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1687 A->offloadmask = PETSC_OFFLOAD_CPU;
1688 #endif
1689 PetscFunctionReturn(PETSC_SUCCESS);
1690 }
1691
MatDenseReplaceArray_SeqDense(Mat A,const PetscScalar * array)1692 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array)
1693 {
1694 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1695
1696 PetscFunctionBegin;
1697 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1698 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1699 if (!a->user_alloc) PetscCall(PetscFree(a->v));
1700 a->v = (PetscScalar *)array;
1701 a->user_alloc = PETSC_FALSE;
1702 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1703 A->offloadmask = PETSC_OFFLOAD_CPU;
1704 #endif
1705 PetscFunctionReturn(PETSC_SUCCESS);
1706 }
1707
MatDestroy_SeqDense(Mat mat)1708 PetscErrorCode MatDestroy_SeqDense(Mat mat)
1709 {
1710 Mat_SeqDense *l = (Mat_SeqDense *)mat->data;
1711
1712 PetscFunctionBegin;
1713 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n));
1714 PetscCall(VecDestroy(&l->qrrhs));
1715 PetscCall(PetscFree(l->tau));
1716 PetscCall(PetscFree(l->pivots));
1717 PetscCall(PetscFree(l->fwork));
1718 if (!l->user_alloc) PetscCall(PetscFree(l->v));
1719 if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray));
1720 PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1721 PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1722 PetscCall(VecDestroy(&l->cvec));
1723 PetscCall(MatDestroy(&l->cmat));
1724 PetscCall(PetscFree(mat->data));
1725
1726 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL));
1728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL));
1729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL));
1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL));
1742 #if defined(PETSC_HAVE_ELEMENTAL)
1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL));
1744 #endif
1745 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL));
1747 #endif
1748 #if defined(PETSC_HAVE_CUDA)
1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL));
1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL));
1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL));
1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL));
1753 #endif
1754 #if defined(PETSC_HAVE_HIP)
1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL));
1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL));
1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL));
1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL));
1759 #endif
1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL));
1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL));
1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL));
1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL));
1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL));
1765
1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
1780 PetscFunctionReturn(PETSC_SUCCESS);
1781 }
1782
MatTranspose_SeqDense(Mat A,MatReuse reuse,Mat * matout)1783 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout)
1784 {
1785 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1786 PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n;
1787 PetscScalar *v, tmp;
1788
1789 PetscFunctionBegin;
1790 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1791 if (reuse == MAT_INPLACE_MATRIX) {
1792 if (m == n) { /* in place transpose */
1793 PetscCall(MatDenseGetArray(A, &v));
1794 for (j = 0; j < m; j++) {
1795 for (k = 0; k < j; k++) {
1796 tmp = v[j + k * M];
1797 v[j + k * M] = v[k + j * M];
1798 v[k + j * M] = tmp;
1799 }
1800 }
1801 PetscCall(MatDenseRestoreArray(A, &v));
1802 } else { /* reuse memory, temporary allocates new memory */
1803 PetscScalar *v2;
1804 PetscLayout tmplayout;
1805
1806 PetscCall(PetscMalloc1((size_t)m * n, &v2));
1807 PetscCall(MatDenseGetArray(A, &v));
1808 for (j = 0; j < n; j++) {
1809 for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M];
1810 }
1811 PetscCall(PetscArraycpy(v, v2, (size_t)m * n));
1812 PetscCall(PetscFree(v2));
1813 PetscCall(MatDenseRestoreArray(A, &v));
1814 /* cleanup size dependent quantities */
1815 PetscCall(VecDestroy(&mat->cvec));
1816 PetscCall(MatDestroy(&mat->cmat));
1817 PetscCall(PetscFree(mat->pivots));
1818 PetscCall(PetscFree(mat->fwork));
1819 /* swap row/col layouts */
1820 PetscCall(PetscBLASIntCast(n, &mat->lda));
1821 tmplayout = A->rmap;
1822 A->rmap = A->cmap;
1823 A->cmap = tmplayout;
1824 }
1825 } else { /* out-of-place transpose */
1826 Mat tmat;
1827 Mat_SeqDense *tmatd;
1828 PetscScalar *v2;
1829 PetscInt M2;
1830
1831 if (reuse == MAT_INITIAL_MATRIX) {
1832 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat));
1833 PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n));
1834 PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name));
1835 PetscCall(MatSeqDenseSetPreallocation(tmat, NULL));
1836 } else tmat = *matout;
1837
1838 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v));
1839 PetscCall(MatDenseGetArray(tmat, &v2));
1840 tmatd = (Mat_SeqDense *)tmat->data;
1841 M2 = tmatd->lda;
1842 for (j = 0; j < n; j++) {
1843 for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M];
1844 }
1845 PetscCall(MatDenseRestoreArray(tmat, &v2));
1846 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v));
1847 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
1848 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
1849 *matout = tmat;
1850 }
1851 PetscFunctionReturn(PETSC_SUCCESS);
1852 }
1853
MatEqual_SeqDense(Mat A1,Mat A2,PetscBool * flg)1854 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg)
1855 {
1856 Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data;
1857 Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data;
1858 PetscInt i;
1859 const PetscScalar *v1, *v2;
1860
1861 PetscFunctionBegin;
1862 if (A1->rmap->n != A2->rmap->n) {
1863 *flg = PETSC_FALSE;
1864 PetscFunctionReturn(PETSC_SUCCESS);
1865 }
1866 if (A1->cmap->n != A2->cmap->n) {
1867 *flg = PETSC_FALSE;
1868 PetscFunctionReturn(PETSC_SUCCESS);
1869 }
1870 PetscCall(MatDenseGetArrayRead(A1, &v1));
1871 PetscCall(MatDenseGetArrayRead(A2, &v2));
1872 for (i = 0; i < A1->cmap->n; i++) {
1873 PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg));
1874 if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS);
1875 v1 += mat1->lda;
1876 v2 += mat2->lda;
1877 }
1878 PetscCall(MatDenseRestoreArrayRead(A1, &v1));
1879 PetscCall(MatDenseRestoreArrayRead(A2, &v2));
1880 *flg = PETSC_TRUE;
1881 PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883
MatGetDiagonal_SeqDense(Mat A,Vec v)1884 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v)
1885 {
1886 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1887 PetscInt i, n, len;
1888 PetscScalar *x;
1889 const PetscScalar *vv;
1890
1891 PetscFunctionBegin;
1892 PetscCall(VecGetSize(v, &n));
1893 PetscCall(VecGetArray(v, &x));
1894 len = PetscMin(A->rmap->n, A->cmap->n);
1895 PetscCall(MatDenseGetArrayRead(A, &vv));
1896 PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
1897 for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i];
1898 PetscCall(MatDenseRestoreArrayRead(A, &vv));
1899 PetscCall(VecRestoreArray(v, &x));
1900 PetscFunctionReturn(PETSC_SUCCESS);
1901 }
1902
MatDiagonalScale_SeqDense(Mat A,Vec ll,Vec rr)1903 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr)
1904 {
1905 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1906 const PetscScalar *l, *r;
1907 PetscScalar x, *v, *vv;
1908 PetscInt i, j, m = A->rmap->n, n = A->cmap->n;
1909
1910 PetscFunctionBegin;
1911 PetscCall(MatDenseGetArray(A, &vv));
1912 if (ll) {
1913 PetscCall(VecGetSize(ll, &m));
1914 PetscCall(VecGetArrayRead(ll, &l));
1915 PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size");
1916 for (i = 0; i < m; i++) {
1917 x = l[i];
1918 v = vv + i;
1919 for (j = 0; j < n; j++) {
1920 (*v) *= x;
1921 v += mat->lda;
1922 }
1923 }
1924 PetscCall(VecRestoreArrayRead(ll, &l));
1925 PetscCall(PetscLogFlops(1.0 * n * m));
1926 }
1927 if (rr) {
1928 PetscCall(VecGetSize(rr, &n));
1929 PetscCall(VecGetArrayRead(rr, &r));
1930 PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size");
1931 for (i = 0; i < n; i++) {
1932 x = r[i];
1933 v = vv + i * mat->lda;
1934 for (j = 0; j < m; j++) (*v++) *= x;
1935 }
1936 PetscCall(VecRestoreArrayRead(rr, &r));
1937 PetscCall(PetscLogFlops(1.0 * n * m));
1938 }
1939 PetscCall(MatDenseRestoreArray(A, &vv));
1940 PetscFunctionReturn(PETSC_SUCCESS);
1941 }
1942
MatNorm_SeqDense(Mat A,NormType type,PetscReal * nrm)1943 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm)
1944 {
1945 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1946 PetscScalar *v, *vv;
1947 PetscReal sum = 0.0;
1948 PetscInt lda, m = A->rmap->n, i, j;
1949
1950 PetscFunctionBegin;
1951 PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv));
1952 PetscCall(MatDenseGetLDA(A, &lda));
1953 v = vv;
1954 if (type == NORM_FROBENIUS) {
1955 if (lda > m) {
1956 for (j = 0; j < A->cmap->n; j++) {
1957 v = vv + j * lda;
1958 for (i = 0; i < m; i++) {
1959 sum += PetscRealPart(PetscConj(*v) * (*v));
1960 v++;
1961 }
1962 }
1963 } else {
1964 #if defined(PETSC_USE_REAL___FP16)
1965 PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n;
1966 PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one));
1967 }
1968 #else
1969 for (i = 0; i < A->cmap->n * A->rmap->n; i++) {
1970 sum += PetscRealPart(PetscConj(*v) * (*v));
1971 v++;
1972 }
1973 }
1974 *nrm = PetscSqrtReal(sum);
1975 #endif
1976 PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n));
1977 } else if (type == NORM_1) {
1978 *nrm = 0.0;
1979 for (j = 0; j < A->cmap->n; j++) {
1980 v = vv + j * mat->lda;
1981 sum = 0.0;
1982 for (i = 0; i < A->rmap->n; i++) {
1983 sum += PetscAbsScalar(*v);
1984 v++;
1985 }
1986 if (sum > *nrm) *nrm = sum;
1987 }
1988 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n));
1989 } else if (type == NORM_INFINITY) {
1990 *nrm = 0.0;
1991 for (j = 0; j < A->rmap->n; j++) {
1992 v = vv + j;
1993 sum = 0.0;
1994 for (i = 0; i < A->cmap->n; i++) {
1995 sum += PetscAbsScalar(*v);
1996 v += mat->lda;
1997 }
1998 if (sum > *nrm) *nrm = sum;
1999 }
2000 PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n));
2001 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm");
2002 PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv));
2003 PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005
MatSetOption_SeqDense(Mat A,MatOption op,PetscBool flg)2006 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg)
2007 {
2008 Mat_SeqDense *aij = (Mat_SeqDense *)A->data;
2009
2010 PetscFunctionBegin;
2011 switch (op) {
2012 case MAT_ROW_ORIENTED:
2013 aij->roworiented = flg;
2014 break;
2015 default:
2016 break;
2017 }
2018 PetscFunctionReturn(PETSC_SUCCESS);
2019 }
2020
MatZeroEntries_SeqDense(Mat A)2021 PetscErrorCode MatZeroEntries_SeqDense(Mat A)
2022 {
2023 Mat_SeqDense *l = (Mat_SeqDense *)A->data;
2024 PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j;
2025 PetscScalar *v;
2026
2027 PetscFunctionBegin;
2028 PetscCall(MatDenseGetArrayWrite(A, &v));
2029 if (lda > m) {
2030 for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m));
2031 } else {
2032 PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n)));
2033 }
2034 PetscCall(MatDenseRestoreArrayWrite(A, &v));
2035 PetscFunctionReturn(PETSC_SUCCESS);
2036 }
2037
MatZeroRows_SeqDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)2038 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
2039 {
2040 Mat_SeqDense *l = (Mat_SeqDense *)A->data;
2041 PetscInt m = l->lda, n = A->cmap->n, i, j;
2042 PetscScalar *slot, *bb, *v;
2043 const PetscScalar *xx;
2044
2045 PetscFunctionBegin;
2046 if (PetscDefined(USE_DEBUG)) {
2047 for (i = 0; i < N; i++) {
2048 PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed");
2049 PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n);
2050 }
2051 }
2052 if (!N) PetscFunctionReturn(PETSC_SUCCESS);
2053
2054 /* fix right-hand side if needed */
2055 if (x && b) {
2056 PetscCall(VecGetArrayRead(x, &xx));
2057 PetscCall(VecGetArray(b, &bb));
2058 for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
2059 PetscCall(VecRestoreArrayRead(x, &xx));
2060 PetscCall(VecRestoreArray(b, &bb));
2061 }
2062
2063 PetscCall(MatDenseGetArray(A, &v));
2064 for (i = 0; i < N; i++) {
2065 slot = v + rows[i];
2066 for (j = 0; j < n; j++) {
2067 *slot = 0.0;
2068 slot += m;
2069 }
2070 }
2071 if (diag != 0.0) {
2072 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
2073 for (i = 0; i < N; i++) {
2074 slot = v + (m + 1) * rows[i];
2075 *slot = diag;
2076 }
2077 }
2078 PetscCall(MatDenseRestoreArray(A, &v));
2079 PetscFunctionReturn(PETSC_SUCCESS);
2080 }
2081
MatDenseGetLDA_SeqDense(Mat A,PetscInt * lda)2082 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda)
2083 {
2084 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2085
2086 PetscFunctionBegin;
2087 *lda = mat->lda;
2088 PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090
MatDenseGetArray_SeqDense(Mat A,PetscScalar ** array)2091 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array)
2092 {
2093 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2094
2095 PetscFunctionBegin;
2096 PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2097 *array = mat->v;
2098 PetscFunctionReturn(PETSC_SUCCESS);
2099 }
2100
MatDenseRestoreArray_SeqDense(Mat A,PetscScalar ** array)2101 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array)
2102 {
2103 PetscFunctionBegin;
2104 if (array) *array = NULL;
2105 PetscFunctionReturn(PETSC_SUCCESS);
2106 }
2107
2108 /*@
2109 MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()`
2110
2111 Not Collective
2112
2113 Input Parameter:
2114 . A - a `MATDENSE` or `MATDENSECUDA` matrix
2115
2116 Output Parameter:
2117 . lda - the leading dimension
2118
2119 Level: intermediate
2120
2121 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()`
2122 @*/
MatDenseGetLDA(Mat A,PetscInt * lda)2123 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda)
2124 {
2125 PetscFunctionBegin;
2126 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2127 PetscAssertPointer(lda, 2);
2128 MatCheckPreallocated(A, 1);
2129 PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda));
2130 PetscFunctionReturn(PETSC_SUCCESS);
2131 }
2132
2133 /*@
2134 MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix
2135
2136 Collective if the matrix layouts have not yet been setup
2137
2138 Input Parameters:
2139 + A - a `MATDENSE` or `MATDENSECUDA` matrix
2140 - lda - the leading dimension
2141
2142 Level: intermediate
2143
2144 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()`
2145 @*/
MatDenseSetLDA(Mat A,PetscInt lda)2146 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda)
2147 {
2148 PetscFunctionBegin;
2149 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2150 PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda));
2151 PetscFunctionReturn(PETSC_SUCCESS);
2152 }
2153
2154 /*@C
2155 MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored
2156
2157 Logically Collective
2158
2159 Input Parameter:
2160 . A - a dense matrix
2161
2162 Output Parameter:
2163 . array - pointer to the data
2164
2165 Level: intermediate
2166
2167 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2168 @*/
MatDenseGetArray(Mat A,PetscScalar * array[])2169 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) PeNS
2170 {
2171 PetscFunctionBegin;
2172 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2173 PetscAssertPointer(array, 2);
2174 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2175 PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177
2178 /*@C
2179 MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()`
2180
2181 Logically Collective
2182
2183 Input Parameters:
2184 + A - a dense matrix
2185 - array - pointer to the data (may be `NULL`)
2186
2187 Level: intermediate
2188
2189 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2190 @*/
MatDenseRestoreArray(Mat A,PetscScalar * array[])2191 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) PeNS
2192 {
2193 PetscFunctionBegin;
2194 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2195 if (array) PetscAssertPointer(array, 2);
2196 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2197 PetscCall(PetscObjectStateIncrease((PetscObject)A));
2198 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2199 A->offloadmask = PETSC_OFFLOAD_CPU;
2200 #endif
2201 PetscFunctionReturn(PETSC_SUCCESS);
2202 }
2203
2204 /*@C
2205 MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored
2206
2207 Not Collective
2208
2209 Input Parameter:
2210 . A - a dense matrix
2211
2212 Output Parameter:
2213 . array - pointer to the data
2214
2215 Level: intermediate
2216
2217 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2218 @*/
MatDenseGetArrayRead(Mat A,const PetscScalar * array[])2219 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) PeNS
2220 {
2221 PetscFunctionBegin;
2222 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2223 PetscAssertPointer(array, 2);
2224 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2225 PetscFunctionReturn(PETSC_SUCCESS);
2226 }
2227
2228 /*@C
2229 MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()`
2230
2231 Not Collective
2232
2233 Input Parameters:
2234 + A - a dense matrix
2235 - array - pointer to the data (may be `NULL`)
2236
2237 Level: intermediate
2238
2239 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2240 @*/
MatDenseRestoreArrayRead(Mat A,const PetscScalar * array[])2241 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) PeNS
2242 {
2243 PetscFunctionBegin;
2244 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2245 if (array) PetscAssertPointer(array, 2);
2246 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2247 PetscFunctionReturn(PETSC_SUCCESS);
2248 }
2249
2250 /*@C
2251 MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored
2252
2253 Not Collective
2254
2255 Input Parameter:
2256 . A - a dense matrix
2257
2258 Output Parameter:
2259 . array - pointer to the data
2260
2261 Level: intermediate
2262
2263 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2264 @*/
MatDenseGetArrayWrite(Mat A,PetscScalar * array[])2265 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) PeNS
2266 {
2267 PetscFunctionBegin;
2268 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2269 PetscAssertPointer(array, 2);
2270 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2271 PetscFunctionReturn(PETSC_SUCCESS);
2272 }
2273
2274 /*@C
2275 MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()`
2276
2277 Not Collective
2278
2279 Input Parameters:
2280 + A - a dense matrix
2281 - array - pointer to the data (may be `NULL`)
2282
2283 Level: intermediate
2284
2285 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2286 @*/
MatDenseRestoreArrayWrite(Mat A,PetscScalar * array[])2287 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) PeNS
2288 {
2289 PetscFunctionBegin;
2290 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2291 if (array) PetscAssertPointer(array, 2);
2292 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2293 PetscCall(PetscObjectStateIncrease((PetscObject)A));
2294 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2295 A->offloadmask = PETSC_OFFLOAD_CPU;
2296 #endif
2297 PetscFunctionReturn(PETSC_SUCCESS);
2298 }
2299
2300 /*@C
2301 MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored
2302
2303 Logically Collective
2304
2305 Input Parameter:
2306 . A - a dense matrix
2307
2308 Output Parameters:
2309 + array - pointer to the data
2310 - mtype - memory type of the returned pointer
2311
2312 Level: intermediate
2313
2314 Note:
2315 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
2316 an array on device is always returned and is guaranteed to contain the matrix's latest data.
2317
2318 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`,
2319 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
2320 @*/
MatDenseGetArrayAndMemType(Mat A,PetscScalar * array[],PetscMemType * mtype)2321 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype)
2322 {
2323 PetscBool isMPI;
2324
2325 PetscFunctionBegin;
2326 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2327 PetscAssertPointer(array, 2);
2328 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
2329 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2330 if (isMPI) {
2331 /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */
2332 PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
2333 } else {
2334 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *);
2335
2336 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr));
2337 if (fptr) {
2338 PetscCall((*fptr)(A, array, mtype));
2339 } else {
2340 PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2341 if (mtype) *mtype = PETSC_MEMTYPE_HOST;
2342 }
2343 }
2344 PetscFunctionReturn(PETSC_SUCCESS);
2345 }
2346
2347 /*@C
2348 MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()`
2349
2350 Logically Collective
2351
2352 Input Parameters:
2353 + A - a dense matrix
2354 - array - pointer to the data
2355
2356 Level: intermediate
2357
2358 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2359 @*/
MatDenseRestoreArrayAndMemType(Mat A,PetscScalar * array[])2360 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[])
2361 {
2362 PetscBool isMPI;
2363
2364 PetscFunctionBegin;
2365 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2366 if (array) PetscAssertPointer(array, 2);
2367 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2368 if (isMPI) {
2369 PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array));
2370 } else {
2371 PetscErrorCode (*fptr)(Mat, PetscScalar **);
2372
2373 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr));
2374 if (fptr) {
2375 PetscCall((*fptr)(A, array));
2376 } else {
2377 PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2378 }
2379 if (array) *array = NULL;
2380 }
2381 PetscCall(PetscObjectStateIncrease((PetscObject)A));
2382 PetscFunctionReturn(PETSC_SUCCESS);
2383 }
2384
2385 /*@C
2386 MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored
2387
2388 Logically Collective
2389
2390 Input Parameter:
2391 . A - a dense matrix
2392
2393 Output Parameters:
2394 + array - pointer to the data
2395 - mtype - memory type of the returned pointer
2396
2397 Level: intermediate
2398
2399 Note:
2400 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
2401 an array on device is always returned and is guaranteed to contain the matrix's latest data.
2402
2403 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`,
2404 `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
2405 @*/
MatDenseGetArrayReadAndMemType(Mat A,const PetscScalar * array[],PetscMemType * mtype)2406 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype)
2407 {
2408 PetscBool isMPI;
2409
2410 PetscFunctionBegin;
2411 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2412 PetscAssertPointer(array, 2);
2413 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
2414 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2415 if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */
2416 PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
2417 } else {
2418 PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *);
2419
2420 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr));
2421 if (fptr) {
2422 PetscCall((*fptr)(A, array, mtype));
2423 } else {
2424 PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2425 if (mtype) *mtype = PETSC_MEMTYPE_HOST;
2426 }
2427 }
2428 PetscFunctionReturn(PETSC_SUCCESS);
2429 }
2430
2431 /*@C
2432 MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()`
2433
2434 Logically Collective
2435
2436 Input Parameters:
2437 + A - a dense matrix
2438 - array - pointer to the data
2439
2440 Level: intermediate
2441
2442 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2443 @*/
MatDenseRestoreArrayReadAndMemType(Mat A,const PetscScalar * array[])2444 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[])
2445 {
2446 PetscBool isMPI;
2447
2448 PetscFunctionBegin;
2449 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2450 if (array) PetscAssertPointer(array, 2);
2451 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2452 if (isMPI) {
2453 PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array));
2454 } else {
2455 PetscErrorCode (*fptr)(Mat, const PetscScalar **);
2456
2457 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr));
2458 if (fptr) {
2459 PetscCall((*fptr)(A, array));
2460 } else {
2461 PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2462 }
2463 if (array) *array = NULL;
2464 }
2465 PetscFunctionReturn(PETSC_SUCCESS);
2466 }
2467
2468 /*@C
2469 MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored
2470
2471 Logically Collective
2472
2473 Input Parameter:
2474 . A - a dense matrix
2475
2476 Output Parameters:
2477 + array - pointer to the data
2478 - mtype - memory type of the returned pointer
2479
2480 Level: intermediate
2481
2482 Note:
2483 If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
2484 an array on device is always returned and is guaranteed to contain the matrix's latest data.
2485
2486 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`,
2487 `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
2488 @*/
MatDenseGetArrayWriteAndMemType(Mat A,PetscScalar * array[],PetscMemType * mtype)2489 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype)
2490 {
2491 PetscBool isMPI;
2492
2493 PetscFunctionBegin;
2494 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2495 PetscAssertPointer(array, 2);
2496 PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
2497 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2498 if (isMPI) {
2499 PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
2500 } else {
2501 PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *);
2502
2503 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr));
2504 if (fptr) {
2505 PetscCall((*fptr)(A, array, mtype));
2506 } else {
2507 PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2508 if (mtype) *mtype = PETSC_MEMTYPE_HOST;
2509 }
2510 }
2511 PetscFunctionReturn(PETSC_SUCCESS);
2512 }
2513
2514 /*@C
2515 MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()`
2516
2517 Logically Collective
2518
2519 Input Parameters:
2520 + A - a dense matrix
2521 - array - pointer to the data
2522
2523 Level: intermediate
2524
2525 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2526 @*/
MatDenseRestoreArrayWriteAndMemType(Mat A,PetscScalar * array[])2527 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[])
2528 {
2529 PetscBool isMPI;
2530
2531 PetscFunctionBegin;
2532 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2533 if (array) PetscAssertPointer(array, 2);
2534 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2535 if (isMPI) {
2536 PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array));
2537 } else {
2538 PetscErrorCode (*fptr)(Mat, PetscScalar **);
2539
2540 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr));
2541 if (fptr) {
2542 PetscCall((*fptr)(A, array));
2543 } else {
2544 PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2545 }
2546 if (array) *array = NULL;
2547 }
2548 PetscCall(PetscObjectStateIncrease((PetscObject)A));
2549 PetscFunctionReturn(PETSC_SUCCESS);
2550 }
2551
MatCreateSubMatrix_SeqDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat * B)2552 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
2553 {
2554 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2555 PetscInt i, j, nrows, ncols, ldb;
2556 const PetscInt *irow, *icol;
2557 PetscScalar *av, *bv, *v = mat->v;
2558 Mat newmat;
2559
2560 PetscFunctionBegin;
2561 PetscCall(ISGetIndices(isrow, &irow));
2562 PetscCall(ISGetIndices(iscol, &icol));
2563 PetscCall(ISGetLocalSize(isrow, &nrows));
2564 PetscCall(ISGetLocalSize(iscol, &ncols));
2565
2566 /* Check submatrixcall */
2567 if (scall == MAT_REUSE_MATRIX) {
2568 PetscInt n_cols, n_rows;
2569 PetscCall(MatGetSize(*B, &n_rows, &n_cols));
2570 if (n_rows != nrows || n_cols != ncols) {
2571 /* resize the result matrix to match number of requested rows/columns */
2572 PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols));
2573 }
2574 newmat = *B;
2575 } else {
2576 /* Create and fill new matrix */
2577 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
2578 PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols));
2579 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
2580 PetscCall(MatSeqDenseSetPreallocation(newmat, NULL));
2581 }
2582
2583 /* Now extract the data pointers and do the copy,column at a time */
2584 PetscCall(MatDenseGetArray(newmat, &bv));
2585 PetscCall(MatDenseGetLDA(newmat, &ldb));
2586 for (i = 0; i < ncols; i++) {
2587 av = v + mat->lda * icol[i];
2588 for (j = 0; j < nrows; j++) bv[j] = av[irow[j]];
2589 bv += ldb;
2590 }
2591 PetscCall(MatDenseRestoreArray(newmat, &bv));
2592
2593 /* Assemble the matrices so that the correct flags are set */
2594 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
2595 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
2596
2597 /* Free work space */
2598 PetscCall(ISRestoreIndices(isrow, &irow));
2599 PetscCall(ISRestoreIndices(iscol, &icol));
2600 *B = newmat;
2601 PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603
MatCreateSubMatrices_SeqDense(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * B[])2604 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
2605 {
2606 PetscInt i;
2607
2608 PetscFunctionBegin;
2609 if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B));
2610
2611 for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i]));
2612 PetscFunctionReturn(PETSC_SUCCESS);
2613 }
2614
MatCopy_SeqDense(Mat A,Mat B,MatStructure str)2615 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str)
2616 {
2617 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data;
2618 const PetscScalar *va;
2619 PetscScalar *vb;
2620 PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j;
2621
2622 PetscFunctionBegin;
2623 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2624 if (A->ops->copy != B->ops->copy) {
2625 PetscCall(MatCopy_Basic(A, B, str));
2626 PetscFunctionReturn(PETSC_SUCCESS);
2627 }
2628 PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)");
2629 PetscCall(MatDenseGetArrayRead(A, &va));
2630 PetscCall(MatDenseGetArray(B, &vb));
2631 if (lda1 > m || lda2 > m) {
2632 for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m));
2633 } else {
2634 PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n));
2635 }
2636 PetscCall(MatDenseRestoreArray(B, &vb));
2637 PetscCall(MatDenseRestoreArrayRead(A, &va));
2638 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2639 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2640 PetscFunctionReturn(PETSC_SUCCESS);
2641 }
2642
MatSetUp_SeqDense(Mat A)2643 PetscErrorCode MatSetUp_SeqDense(Mat A)
2644 {
2645 PetscFunctionBegin;
2646 PetscCall(PetscLayoutSetUp(A->rmap));
2647 PetscCall(PetscLayoutSetUp(A->cmap));
2648 if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL));
2649 PetscFunctionReturn(PETSC_SUCCESS);
2650 }
2651
MatConjugate_SeqDense(Mat A)2652 PetscErrorCode MatConjugate_SeqDense(Mat A)
2653 {
2654 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2655 PetscInt i, j;
2656 PetscInt min = PetscMin(A->rmap->n, A->cmap->n);
2657 PetscScalar *aa;
2658
2659 PetscFunctionBegin;
2660 PetscCall(MatDenseGetArray(A, &aa));
2661 for (j = 0; j < A->cmap->n; j++)
2662 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]);
2663 PetscCall(MatDenseRestoreArray(A, &aa));
2664 if (mat->tau)
2665 for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]);
2666 PetscFunctionReturn(PETSC_SUCCESS);
2667 }
2668
MatRealPart_SeqDense(Mat A)2669 static PetscErrorCode MatRealPart_SeqDense(Mat A)
2670 {
2671 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2672 PetscInt i, j;
2673 PetscScalar *aa;
2674
2675 PetscFunctionBegin;
2676 PetscCall(MatDenseGetArray(A, &aa));
2677 for (j = 0; j < A->cmap->n; j++) {
2678 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]);
2679 }
2680 PetscCall(MatDenseRestoreArray(A, &aa));
2681 PetscFunctionReturn(PETSC_SUCCESS);
2682 }
2683
MatImaginaryPart_SeqDense(Mat A)2684 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A)
2685 {
2686 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2687 PetscInt i, j;
2688 PetscScalar *aa;
2689
2690 PetscFunctionBegin;
2691 PetscCall(MatDenseGetArray(A, &aa));
2692 for (j = 0; j < A->cmap->n; j++) {
2693 for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]);
2694 }
2695 PetscCall(MatDenseRestoreArray(A, &aa));
2696 PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698
MatMatMultSymbolic_SeqDense_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)2699 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2700 {
2701 PetscInt m = A->rmap->n, n = B->cmap->n;
2702 PetscBool cisdense = PETSC_FALSE;
2703
2704 PetscFunctionBegin;
2705 PetscCall(MatSetSizes(C, m, n, m, n));
2706 #if defined(PETSC_HAVE_CUDA)
2707 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
2708 #endif
2709 #if defined(PETSC_HAVE_HIP)
2710 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
2711 #endif
2712 if (!cisdense) {
2713 PetscBool flg;
2714
2715 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
2716 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
2717 }
2718 PetscCall(MatSetUp(C));
2719 PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721
MatMatMultNumeric_SeqDense_SeqDense(Mat A,Mat B,Mat C)2722 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2723 {
2724 Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data;
2725 PetscBLASInt m, n, k;
2726 const PetscScalar *av, *bv;
2727 PetscScalar *cv;
2728 PetscScalar _DOne = 1.0, _DZero = 0.0;
2729
2730 PetscFunctionBegin;
2731 PetscCall(PetscBLASIntCast(C->rmap->n, &m));
2732 PetscCall(PetscBLASIntCast(C->cmap->n, &n));
2733 PetscCall(PetscBLASIntCast(A->cmap->n, &k));
2734 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
2735 PetscCall(MatDenseGetArrayRead(A, &av));
2736 PetscCall(MatDenseGetArrayRead(B, &bv));
2737 PetscCall(MatDenseGetArrayWrite(C, &cv));
2738 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2739 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
2740 PetscCall(MatDenseRestoreArrayRead(A, &av));
2741 PetscCall(MatDenseRestoreArrayRead(B, &bv));
2742 PetscCall(MatDenseRestoreArrayWrite(C, &cv));
2743 PetscFunctionReturn(PETSC_SUCCESS);
2744 }
2745
MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)2746 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2747 {
2748 PetscInt m = A->rmap->n, n = B->rmap->n;
2749 PetscBool cisdense = PETSC_FALSE;
2750
2751 PetscFunctionBegin;
2752 PetscCall(MatSetSizes(C, m, n, m, n));
2753 #if defined(PETSC_HAVE_CUDA)
2754 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
2755 #endif
2756 #if defined(PETSC_HAVE_HIP)
2757 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
2758 #endif
2759 if (!cisdense) {
2760 PetscBool flg;
2761
2762 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
2763 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
2764 }
2765 PetscCall(MatSetUp(C));
2766 PetscFunctionReturn(PETSC_SUCCESS);
2767 }
2768
MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A,Mat B,Mat C)2769 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2770 {
2771 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2772 Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2773 Mat_SeqDense *c = (Mat_SeqDense *)C->data;
2774 const PetscScalar *av, *bv;
2775 PetscScalar *cv;
2776 PetscBLASInt m, n, k;
2777 PetscScalar _DOne = 1.0, _DZero = 0.0;
2778
2779 PetscFunctionBegin;
2780 PetscCall(PetscBLASIntCast(C->rmap->n, &m));
2781 PetscCall(PetscBLASIntCast(C->cmap->n, &n));
2782 PetscCall(PetscBLASIntCast(A->cmap->n, &k));
2783 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
2784 PetscCall(MatDenseGetArrayRead(A, &av));
2785 PetscCall(MatDenseGetArrayRead(B, &bv));
2786 PetscCall(MatDenseGetArrayWrite(C, &cv));
2787 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2788 PetscCall(MatDenseRestoreArrayRead(A, &av));
2789 PetscCall(MatDenseRestoreArrayRead(B, &bv));
2790 PetscCall(MatDenseRestoreArrayWrite(C, &cv));
2791 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
2792 PetscFunctionReturn(PETSC_SUCCESS);
2793 }
2794
MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)2795 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2796 {
2797 PetscInt m = A->cmap->n, n = B->cmap->n;
2798 PetscBool cisdense = PETSC_FALSE;
2799
2800 PetscFunctionBegin;
2801 PetscCall(MatSetSizes(C, m, n, m, n));
2802 #if defined(PETSC_HAVE_CUDA)
2803 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
2804 #endif
2805 #if defined(PETSC_HAVE_HIP)
2806 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
2807 #endif
2808 if (!cisdense) {
2809 PetscBool flg;
2810
2811 PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
2812 PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
2813 }
2814 PetscCall(MatSetUp(C));
2815 PetscFunctionReturn(PETSC_SUCCESS);
2816 }
2817
MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A,Mat B,Mat C)2818 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2819 {
2820 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2821 Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2822 Mat_SeqDense *c = (Mat_SeqDense *)C->data;
2823 const PetscScalar *av, *bv;
2824 PetscScalar *cv;
2825 PetscBLASInt m, n, k;
2826 PetscScalar _DOne = 1.0, _DZero = 0.0;
2827
2828 PetscFunctionBegin;
2829 PetscCall(PetscBLASIntCast(C->rmap->n, &m));
2830 PetscCall(PetscBLASIntCast(C->cmap->n, &n));
2831 PetscCall(PetscBLASIntCast(A->rmap->n, &k));
2832 if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
2833 PetscCall(MatDenseGetArrayRead(A, &av));
2834 PetscCall(MatDenseGetArrayRead(B, &bv));
2835 PetscCall(MatDenseGetArrayWrite(C, &cv));
2836 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2837 PetscCall(MatDenseRestoreArrayRead(A, &av));
2838 PetscCall(MatDenseRestoreArrayRead(B, &bv));
2839 PetscCall(MatDenseRestoreArrayWrite(C, &cv));
2840 PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
2841 PetscFunctionReturn(PETSC_SUCCESS);
2842 }
2843
MatProductSetFromOptions_SeqDense_AB(Mat C)2844 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C)
2845 {
2846 PetscFunctionBegin;
2847 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense;
2848 C->ops->productsymbolic = MatProductSymbolic_AB;
2849 PetscFunctionReturn(PETSC_SUCCESS);
2850 }
2851
MatProductSetFromOptions_SeqDense_AtB(Mat C)2852 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C)
2853 {
2854 PetscFunctionBegin;
2855 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense;
2856 C->ops->productsymbolic = MatProductSymbolic_AtB;
2857 PetscFunctionReturn(PETSC_SUCCESS);
2858 }
2859
MatProductSetFromOptions_SeqDense_ABt(Mat C)2860 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C)
2861 {
2862 PetscFunctionBegin;
2863 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense;
2864 C->ops->productsymbolic = MatProductSymbolic_ABt;
2865 PetscFunctionReturn(PETSC_SUCCESS);
2866 }
2867
MatProductSetFromOptions_SeqDense(Mat C)2868 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C)
2869 {
2870 Mat_Product *product = C->product;
2871
2872 PetscFunctionBegin;
2873 switch (product->type) {
2874 case MATPRODUCT_AB:
2875 PetscCall(MatProductSetFromOptions_SeqDense_AB(C));
2876 break;
2877 case MATPRODUCT_AtB:
2878 PetscCall(MatProductSetFromOptions_SeqDense_AtB(C));
2879 break;
2880 case MATPRODUCT_ABt:
2881 PetscCall(MatProductSetFromOptions_SeqDense_ABt(C));
2882 break;
2883 default:
2884 break;
2885 }
2886 PetscFunctionReturn(PETSC_SUCCESS);
2887 }
2888
MatGetRowMax_SeqDense(Mat A,Vec v,PetscInt idx[])2889 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[])
2890 {
2891 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2892 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2893 PetscScalar *x;
2894 const PetscScalar *aa;
2895
2896 PetscFunctionBegin;
2897 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2898 PetscCall(VecGetArray(v, &x));
2899 PetscCall(VecGetLocalSize(v, &p));
2900 PetscCall(MatDenseGetArrayRead(A, &aa));
2901 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2902 for (i = 0; i < m; i++) {
2903 x[i] = aa[i];
2904 if (idx) idx[i] = 0;
2905 for (j = 1; j < n; j++) {
2906 if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) {
2907 x[i] = aa[i + a->lda * j];
2908 if (idx) idx[i] = j;
2909 }
2910 }
2911 }
2912 PetscCall(MatDenseRestoreArrayRead(A, &aa));
2913 PetscCall(VecRestoreArray(v, &x));
2914 PetscFunctionReturn(PETSC_SUCCESS);
2915 }
2916
MatGetRowMaxAbs_SeqDense(Mat A,Vec v,PetscInt idx[])2917 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[])
2918 {
2919 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2920 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2921 PetscScalar *x;
2922 PetscReal atmp;
2923 const PetscScalar *aa;
2924
2925 PetscFunctionBegin;
2926 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2927 PetscCall(VecGetArray(v, &x));
2928 PetscCall(VecGetLocalSize(v, &p));
2929 PetscCall(MatDenseGetArrayRead(A, &aa));
2930 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2931 for (i = 0; i < m; i++) {
2932 x[i] = PetscAbsScalar(aa[i]);
2933 for (j = 1; j < n; j++) {
2934 atmp = PetscAbsScalar(aa[i + a->lda * j]);
2935 if (PetscAbsScalar(x[i]) < atmp) {
2936 x[i] = atmp;
2937 if (idx) idx[i] = j;
2938 }
2939 }
2940 }
2941 PetscCall(MatDenseRestoreArrayRead(A, &aa));
2942 PetscCall(VecRestoreArray(v, &x));
2943 PetscFunctionReturn(PETSC_SUCCESS);
2944 }
2945
MatGetRowMin_SeqDense(Mat A,Vec v,PetscInt idx[])2946 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[])
2947 {
2948 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2949 PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2950 PetscScalar *x;
2951 const PetscScalar *aa;
2952
2953 PetscFunctionBegin;
2954 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2955 PetscCall(MatDenseGetArrayRead(A, &aa));
2956 PetscCall(VecGetArray(v, &x));
2957 PetscCall(VecGetLocalSize(v, &p));
2958 PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2959 for (i = 0; i < m; i++) {
2960 x[i] = aa[i];
2961 if (idx) idx[i] = 0;
2962 for (j = 1; j < n; j++) {
2963 if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) {
2964 x[i] = aa[i + a->lda * j];
2965 if (idx) idx[i] = j;
2966 }
2967 }
2968 }
2969 PetscCall(VecRestoreArray(v, &x));
2970 PetscCall(MatDenseRestoreArrayRead(A, &aa));
2971 PetscFunctionReturn(PETSC_SUCCESS);
2972 }
2973
MatGetColumnVector_SeqDense(Mat A,Vec v,PetscInt col)2974 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col)
2975 {
2976 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2977 PetscScalar *x;
2978 const PetscScalar *aa;
2979
2980 PetscFunctionBegin;
2981 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2982 PetscCall(MatDenseGetArrayRead(A, &aa));
2983 PetscCall(VecGetArray(v, &x));
2984 PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n));
2985 PetscCall(VecRestoreArray(v, &x));
2986 PetscCall(MatDenseRestoreArrayRead(A, &aa));
2987 PetscFunctionReturn(PETSC_SUCCESS);
2988 }
2989
MatGetColumnReductions_SeqDense(Mat A,PetscInt type,PetscReal * reductions)2990 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions)
2991 {
2992 PetscInt i, j, m, n;
2993 const PetscScalar *a;
2994
2995 PetscFunctionBegin;
2996 PetscCall(MatGetSize(A, &m, &n));
2997 PetscCall(PetscArrayzero(reductions, n));
2998 PetscCall(MatDenseGetArrayRead(A, &a));
2999 if (type == NORM_2) {
3000 for (i = 0; i < n; i++) {
3001 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]);
3002 a = PetscSafePointerPlusOffset(a, m);
3003 }
3004 } else if (type == NORM_1) {
3005 for (i = 0; i < n; i++) {
3006 for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]);
3007 a = PetscSafePointerPlusOffset(a, m);
3008 }
3009 } else if (type == NORM_INFINITY) {
3010 for (i = 0; i < n; i++) {
3011 for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]);
3012 a = PetscSafePointerPlusOffset(a, m);
3013 }
3014 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
3015 for (i = 0; i < n; i++) {
3016 for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]);
3017 a = PetscSafePointerPlusOffset(a, m);
3018 }
3019 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
3020 for (i = 0; i < n; i++) {
3021 for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]);
3022 a = PetscSafePointerPlusOffset(a, m);
3023 }
3024 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
3025 PetscCall(MatDenseRestoreArrayRead(A, &a));
3026 if (type == NORM_2) {
3027 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
3028 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
3029 for (i = 0; i < n; i++) reductions[i] /= m;
3030 }
3031 PetscFunctionReturn(PETSC_SUCCESS);
3032 }
3033
MatSetRandom_SeqDense(Mat x,PetscRandom rctx)3034 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx)
3035 {
3036 PetscScalar *a;
3037 PetscInt lda, m, n, i, j;
3038
3039 PetscFunctionBegin;
3040 PetscCall(MatGetSize(x, &m, &n));
3041 PetscCall(MatDenseGetLDA(x, &lda));
3042 PetscCall(MatDenseGetArrayWrite(x, &a));
3043 for (j = 0; j < n; j++) {
3044 for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i));
3045 }
3046 PetscCall(MatDenseRestoreArrayWrite(x, &a));
3047 PetscFunctionReturn(PETSC_SUCCESS);
3048 }
3049
3050 /* vals is not const */
MatDenseGetColumn_SeqDense(Mat A,PetscInt col,PetscScalar ** vals)3051 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals)
3052 {
3053 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3054 PetscScalar *v;
3055
3056 PetscFunctionBegin;
3057 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3058 PetscCall(MatDenseGetArray(A, &v));
3059 *vals = v + col * a->lda;
3060 PetscCall(MatDenseRestoreArray(A, &v));
3061 PetscFunctionReturn(PETSC_SUCCESS);
3062 }
3063
MatDenseRestoreColumn_SeqDense(Mat A,PetscScalar ** vals)3064 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals)
3065 {
3066 PetscFunctionBegin;
3067 if (vals) *vals = NULL; /* user cannot accidentally use the array later */
3068 PetscFunctionReturn(PETSC_SUCCESS);
3069 }
3070
3071 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense,
3072 MatGetRow_SeqDense,
3073 MatRestoreRow_SeqDense,
3074 MatMult_SeqDense,
3075 /* 4*/ MatMultAdd_SeqDense,
3076 MatMultTranspose_SeqDense,
3077 MatMultTransposeAdd_SeqDense,
3078 NULL,
3079 NULL,
3080 NULL,
3081 /* 10*/ NULL,
3082 MatLUFactor_SeqDense,
3083 MatCholeskyFactor_SeqDense,
3084 MatSOR_SeqDense,
3085 MatTranspose_SeqDense,
3086 /* 15*/ MatGetInfo_SeqDense,
3087 MatEqual_SeqDense,
3088 MatGetDiagonal_SeqDense,
3089 MatDiagonalScale_SeqDense,
3090 MatNorm_SeqDense,
3091 /* 20*/ NULL,
3092 NULL,
3093 MatSetOption_SeqDense,
3094 MatZeroEntries_SeqDense,
3095 /* 24*/ MatZeroRows_SeqDense,
3096 NULL,
3097 NULL,
3098 NULL,
3099 NULL,
3100 /* 29*/ MatSetUp_SeqDense,
3101 NULL,
3102 NULL,
3103 NULL,
3104 NULL,
3105 /* 34*/ MatDuplicate_SeqDense,
3106 NULL,
3107 NULL,
3108 NULL,
3109 NULL,
3110 /* 39*/ MatAXPY_SeqDense,
3111 MatCreateSubMatrices_SeqDense,
3112 NULL,
3113 MatGetValues_SeqDense,
3114 MatCopy_SeqDense,
3115 /* 44*/ MatGetRowMax_SeqDense,
3116 MatScale_SeqDense,
3117 MatShift_SeqDense,
3118 NULL,
3119 MatZeroRowsColumns_SeqDense,
3120 /* 49*/ MatSetRandom_SeqDense,
3121 NULL,
3122 NULL,
3123 NULL,
3124 NULL,
3125 /* 54*/ NULL,
3126 NULL,
3127 NULL,
3128 NULL,
3129 NULL,
3130 /* 59*/ MatCreateSubMatrix_SeqDense,
3131 MatDestroy_SeqDense,
3132 MatView_SeqDense,
3133 NULL,
3134 NULL,
3135 /* 64*/ NULL,
3136 NULL,
3137 NULL,
3138 NULL,
3139 MatGetRowMaxAbs_SeqDense,
3140 /* 69*/ NULL,
3141 NULL,
3142 NULL,
3143 NULL,
3144 NULL,
3145 /* 74*/ NULL,
3146 NULL,
3147 NULL,
3148 NULL,
3149 MatLoad_SeqDense,
3150 /* 79*/ MatIsSymmetric_SeqDense,
3151 MatIsHermitian_SeqDense,
3152 NULL,
3153 NULL,
3154 NULL,
3155 /* 84*/ NULL,
3156 MatMatMultNumeric_SeqDense_SeqDense,
3157 NULL,
3158 NULL,
3159 MatMatTransposeMultNumeric_SeqDense_SeqDense,
3160 /* 89*/ NULL,
3161 MatProductSetFromOptions_SeqDense,
3162 NULL,
3163 NULL,
3164 MatConjugate_SeqDense,
3165 /* 94*/ NULL,
3166 NULL,
3167 MatRealPart_SeqDense,
3168 MatImaginaryPart_SeqDense,
3169 NULL,
3170 /* 99*/ NULL,
3171 NULL,
3172 NULL,
3173 MatGetRowMin_SeqDense,
3174 MatGetColumnVector_SeqDense,
3175 /*104*/ NULL,
3176 NULL,
3177 NULL,
3178 NULL,
3179 NULL,
3180 /*109*/ NULL,
3181 NULL,
3182 MatMultHermitianTranspose_SeqDense,
3183 MatMultHermitianTransposeAdd_SeqDense,
3184 NULL,
3185 /*114*/ NULL,
3186 MatGetColumnReductions_SeqDense,
3187 NULL,
3188 NULL,
3189 NULL,
3190 /*119*/ NULL,
3191 NULL,
3192 MatTransposeMatMultNumeric_SeqDense_SeqDense,
3193 NULL,
3194 NULL,
3195 /*124*/ NULL,
3196 NULL,
3197 NULL,
3198 NULL,
3199 NULL,
3200 /*129*/ NULL,
3201 MatCreateMPIMatConcatenateSeqMat_SeqDense,
3202 NULL,
3203 NULL,
3204 NULL,
3205 /*134*/ NULL,
3206 NULL,
3207 NULL,
3208 NULL,
3209 NULL,
3210 /*139*/ NULL,
3211 NULL,
3212 NULL,
3213 NULL,
3214 NULL};
3215
3216 /*@
3217 MatCreateSeqDense - Creates a `MATSEQDENSE` that
3218 is stored in column major order (the usual Fortran format).
3219
3220 Collective
3221
3222 Input Parameters:
3223 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3224 . m - number of rows
3225 . n - number of columns
3226 - data - optional location of matrix data in column major order. Use `NULL` for PETSc
3227 to control all matrix memory allocation.
3228
3229 Output Parameter:
3230 . A - the matrix
3231
3232 Level: intermediate
3233
3234 Note:
3235 The data input variable is intended primarily for Fortran programmers
3236 who wish to allocate their own matrix memory space. Most users should
3237 set `data` = `NULL`.
3238
3239 Developer Note:
3240 Many of the matrix operations for this variant use the BLAS and LAPACK routines.
3241
3242 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
3243 @*/
MatCreateSeqDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscScalar data[],Mat * A)3244 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A)
3245 {
3246 PetscFunctionBegin;
3247 PetscCall(MatCreate(comm, A));
3248 PetscCall(MatSetSizes(*A, m, n, m, n));
3249 PetscCall(MatSetType(*A, MATSEQDENSE));
3250 PetscCall(MatSeqDenseSetPreallocation(*A, data));
3251 PetscFunctionReturn(PETSC_SUCCESS);
3252 }
3253
3254 /*@
3255 MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix
3256
3257 Collective
3258
3259 Input Parameters:
3260 + B - the matrix
3261 - data - the array (or `NULL`)
3262
3263 Level: intermediate
3264
3265 Note:
3266 The data input variable is intended primarily for Fortran programmers
3267 who wish to allocate their own matrix memory space. Most users should
3268 need not call this routine.
3269
3270 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()`
3271 @*/
MatSeqDenseSetPreallocation(Mat B,PetscScalar data[])3272 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[])
3273 {
3274 PetscFunctionBegin;
3275 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3276 PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data));
3277 PetscFunctionReturn(PETSC_SUCCESS);
3278 }
3279
MatSeqDenseSetPreallocation_SeqDense(Mat B,PetscScalar * data)3280 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data)
3281 {
3282 Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3283
3284 PetscFunctionBegin;
3285 PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3286 B->preallocated = PETSC_TRUE;
3287
3288 PetscCall(PetscLayoutSetUp(B->rmap));
3289 PetscCall(PetscLayoutSetUp(B->cmap));
3290
3291 if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda));
3292
3293 if (!data) { /* petsc-allocated storage */
3294 if (!b->user_alloc) PetscCall(PetscFree(b->v));
3295 PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v));
3296
3297 b->user_alloc = PETSC_FALSE;
3298 } else { /* user-allocated storage */
3299 if (!b->user_alloc) PetscCall(PetscFree(b->v));
3300 b->v = data;
3301 b->user_alloc = PETSC_TRUE;
3302 }
3303 B->assembled = PETSC_TRUE;
3304 PetscFunctionReturn(PETSC_SUCCESS);
3305 }
3306
3307 #if defined(PETSC_HAVE_ELEMENTAL)
MatConvert_SeqDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)3308 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3309 {
3310 Mat mat_elemental;
3311 const PetscScalar *array;
3312 PetscScalar *v_colwise;
3313 PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols;
3314
3315 PetscFunctionBegin;
3316 PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols));
3317 PetscCall(MatDenseGetArrayRead(A, &array));
3318 /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
3319 k = 0;
3320 for (j = 0; j < N; j++) {
3321 cols[j] = j;
3322 for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++];
3323 }
3324 for (i = 0; i < M; i++) rows[i] = i;
3325 PetscCall(MatDenseRestoreArrayRead(A, &array));
3326
3327 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
3328 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
3329 PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
3330 PetscCall(MatSetUp(mat_elemental));
3331
3332 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
3333 PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES));
3334 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
3335 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
3336 PetscCall(PetscFree3(v_colwise, rows, cols));
3337
3338 if (reuse == MAT_INPLACE_MATRIX) {
3339 PetscCall(MatHeaderReplace(A, &mat_elemental));
3340 } else {
3341 *newmat = mat_elemental;
3342 }
3343 PetscFunctionReturn(PETSC_SUCCESS);
3344 }
3345 #endif
3346
MatDenseSetLDA_SeqDense(Mat B,PetscInt lda)3347 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda)
3348 {
3349 Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3350 PetscBool data;
3351
3352 PetscFunctionBegin;
3353 data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
3354 PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage");
3355 PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n);
3356 PetscCall(PetscBLASIntCast(lda, &b->lda));
3357 PetscFunctionReturn(PETSC_SUCCESS);
3358 }
3359
MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3360 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3361 {
3362 PetscFunctionBegin;
3363 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat));
3364 PetscFunctionReturn(PETSC_SUCCESS);
3365 }
3366
MatDenseCreateColumnVec_Private(Mat A,Vec * v)3367 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v)
3368 {
3369 PetscBool isstd, iskok, iscuda, iship;
3370 PetscMPIInt size;
3371 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)
3372 /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */
3373 const PetscScalar *a;
3374 #endif
3375
3376 PetscFunctionBegin;
3377 *v = NULL;
3378 PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, ""));
3379 PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, ""));
3380 PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, ""));
3381 PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, ""));
3382 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3383 if (isstd) {
3384 if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v));
3385 else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v));
3386 } else if (iskok) {
3387 PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support");
3388 #if PetscDefined(HAVE_KOKKOS_KERNELS)
3389 if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v));
3390 else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v));
3391 #endif
3392 } else if (iscuda) {
3393 PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support");
3394 #if PetscDefined(HAVE_CUDA)
3395 PetscCall(MatDenseCUDAGetArrayRead(A, &a));
3396 if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v));
3397 else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v));
3398 #endif
3399 } else if (iship) {
3400 PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support");
3401 #if PetscDefined(HAVE_HIP)
3402 PetscCall(MatDenseHIPGetArrayRead(A, &a));
3403 if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v));
3404 else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v));
3405 #endif
3406 }
3407 PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype);
3408 PetscFunctionReturn(PETSC_SUCCESS);
3409 }
3410
MatDenseGetColumnVec_SeqDense(Mat A,PetscInt col,Vec * v)3411 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3412 {
3413 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3414
3415 PetscFunctionBegin;
3416 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3417 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3418 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
3419 a->vecinuse = col + 1;
3420 PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse));
3421 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda));
3422 *v = a->cvec;
3423 PetscFunctionReturn(PETSC_SUCCESS);
3424 }
3425
MatDenseRestoreColumnVec_SeqDense(Mat A,PetscInt col,Vec * v)3426 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3427 {
3428 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3429
3430 PetscFunctionBegin;
3431 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
3432 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
3433 VecCheckAssembled(a->cvec);
3434 a->vecinuse = 0;
3435 PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse));
3436 PetscCall(VecResetArray(a->cvec));
3437 if (v) *v = NULL;
3438 PetscFunctionReturn(PETSC_SUCCESS);
3439 }
3440
MatDenseGetColumnVecRead_SeqDense(Mat A,PetscInt col,Vec * v)3441 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3442 {
3443 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3444
3445 PetscFunctionBegin;
3446 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3447 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3448 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
3449 a->vecinuse = col + 1;
3450 PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse));
3451 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda)));
3452 PetscCall(VecLockReadPush(a->cvec));
3453 *v = a->cvec;
3454 PetscFunctionReturn(PETSC_SUCCESS);
3455 }
3456
MatDenseRestoreColumnVecRead_SeqDense(Mat A,PetscInt col,Vec * v)3457 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3458 {
3459 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3460
3461 PetscFunctionBegin;
3462 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
3463 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
3464 VecCheckAssembled(a->cvec);
3465 a->vecinuse = 0;
3466 PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse));
3467 PetscCall(VecLockReadPop(a->cvec));
3468 PetscCall(VecResetArray(a->cvec));
3469 if (v) *v = NULL;
3470 PetscFunctionReturn(PETSC_SUCCESS);
3471 }
3472
MatDenseGetColumnVecWrite_SeqDense(Mat A,PetscInt col,Vec * v)3473 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3474 {
3475 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3476
3477 PetscFunctionBegin;
3478 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3479 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3480 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
3481 a->vecinuse = col + 1;
3482 PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse));
3483 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda)));
3484 *v = a->cvec;
3485 PetscFunctionReturn(PETSC_SUCCESS);
3486 }
3487
MatDenseRestoreColumnVecWrite_SeqDense(Mat A,PetscInt col,Vec * v)3488 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3489 {
3490 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3491
3492 PetscFunctionBegin;
3493 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
3494 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
3495 VecCheckAssembled(a->cvec);
3496 a->vecinuse = 0;
3497 PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse));
3498 PetscCall(VecResetArray(a->cvec));
3499 if (v) *v = NULL;
3500 PetscFunctionReturn(PETSC_SUCCESS);
3501 }
3502
MatDenseGetSubMatrix_SeqDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat * v)3503 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3504 {
3505 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3506
3507 PetscFunctionBegin;
3508 PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3509 PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3510 if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat));
3511 if (!a->cmat) {
3512 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat));
3513 } else {
3514 PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda)));
3515 }
3516 PetscCall(MatDenseSetLDA(a->cmat, a->lda));
3517 a->matinuse = cbegin + 1;
3518 *v = a->cmat;
3519 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3520 A->offloadmask = PETSC_OFFLOAD_CPU;
3521 #endif
3522 PetscFunctionReturn(PETSC_SUCCESS);
3523 }
3524
MatDenseRestoreSubMatrix_SeqDense(Mat A,Mat * v)3525 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v)
3526 {
3527 Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3528
3529 PetscFunctionBegin;
3530 PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
3531 PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix");
3532 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
3533 a->matinuse = 0;
3534 PetscCall(MatDenseResetArray(a->cmat));
3535 *v = NULL;
3536 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3537 A->offloadmask = PETSC_OFFLOAD_CPU;
3538 #endif
3539 PetscFunctionReturn(PETSC_SUCCESS);
3540 }
3541
3542 /*MC
3543 MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices.
3544
3545 Options Database Key:
3546 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()`
3547
3548 Level: beginner
3549
3550 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()`
3551 M*/
MatCreate_SeqDense(Mat B)3552 PetscErrorCode MatCreate_SeqDense(Mat B)
3553 {
3554 Mat_SeqDense *b;
3555 PetscMPIInt size;
3556
3557 PetscFunctionBegin;
3558 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3559 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3560
3561 PetscCall(PetscNew(&b));
3562 B->data = (void *)b;
3563 B->ops[0] = MatOps_Values;
3564
3565 b->roworiented = PETSC_TRUE;
3566
3567 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense));
3568 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense));
3569 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense));
3570 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense));
3571 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense));
3572 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense));
3573 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense));
3574 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense));
3575 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense));
3576 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense));
3577 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense));
3578 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense));
3579 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ));
3580 #if defined(PETSC_HAVE_ELEMENTAL)
3581 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental));
3582 #endif
3583 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
3584 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK));
3585 #endif
3586 #if defined(PETSC_HAVE_CUDA)
3587 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA));
3588 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense));
3589 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense));
3590 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense));
3591 #endif
3592 #if defined(PETSC_HAVE_HIP)
3593 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP));
3594 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense));
3595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense));
3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense));
3597 #endif
3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense));
3599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
3600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense));
3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense));
3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense));
3603
3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense));
3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense));
3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense));
3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense));
3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense));
3609 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense));
3610 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense));
3611 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense));
3612 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense));
3613 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense));
3614 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultColumnRange_C", MatMultColumnRange_SeqDense));
3615 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense));
3616 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense));
3617 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense));
3618 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE));
3619 PetscFunctionReturn(PETSC_SUCCESS);
3620 }
3621
3622 /*@C
3623 MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding.
3624
3625 Not Collective
3626
3627 Input Parameters:
3628 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3629 - col - column index
3630
3631 Output Parameter:
3632 . vals - pointer to the data
3633
3634 Level: intermediate
3635
3636 Note:
3637 Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec`
3638
3639 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()`
3640 @*/
MatDenseGetColumn(Mat A,PetscInt col,PetscScalar * vals[])3641 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[])
3642 {
3643 PetscFunctionBegin;
3644 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3645 PetscValidLogicalCollectiveInt(A, col, 2);
3646 PetscAssertPointer(vals, 3);
3647 PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals));
3648 PetscFunctionReturn(PETSC_SUCCESS);
3649 }
3650
3651 /*@C
3652 MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`.
3653
3654 Not Collective
3655
3656 Input Parameters:
3657 + A - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3658 - vals - pointer to the data (may be `NULL`)
3659
3660 Level: intermediate
3661
3662 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()`
3663 @*/
MatDenseRestoreColumn(Mat A,PetscScalar * vals[])3664 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[])
3665 {
3666 PetscFunctionBegin;
3667 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3668 PetscAssertPointer(vals, 2);
3669 PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals));
3670 PetscFunctionReturn(PETSC_SUCCESS);
3671 }
3672
3673 /*@
3674 MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`.
3675
3676 Collective
3677
3678 Input Parameters:
3679 + A - the `Mat` object
3680 - col - the column index
3681
3682 Output Parameter:
3683 . v - the vector
3684
3685 Level: intermediate
3686
3687 Notes:
3688 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed.
3689
3690 Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access.
3691
3692 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()`
3693 @*/
MatDenseGetColumnVec(Mat A,PetscInt col,Vec * v)3694 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v)
3695 {
3696 PetscFunctionBegin;
3697 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3698 PetscValidType(A, 1);
3699 PetscValidLogicalCollectiveInt(A, col, 2);
3700 PetscAssertPointer(v, 3);
3701 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3702 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3703 PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3704 PetscFunctionReturn(PETSC_SUCCESS);
3705 }
3706
3707 /*@
3708 MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`.
3709
3710 Collective
3711
3712 Input Parameters:
3713 + A - the `Mat` object
3714 . col - the column index
3715 - v - the `Vec` object (may be `NULL`)
3716
3717 Level: intermediate
3718
3719 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3720 @*/
MatDenseRestoreColumnVec(Mat A,PetscInt col,Vec * v)3721 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v)
3722 {
3723 PetscFunctionBegin;
3724 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3725 PetscValidType(A, 1);
3726 PetscValidLogicalCollectiveInt(A, col, 2);
3727 if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3);
3728 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3729 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3730 PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3731 PetscFunctionReturn(PETSC_SUCCESS);
3732 }
3733
3734 /*@
3735 MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`.
3736
3737 Collective
3738
3739 Input Parameters:
3740 + A - the `Mat` object
3741 - col - the column index
3742
3743 Output Parameter:
3744 . v - the vector
3745
3746 Level: intermediate
3747
3748 Notes:
3749 The vector is owned by PETSc and users cannot modify it.
3750
3751 Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed.
3752
3753 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access.
3754
3755 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3756 @*/
MatDenseGetColumnVecRead(Mat A,PetscInt col,Vec * v)3757 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v)
3758 {
3759 PetscFunctionBegin;
3760 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3761 PetscValidType(A, 1);
3762 PetscValidLogicalCollectiveInt(A, col, 2);
3763 PetscAssertPointer(v, 3);
3764 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3765 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3766 PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3767 PetscFunctionReturn(PETSC_SUCCESS);
3768 }
3769
3770 /*@
3771 MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`.
3772
3773 Collective
3774
3775 Input Parameters:
3776 + A - the `Mat` object
3777 . col - the column index
3778 - v - the `Vec` object (may be `NULL`)
3779
3780 Level: intermediate
3781
3782 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()`
3783 @*/
MatDenseRestoreColumnVecRead(Mat A,PetscInt col,Vec * v)3784 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v)
3785 {
3786 PetscFunctionBegin;
3787 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3788 PetscValidType(A, 1);
3789 PetscValidLogicalCollectiveInt(A, col, 2);
3790 if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3);
3791 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3792 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3793 PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3794 PetscFunctionReturn(PETSC_SUCCESS);
3795 }
3796
3797 /*@
3798 MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`.
3799
3800 Collective
3801
3802 Input Parameters:
3803 + A - the `Mat` object
3804 - col - the column index
3805
3806 Output Parameter:
3807 . v - the vector
3808
3809 Level: intermediate
3810
3811 Notes:
3812 The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed.
3813
3814 Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access.
3815
3816 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3817 @*/
MatDenseGetColumnVecWrite(Mat A,PetscInt col,Vec * v)3818 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v)
3819 {
3820 PetscFunctionBegin;
3821 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3822 PetscValidType(A, 1);
3823 PetscValidLogicalCollectiveInt(A, col, 2);
3824 PetscAssertPointer(v, 3);
3825 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3826 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3827 PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3828 PetscFunctionReturn(PETSC_SUCCESS);
3829 }
3830
3831 /*@
3832 MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`.
3833
3834 Collective
3835
3836 Input Parameters:
3837 + A - the `Mat` object
3838 . col - the column index
3839 - v - the `Vec` object (may be `NULL`)
3840
3841 Level: intermediate
3842
3843 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`
3844 @*/
MatDenseRestoreColumnVecWrite(Mat A,PetscInt col,Vec * v)3845 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v)
3846 {
3847 PetscFunctionBegin;
3848 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3849 PetscValidType(A, 1);
3850 PetscValidLogicalCollectiveInt(A, col, 2);
3851 if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3);
3852 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3853 PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3854 PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3855 PetscFunctionReturn(PETSC_SUCCESS);
3856 }
3857
3858 /*@
3859 MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`.
3860
3861 Collective
3862
3863 Input Parameters:
3864 + A - the `Mat` object
3865 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0)
3866 . rend - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`)
3867 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0)
3868 - cend - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`)
3869
3870 Output Parameter:
3871 . v - the matrix
3872
3873 Level: intermediate
3874
3875 Notes:
3876 The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed.
3877
3878 The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows.
3879
3880 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()`
3881 @*/
MatDenseGetSubMatrix(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat * v)3882 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3883 {
3884 PetscFunctionBegin;
3885 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3886 PetscValidType(A, 1);
3887 PetscValidLogicalCollectiveInt(A, rbegin, 2);
3888 PetscValidLogicalCollectiveInt(A, rend, 3);
3889 PetscValidLogicalCollectiveInt(A, cbegin, 4);
3890 PetscValidLogicalCollectiveInt(A, cend, 5);
3891 PetscAssertPointer(v, 6);
3892 if (rbegin == PETSC_DECIDE) rbegin = 0;
3893 if (rend == PETSC_DECIDE) rend = A->rmap->N;
3894 if (cbegin == PETSC_DECIDE) cbegin = 0;
3895 if (cend == PETSC_DECIDE) cend = A->cmap->N;
3896 PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3897 PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N);
3898 PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N);
3899 PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N);
3900 PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N);
3901 PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v));
3902 PetscFunctionReturn(PETSC_SUCCESS);
3903 }
3904
3905 /*@
3906 MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`.
3907
3908 Collective
3909
3910 Input Parameters:
3911 + A - the `Mat` object
3912 - v - the `Mat` object (cannot be `NULL`)
3913
3914 Level: intermediate
3915
3916 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()`
3917 @*/
MatDenseRestoreSubMatrix(Mat A,Mat * v)3918 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v)
3919 {
3920 PetscFunctionBegin;
3921 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3922 PetscValidType(A, 1);
3923 PetscAssertPointer(v, 2);
3924 PetscValidHeaderSpecific(*v, MAT_CLASSID, 2);
3925 PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v));
3926 PetscFunctionReturn(PETSC_SUCCESS);
3927 }
3928
3929 #include <petscblaslapack.h>
3930 #include <petsc/private/kernels/blockinvert.h>
3931
MatSeqDenseInvert(Mat A)3932 PetscErrorCode MatSeqDenseInvert(Mat A)
3933 {
3934 PetscInt m;
3935 const PetscReal shift = 0.0;
3936 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE;
3937 PetscScalar *values;
3938
3939 PetscFunctionBegin;
3940 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3941 PetscCall(MatDenseGetArray(A, &values));
3942 PetscCall(MatGetLocalSize(A, &m, NULL));
3943 allowzeropivot = PetscNot(A->erroriffailure);
3944 /* factor and invert each block */
3945 switch (m) {
3946 case 1:
3947 values[0] = (PetscScalar)1.0 / (values[0] + shift);
3948 break;
3949 case 2:
3950 PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected));
3951 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3952 break;
3953 case 3:
3954 PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected));
3955 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3956 break;
3957 case 4:
3958 PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected));
3959 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3960 break;
3961 case 5: {
3962 PetscScalar work[25];
3963 PetscInt ipvt[5];
3964
3965 PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
3966 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3967 } break;
3968 case 6:
3969 PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected));
3970 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3971 break;
3972 case 7:
3973 PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected));
3974 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3975 break;
3976 default: {
3977 PetscInt *v_pivots, *IJ, j;
3978 PetscScalar *v_work;
3979
3980 PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ));
3981 for (j = 0; j < m; j++) IJ[j] = j;
3982 PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
3983 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3984 PetscCall(PetscFree3(v_work, v_pivots, IJ));
3985 }
3986 }
3987 PetscCall(MatDenseRestoreArray(A, &values));
3988 PetscFunctionReturn(PETSC_SUCCESS);
3989 }
3990