xref: /petsc/src/mat/impls/dense/seq/dense.c (revision 27f169480a6668db3ba90f8ef6ef68d542d113fa) !
1 /*
2      Defines the basic matrix operations for sequential dense.
3      Portions of this code are under:
4      Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
8 #include <../src/mat/impls/dense/mpi/mpidense.h>
9 #include <petscblaslapack.h>
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <petsc/private/vecimpl.h>
12 
MatSeqDenseSymmetrize_Private(Mat A,PetscBool hermitian)13 PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian)
14 {
15   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
16   PetscInt      j, k, n = A->rmap->n;
17   PetscScalar  *v;
18 
19   PetscFunctionBegin;
20   PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix");
21   PetscCall(MatDenseGetArray(A, &v));
22   if (!hermitian) {
23     for (k = 0; k < n; k++) {
24       for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j];
25     }
26   } else {
27     for (k = 0; k < n; k++) {
28       for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]);
29     }
30   }
31   PetscCall(MatDenseRestoreArray(A, &v));
32   PetscFunctionReturn(PETSC_SUCCESS);
33 }
34 
MatSeqDenseInvertFactors_Private(Mat A)35 PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A)
36 {
37   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
38   PetscBLASInt  info, n;
39 
40   PetscFunctionBegin;
41   if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
42   PetscCall(PetscBLASIntCast(A->cmap->n, &n));
43   if (A->factortype == MAT_FACTOR_LU) {
44     PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
45     if (!mat->fwork) {
46       mat->lfwork = n;
47       PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
48     }
49     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
50     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
51     PetscCall(PetscFPTrapPop());
52     PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
53   } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
54     if (A->spd == PETSC_BOOL3_TRUE) {
55       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
56       PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info));
57       PetscCall(PetscFPTrapPop());
58       PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE));
59 #if defined(PETSC_USE_COMPLEX)
60     } else if (A->hermitian == PETSC_BOOL3_TRUE) {
61       PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
62       PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present");
63       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
64       PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
65       PetscCall(PetscFPTrapPop());
66       PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE));
67 #endif
68     } else { /* symmetric case */
69       PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
70       PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present");
71       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
72       PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
73       PetscCall(PetscFPTrapPop());
74       PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE));
75     }
76     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1);
77     PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
78   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve");
79 
80   A->ops->solve             = NULL;
81   A->ops->matsolve          = NULL;
82   A->ops->solvetranspose    = NULL;
83   A->ops->matsolvetranspose = NULL;
84   A->ops->solveadd          = NULL;
85   A->ops->solvetransposeadd = NULL;
86   A->factortype             = MAT_FACTOR_NONE;
87   PetscCall(PetscFree(A->solvertype));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
MatZeroRowsColumns_SeqDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)91 static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
92 {
93   Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
94   PetscInt           m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j;
95   PetscScalar       *slot, *bb, *v;
96   const PetscScalar *xx;
97 
98   PetscFunctionBegin;
99   if (PetscDefined(USE_DEBUG)) {
100     for (i = 0; i < N; i++) {
101       PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed");
102       PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n);
103       PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n);
104     }
105   }
106   if (!N) PetscFunctionReturn(PETSC_SUCCESS);
107 
108   /* fix right-hand side if needed */
109   if (x && b) {
110     Vec xt;
111 
112     PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
113     PetscCall(VecDuplicate(x, &xt));
114     PetscCall(VecCopy(x, xt));
115     PetscCall(VecScale(xt, -1.0));
116     PetscCall(MatMultAdd(A, xt, b, b));
117     PetscCall(VecDestroy(&xt));
118     PetscCall(VecGetArrayRead(x, &xx));
119     PetscCall(VecGetArray(b, &bb));
120     for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
121     PetscCall(VecRestoreArrayRead(x, &xx));
122     PetscCall(VecRestoreArray(b, &bb));
123   }
124 
125   PetscCall(MatDenseGetArray(A, &v));
126   for (i = 0; i < N; i++) {
127     slot = v + rows[i] * m;
128     PetscCall(PetscArrayzero(slot, r));
129   }
130   for (i = 0; i < N; i++) {
131     slot = v + rows[i];
132     for (j = 0; j < n; j++) {
133       *slot = 0.0;
134       slot += m;
135     }
136   }
137   if (diag != 0.0) {
138     PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
139     for (i = 0; i < N; i++) {
140       slot  = v + (m + 1) * rows[i];
141       *slot = diag;
142     }
143   }
144   PetscCall(MatDenseRestoreArray(A, &v));
145   PetscFunctionReturn(PETSC_SUCCESS);
146 }
147 
MatConvert_SeqAIJ_SeqDense(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
149 {
150   Mat              B = NULL;
151   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data;
152   Mat_SeqDense    *b;
153   PetscInt        *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i;
154   const MatScalar *av;
155   PetscBool        isseqdense;
156 
157   PetscFunctionBegin;
158   if (reuse == MAT_REUSE_MATRIX) {
159     PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense));
160     PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name);
161   }
162   if (reuse != MAT_REUSE_MATRIX) {
163     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
164     PetscCall(MatSetSizes(B, m, n, m, n));
165     PetscCall(MatSetType(B, MATSEQDENSE));
166     PetscCall(MatSeqDenseSetPreallocation(B, NULL));
167     b = (Mat_SeqDense *)B->data;
168   } else {
169     b = (Mat_SeqDense *)((*newmat)->data);
170     for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m));
171   }
172   PetscCall(MatSeqAIJGetArrayRead(A, &av));
173   for (i = 0; i < m; i++) {
174     PetscInt j;
175     for (j = 0; j < ai[1] - ai[0]; j++) {
176       b->v[*aj * b->lda + i] = *av;
177       aj++;
178       av++;
179     }
180     ai++;
181   }
182   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
183 
184   if (reuse == MAT_INPLACE_MATRIX) {
185     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
186     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
187     PetscCall(MatHeaderReplace(A, &B));
188   } else {
189     if (B) *newmat = B;
190     PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
191     PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));
192   }
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
MatConvert_SeqDense_SeqAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)196 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
197 {
198   Mat           B = NULL;
199   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
200   PetscInt      i, j;
201   PetscInt     *rows, *nnz;
202   MatScalar    *aa = a->v, *vals;
203 
204   PetscFunctionBegin;
205   PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals));
206   if (reuse != MAT_REUSE_MATRIX) {
207     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
208     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
209     PetscCall(MatSetType(B, MATSEQAIJ));
210     for (j = 0; j < A->cmap->n; j++) {
211       for (i = 0; i < A->rmap->n; i++)
212         if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i];
213       aa += a->lda;
214     }
215     PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz));
216   } else B = *newmat;
217   aa = a->v;
218   for (j = 0; j < A->cmap->n; j++) {
219     PetscInt numRows = 0;
220     for (i = 0; i < A->rmap->n; i++)
221       if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) {
222         rows[numRows]   = i;
223         vals[numRows++] = aa[i];
224       }
225     PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES));
226     aa += a->lda;
227   }
228   PetscCall(PetscFree3(rows, nnz, vals));
229   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
230   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
231 
232   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &B));
233   else if (reuse != MAT_REUSE_MATRIX) *newmat = B;
234   PetscFunctionReturn(PETSC_SUCCESS);
235 }
236 
MatAXPY_SeqDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)237 PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
238 {
239   Mat_SeqDense      *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data;
240   const PetscScalar *xv;
241   PetscScalar       *yv;
242   PetscBLASInt       N, m, ldax = 0, lday = 0, one = 1;
243 
244   PetscFunctionBegin;
245   PetscCall(MatDenseGetArrayRead(X, &xv));
246   PetscCall(MatDenseGetArray(Y, &yv));
247   PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N));
248   PetscCall(PetscBLASIntCast(X->rmap->n, &m));
249   PetscCall(PetscBLASIntCast(x->lda, &ldax));
250   PetscCall(PetscBLASIntCast(y->lda, &lday));
251   if (ldax > m || lday > m) {
252     for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one));
253   } else {
254     PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one));
255   }
256   PetscCall(MatDenseRestoreArrayRead(X, &xv));
257   PetscCall(MatDenseRestoreArray(Y, &yv));
258   PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0)));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
MatGetInfo_SeqDense(Mat A,MatInfoType flag,MatInfo * info)262 static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info)
263 {
264   PetscLogDouble N = A->rmap->n * A->cmap->n;
265 
266   PetscFunctionBegin;
267   info->block_size        = 1.0;
268   info->nz_allocated      = N;
269   info->nz_used           = N;
270   info->nz_unneeded       = 0;
271   info->assemblies        = A->num_ass;
272   info->mallocs           = 0;
273   info->memory            = 0; /* REVIEW ME */
274   info->fill_ratio_given  = 0;
275   info->fill_ratio_needed = 0;
276   info->factor_mallocs    = 0;
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
MatScale_SeqDense(Mat A,PetscScalar alpha)280 PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha)
281 {
282   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
283   PetscScalar  *v;
284   PetscBLASInt  one = 1, j, nz, lda = 0;
285 
286   PetscFunctionBegin;
287   PetscCall(MatDenseGetArray(A, &v));
288   PetscCall(PetscBLASIntCast(a->lda, &lda));
289   if (lda > A->rmap->n) {
290     PetscCall(PetscBLASIntCast(A->rmap->n, &nz));
291     for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one));
292   } else {
293     PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz));
294     PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one));
295   }
296   PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n));
297   PetscCall(MatDenseRestoreArray(A, &v));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
MatShift_SeqDense(Mat A,PetscScalar alpha)301 PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha)
302 {
303   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
304   PetscScalar  *v;
305   PetscInt      j, k;
306 
307   PetscFunctionBegin;
308   PetscCall(MatDenseGetArray(A, &v));
309   k = PetscMin(A->rmap->n, A->cmap->n);
310   for (j = 0; j < k; j++) v[j + j * a->lda] += alpha;
311   PetscCall(PetscLogFlops(k));
312   PetscCall(MatDenseRestoreArray(A, &v));
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
MatIsHermitian_SeqDense(Mat A,PetscReal rtol,PetscBool * fl)316 static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
317 {
318   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
319   PetscInt           i, j, m = A->rmap->n, N = a->lda;
320   const PetscScalar *v;
321 
322   PetscFunctionBegin;
323   *fl = PETSC_FALSE;
324   if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
325   PetscCall(MatDenseGetArrayRead(A, &v));
326   for (i = 0; i < m; i++) {
327     for (j = i; j < m; j++) {
328       if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore;
329     }
330   }
331   *fl = PETSC_TRUE;
332 restore:
333   PetscCall(MatDenseRestoreArrayRead(A, &v));
334   PetscFunctionReturn(PETSC_SUCCESS);
335 }
336 
MatIsSymmetric_SeqDense(Mat A,PetscReal rtol,PetscBool * fl)337 static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
338 {
339   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
340   PetscInt           i, j, m = A->rmap->n, N = a->lda;
341   const PetscScalar *v;
342 
343   PetscFunctionBegin;
344   *fl = PETSC_FALSE;
345   if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
346   PetscCall(MatDenseGetArrayRead(A, &v));
347   for (i = 0; i < m; i++) {
348     for (j = i; j < m; j++) {
349       if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore;
350     }
351   }
352   *fl = PETSC_TRUE;
353 restore:
354   PetscCall(MatDenseRestoreArrayRead(A, &v));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
MatDuplicateNoCreate_SeqDense(Mat newi,Mat A,MatDuplicateOption cpvalues)358 PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues)
359 {
360   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
361   PetscInt      lda = mat->lda, j, m, nlda = lda;
362   PetscBool     isdensecpu;
363 
364   PetscFunctionBegin;
365   PetscCall(PetscLayoutReference(A->rmap, &newi->rmap));
366   PetscCall(PetscLayoutReference(A->cmap, &newi->cmap));
367   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */
368     PetscCall(MatDenseSetLDA(newi, lda));
369   }
370   PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu));
371   if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL));
372   if (cpvalues == MAT_COPY_VALUES) {
373     const PetscScalar *av;
374     PetscScalar       *v;
375 
376     PetscCall(MatDenseGetArrayRead(A, &av));
377     PetscCall(MatDenseGetArrayWrite(newi, &v));
378     PetscCall(MatDenseGetLDA(newi, &nlda));
379     m = A->rmap->n;
380     if (lda > m || nlda > m) {
381       for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m));
382     } else {
383       PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n));
384     }
385     PetscCall(MatDenseRestoreArrayWrite(newi, &v));
386     PetscCall(MatDenseRestoreArrayRead(A, &av));
387     PetscCall(MatPropagateSymmetryOptions(A, newi));
388   }
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
MatDuplicate_SeqDense(Mat A,MatDuplicateOption cpvalues,Mat * newmat)392 PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
393 {
394   PetscFunctionBegin;
395   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat));
396   PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
397   PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name));
398   PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues));
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
MatSolve_SeqDense_Internal_LU(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k,PetscBool T)402 static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
403 {
404   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
405   PetscBLASInt  info;
406 
407   PetscFunctionBegin;
408   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
409   PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
410   PetscCall(PetscFPTrapPop());
411   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info);
412   PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
413   PetscFunctionReturn(PETSC_SUCCESS);
414 }
415 
MatSolve_SeqDense_Internal_Cholesky(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k,PetscBool T)416 static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
417 {
418   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
419   PetscBLASInt  info;
420 
421   PetscFunctionBegin;
422   if (A->spd == PETSC_BOOL3_TRUE) {
423     if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A));
424     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
425     PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info));
426     PetscCall(PetscFPTrapPop());
427     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info);
428     if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A));
429 #if defined(PETSC_USE_COMPLEX)
430   } else if (A->hermitian == PETSC_BOOL3_TRUE) {
431     if (T) PetscCall(MatConjugate_SeqDense(A));
432     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
433     PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
434     PetscCall(PetscFPTrapPop());
435     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info);
436     if (T) PetscCall(MatConjugate_SeqDense(A));
437 #endif
438   } else { /* symmetric case */
439     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
440     PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
441     PetscCall(PetscFPTrapPop());
442     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info);
443   }
444   PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
445   PetscFunctionReturn(PETSC_SUCCESS);
446 }
447 
MatSolve_SeqDense_Internal_QR(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k)448 static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
449 {
450   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
451   PetscBLASInt  info;
452   char          trans;
453 
454   PetscFunctionBegin;
455   if (PetscDefined(USE_COMPLEX)) {
456     trans = 'C';
457   } else {
458     trans = 'T';
459   }
460   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
461   { /* lwork depends on the number of right-hand sides */
462     PetscBLASInt nlfwork, lfwork = -1;
463     PetscScalar  fwork;
464 
465     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
466     nlfwork = (PetscBLASInt)PetscRealPart(fwork);
467     if (nlfwork > mat->lfwork) {
468       mat->lfwork = nlfwork;
469       PetscCall(PetscFree(mat->fwork));
470       PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
471     }
472   }
473   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
474   PetscCall(PetscFPTrapPop());
475   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info);
476   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
477   PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
478   PetscCall(PetscFPTrapPop());
479   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info);
480   for (PetscInt j = 0; j < nrhs; j++) {
481     for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.;
482   }
483   PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank))));
484   PetscFunctionReturn(PETSC_SUCCESS);
485 }
486 
MatSolveTranspose_SeqDense_Internal_QR(Mat A,PetscScalar * x,PetscBLASInt ldx,PetscBLASInt m,PetscBLASInt nrhs,PetscBLASInt k)487 static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
488 {
489   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
490   PetscBLASInt  info;
491 
492   PetscFunctionBegin;
493   if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) {
494     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
495     PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
496     PetscCall(PetscFPTrapPop());
497     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info);
498     if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A));
499     { /* lwork depends on the number of right-hand sides */
500       PetscBLASInt nlfwork, lfwork = -1;
501       PetscScalar  fwork;
502 
503       PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
504       nlfwork = (PetscBLASInt)PetscRealPart(fwork);
505       if (nlfwork > mat->lfwork) {
506         mat->lfwork = nlfwork;
507         PetscCall(PetscFree(mat->fwork));
508         PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
509       }
510     }
511     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
512     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
513     PetscCall(PetscFPTrapPop());
514     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info);
515     if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A));
516   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve");
517   PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank))));
518   PetscFunctionReturn(PETSC_SUCCESS);
519 }
520 
MatSolve_SeqDense_SetUp(Mat A,Vec xx,Vec yy,PetscScalar ** _y,PetscBLASInt * _m,PetscBLASInt * _k)521 static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
522 {
523   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
524   PetscScalar  *y;
525   PetscBLASInt  m = 0, k = 0;
526 
527   PetscFunctionBegin;
528   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
529   PetscCall(PetscBLASIntCast(A->cmap->n, &k));
530   if (k < m) {
531     PetscCall(VecCopy(xx, mat->qrrhs));
532     PetscCall(VecGetArray(mat->qrrhs, &y));
533   } else {
534     PetscCall(VecCopy(xx, yy));
535     PetscCall(VecGetArray(yy, &y));
536   }
537   *_y = y;
538   *_k = k;
539   *_m = m;
540   PetscFunctionReturn(PETSC_SUCCESS);
541 }
542 
MatSolve_SeqDense_TearDown(Mat A,Vec xx,Vec yy,PetscScalar ** _y,PetscBLASInt * _m,PetscBLASInt * _k)543 static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
544 {
545   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
546   PetscScalar  *y   = NULL;
547   PetscBLASInt  m, k;
548 
549   PetscFunctionBegin;
550   y   = *_y;
551   *_y = NULL;
552   k   = *_k;
553   m   = *_m;
554   if (k < m) {
555     PetscScalar *yv;
556     PetscCall(VecGetArray(yy, &yv));
557     PetscCall(PetscArraycpy(yv, y, k));
558     PetscCall(VecRestoreArray(yy, &yv));
559     PetscCall(VecRestoreArray(mat->qrrhs, &y));
560   } else {
561     PetscCall(VecRestoreArray(yy, &y));
562   }
563   PetscFunctionReturn(PETSC_SUCCESS);
564 }
565 
MatSolve_SeqDense_LU(Mat A,Vec xx,Vec yy)566 static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy)
567 {
568   PetscScalar *y = NULL;
569   PetscBLASInt m = 0, k = 0;
570 
571   PetscFunctionBegin;
572   PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
573   PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE));
574   PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
575   PetscFunctionReturn(PETSC_SUCCESS);
576 }
577 
MatSolveTranspose_SeqDense_LU(Mat A,Vec xx,Vec yy)578 static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy)
579 {
580   PetscScalar *y = NULL;
581   PetscBLASInt m = 0, k = 0;
582 
583   PetscFunctionBegin;
584   PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
585   PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE));
586   PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
587   PetscFunctionReturn(PETSC_SUCCESS);
588 }
589 
MatSolve_SeqDense_Cholesky(Mat A,Vec xx,Vec yy)590 static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
591 {
592   PetscScalar *y = NULL;
593   PetscBLASInt m = 0, k = 0;
594 
595   PetscFunctionBegin;
596   PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
597   PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE));
598   PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
599   PetscFunctionReturn(PETSC_SUCCESS);
600 }
601 
MatSolveTranspose_SeqDense_Cholesky(Mat A,Vec xx,Vec yy)602 static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
603 {
604   PetscScalar *y = NULL;
605   PetscBLASInt m = 0, k = 0;
606 
607   PetscFunctionBegin;
608   PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
609   PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE));
610   PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
611   PetscFunctionReturn(PETSC_SUCCESS);
612 }
613 
MatSolve_SeqDense_QR(Mat A,Vec xx,Vec yy)614 static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy)
615 {
616   PetscScalar *y = NULL;
617   PetscBLASInt m = 0, k = 0;
618 
619   PetscFunctionBegin;
620   PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
621   PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k));
622   PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
623   PetscFunctionReturn(PETSC_SUCCESS);
624 }
625 
MatSolveTranspose_SeqDense_QR(Mat A,Vec xx,Vec yy)626 static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy)
627 {
628   PetscScalar *y = NULL;
629   PetscBLASInt m = 0, k = 0;
630 
631   PetscFunctionBegin;
632   PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
633   PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k));
634   PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
MatMatSolve_SeqDense_SetUp(Mat A,Mat B,Mat X,PetscScalar ** _y,PetscBLASInt * _ldy,PetscBLASInt * _m,PetscBLASInt * _nrhs,PetscBLASInt * _k)638 static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
639 {
640   const PetscScalar *b;
641   PetscScalar       *y;
642   PetscInt           n, _ldb, _ldx;
643   PetscBLASInt       nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0;
644 
645   PetscFunctionBegin;
646   *_ldy  = 0;
647   *_m    = 0;
648   *_nrhs = 0;
649   *_k    = 0;
650   *_y    = NULL;
651   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
652   PetscCall(PetscBLASIntCast(A->cmap->n, &k));
653   PetscCall(MatGetSize(B, NULL, &n));
654   PetscCall(PetscBLASIntCast(n, &nrhs));
655   PetscCall(MatDenseGetLDA(B, &_ldb));
656   PetscCall(PetscBLASIntCast(_ldb, &ldb));
657   PetscCall(MatDenseGetLDA(X, &_ldx));
658   PetscCall(PetscBLASIntCast(_ldx, &ldx));
659   if (ldx < m) {
660     PetscCall(MatDenseGetArrayRead(B, &b));
661     PetscCall(PetscMalloc1(nrhs * m, &y));
662     if (ldb == m) {
663       PetscCall(PetscArraycpy(y, b, ldb * nrhs));
664     } else {
665       for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m));
666     }
667     ldy = m;
668     PetscCall(MatDenseRestoreArrayRead(B, &b));
669   } else {
670     if (ldb == ldx) {
671       PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
672       PetscCall(MatDenseGetArray(X, &y));
673     } else {
674       PetscCall(MatDenseGetArray(X, &y));
675       PetscCall(MatDenseGetArrayRead(B, &b));
676       for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m));
677       PetscCall(MatDenseRestoreArrayRead(B, &b));
678     }
679     ldy = ldx;
680   }
681   *_y    = y;
682   *_ldy  = ldy;
683   *_k    = k;
684   *_m    = m;
685   *_nrhs = nrhs;
686   PetscFunctionReturn(PETSC_SUCCESS);
687 }
688 
MatMatSolve_SeqDense_TearDown(Mat A,Mat B,Mat X,PetscScalar ** _y,PetscBLASInt * _ldy,PetscBLASInt * _m,PetscBLASInt * _nrhs,PetscBLASInt * _k)689 static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
690 {
691   PetscScalar *y;
692   PetscInt     _ldx;
693   PetscBLASInt k, ldy, nrhs, ldx = 0;
694 
695   PetscFunctionBegin;
696   y    = *_y;
697   *_y  = NULL;
698   k    = *_k;
699   ldy  = *_ldy;
700   nrhs = *_nrhs;
701   PetscCall(MatDenseGetLDA(X, &_ldx));
702   PetscCall(PetscBLASIntCast(_ldx, &ldx));
703   if (ldx != ldy) {
704     PetscScalar *xv;
705     PetscCall(MatDenseGetArray(X, &xv));
706     for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k));
707     PetscCall(MatDenseRestoreArray(X, &xv));
708     PetscCall(PetscFree(y));
709   } else {
710     PetscCall(MatDenseRestoreArray(X, &y));
711   }
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
MatMatSolve_SeqDense_LU(Mat A,Mat B,Mat X)715 static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X)
716 {
717   PetscScalar *y;
718   PetscBLASInt m, k, ldy, nrhs;
719 
720   PetscFunctionBegin;
721   PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
722   PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE));
723   PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
724   PetscFunctionReturn(PETSC_SUCCESS);
725 }
726 
MatMatSolveTranspose_SeqDense_LU(Mat A,Mat B,Mat X)727 static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X)
728 {
729   PetscScalar *y;
730   PetscBLASInt m, k, ldy, nrhs;
731 
732   PetscFunctionBegin;
733   PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
734   PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE));
735   PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
736   PetscFunctionReturn(PETSC_SUCCESS);
737 }
738 
MatMatSolve_SeqDense_Cholesky(Mat A,Mat B,Mat X)739 static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X)
740 {
741   PetscScalar *y;
742   PetscBLASInt m, k, ldy, nrhs;
743 
744   PetscFunctionBegin;
745   PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
746   PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE));
747   PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
748   PetscFunctionReturn(PETSC_SUCCESS);
749 }
750 
MatMatSolveTranspose_SeqDense_Cholesky(Mat A,Mat B,Mat X)751 static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X)
752 {
753   PetscScalar *y;
754   PetscBLASInt m, k, ldy, nrhs;
755 
756   PetscFunctionBegin;
757   PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
758   PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE));
759   PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
760   PetscFunctionReturn(PETSC_SUCCESS);
761 }
762 
MatMatSolve_SeqDense_QR(Mat A,Mat B,Mat X)763 static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X)
764 {
765   PetscScalar *y;
766   PetscBLASInt m, k, ldy, nrhs;
767 
768   PetscFunctionBegin;
769   PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
770   PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k));
771   PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
MatMatSolveTranspose_SeqDense_QR(Mat A,Mat B,Mat X)775 static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X)
776 {
777   PetscScalar *y;
778   PetscBLASInt m, k, ldy, nrhs;
779 
780   PetscFunctionBegin;
781   PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
782   PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k));
783   PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
784   PetscFunctionReturn(PETSC_SUCCESS);
785 }
786 
787 /* COMMENT: I have chosen to hide row permutation in the pivots,
788    rather than put it in the Mat->row slot.*/
MatLUFactor_SeqDense(Mat A,IS row,IS col,PETSC_UNUSED const MatFactorInfo * minfo)789 PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *minfo)
790 {
791   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
792   PetscBLASInt  n, m, info;
793 
794   PetscFunctionBegin;
795   PetscCall(PetscBLASIntCast(A->cmap->n, &n));
796   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
797   if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots));
798   if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
799   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
800   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info));
801   PetscCall(PetscFPTrapPop());
802 
803   PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info);
804   PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info);
805 
806   A->ops->solve             = MatSolve_SeqDense_LU;
807   A->ops->matsolve          = MatMatSolve_SeqDense_LU;
808   A->ops->solvetranspose    = MatSolveTranspose_SeqDense_LU;
809   A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU;
810   A->factortype             = MAT_FACTOR_LU;
811 
812   PetscCall(PetscFree(A->solvertype));
813   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));
814 
815   PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3));
816   PetscFunctionReturn(PETSC_SUCCESS);
817 }
818 
MatLUFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo * info)819 static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info)
820 {
821   PetscFunctionBegin;
822   PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
823   PetscUseTypeMethod(fact, lufactor, NULL, NULL, info);
824   PetscFunctionReturn(PETSC_SUCCESS);
825 }
826 
MatLUFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,IS col,PETSC_UNUSED const MatFactorInfo * info)827 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, PETSC_UNUSED const MatFactorInfo *info)
828 {
829   PetscFunctionBegin;
830   fact->preallocated         = PETSC_TRUE;
831   fact->assembled            = PETSC_TRUE;
832   fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense;
833   PetscFunctionReturn(PETSC_SUCCESS);
834 }
835 
836 /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */
MatCholeskyFactor_SeqDense(Mat A,IS perm,PETSC_UNUSED const MatFactorInfo * minfo)837 PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, PETSC_UNUSED const MatFactorInfo *minfo)
838 {
839   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
840   PetscBLASInt  info, n;
841 
842   PetscFunctionBegin;
843   PetscCall(PetscBLASIntCast(A->cmap->n, &n));
844   if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
845   if (A->spd == PETSC_BOOL3_TRUE) {
846     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
847     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info));
848     PetscCall(PetscFPTrapPop());
849 #if defined(PETSC_USE_COMPLEX)
850   } else if (A->hermitian == PETSC_BOOL3_TRUE) {
851     if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots));
852     if (!mat->fwork) {
853       PetscScalar dummy;
854 
855       mat->lfwork = -1;
856       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
857       PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
858       PetscCall(PetscFPTrapPop());
859       PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
860       PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
861     }
862     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
863     PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
864     PetscCall(PetscFPTrapPop());
865 #endif
866   } else { /* symmetric case */
867     if (!mat->pivots) PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots));
868     if (!mat->fwork) {
869       PetscScalar dummy;
870 
871       mat->lfwork = -1;
872       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
873       PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
874       PetscCall(PetscFPTrapPop());
875       PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
876       PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
877     }
878     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
879     PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
880     PetscCall(PetscFPTrapPop());
881   }
882   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1);
883 
884   A->ops->solve             = MatSolve_SeqDense_Cholesky;
885   A->ops->matsolve          = MatMatSolve_SeqDense_Cholesky;
886   A->ops->solvetranspose    = MatSolveTranspose_SeqDense_Cholesky;
887   A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky;
888   A->factortype             = MAT_FACTOR_CHOLESKY;
889 
890   PetscCall(PetscFree(A->solvertype));
891   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));
892 
893   PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
894   PetscFunctionReturn(PETSC_SUCCESS);
895 }
896 
MatCholeskyFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo * info)897 static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info)
898 {
899   PetscFunctionBegin;
900   PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
901   PetscUseTypeMethod(fact, choleskyfactor, NULL, info);
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
MatCholeskyFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const MatFactorInfo * info)905 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
906 {
907   PetscFunctionBegin;
908   fact->assembled                  = PETSC_TRUE;
909   fact->preallocated               = PETSC_TRUE;
910   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense;
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
MatQRFactor_SeqDense(Mat A,IS col,PETSC_UNUSED const MatFactorInfo * minfo)914 PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, PETSC_UNUSED const MatFactorInfo *minfo)
915 {
916   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
917   PetscBLASInt  n, m, info, min, max;
918 
919   PetscFunctionBegin;
920   PetscCall(PetscBLASIntCast(A->cmap->n, &n));
921   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
922   max = PetscMax(m, n);
923   min = PetscMin(m, n);
924   if (!mat->tau) PetscCall(PetscMalloc1(min, &mat->tau));
925   if (!mat->pivots) PetscCall(PetscMalloc1(n, &mat->pivots));
926   if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs));
927   if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
928   if (!mat->fwork) {
929     PetscScalar dummy;
930 
931     mat->lfwork = -1;
932     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
933     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info));
934     PetscCall(PetscFPTrapPop());
935     PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
936     PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
937   }
938   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
939   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info));
940   PetscCall(PetscFPTrapPop());
941   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info);
942   // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR.  For now just say rank is min of m and n
943   mat->rank = min;
944 
945   A->ops->solve    = MatSolve_SeqDense_QR;
946   A->ops->matsolve = MatMatSolve_SeqDense_QR;
947   A->factortype    = MAT_FACTOR_QR;
948   if (m == n) {
949     A->ops->solvetranspose    = MatSolveTranspose_SeqDense_QR;
950     A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR;
951   }
952 
953   PetscCall(PetscFree(A->solvertype));
954   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));
955 
956   PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0)));
957   PetscFunctionReturn(PETSC_SUCCESS);
958 }
959 
MatQRFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo * info)960 static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info)
961 {
962   PetscFunctionBegin;
963   PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
964   PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, info));
965   PetscFunctionReturn(PETSC_SUCCESS);
966 }
967 
MatQRFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const MatFactorInfo * info)968 PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
969 {
970   PetscFunctionBegin;
971   fact->assembled    = PETSC_TRUE;
972   fact->preallocated = PETSC_TRUE;
973   PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense));
974   PetscFunctionReturn(PETSC_SUCCESS);
975 }
976 
977 /* uses LAPACK */
MatGetFactor_seqdense_petsc(Mat A,MatFactorType ftype,Mat * fact)978 PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact)
979 {
980   PetscFunctionBegin;
981   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact));
982   PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
983   PetscCall(MatSetType(*fact, MATDENSE));
984   (*fact)->trivialsymbolic = PETSC_TRUE;
985   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
986     (*fact)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqDense;
987     (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense;
988   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
989     (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense;
990   } else if (ftype == MAT_FACTOR_QR) {
991     PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense));
992   }
993   (*fact)->factortype = ftype;
994 
995   PetscCall(PetscFree((*fact)->solvertype));
996   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype));
997   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU]));
998   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU]));
999   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY]));
1000   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC]));
1001   PetscFunctionReturn(PETSC_SUCCESS);
1002 }
1003 
MatSOR_SeqDense(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)1004 static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx)
1005 {
1006   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1007   PetscScalar       *x, *v = mat->v, zero = 0.0, xt;
1008   const PetscScalar *b;
1009   PetscInt           m = A->rmap->n, i;
1010   PetscBLASInt       o = 1, bm = 0;
1011 
1012   PetscFunctionBegin;
1013 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1014   PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1015 #endif
1016   if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */
1017   PetscCall(PetscBLASIntCast(m, &bm));
1018   if (flag & SOR_ZERO_INITIAL_GUESS) {
1019     /* this is a hack fix, should have another version without the second BLASdotu */
1020     PetscCall(VecSet(xx, zero));
1021   }
1022   PetscCall(VecGetArray(xx, &x));
1023   PetscCall(VecGetArrayRead(bb, &b));
1024   its = its * lits;
1025   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
1026   while (its--) {
1027     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1028       for (i = 0; i < m; i++) {
1029         PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1030         x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift);
1031       }
1032     }
1033     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1034       for (i = m - 1; i >= 0; i--) {
1035         PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1036         x[i] = (1. - omega) * x[i] + (xt + v[i + i * m] * x[i]) * omega / (v[i + i * m] + shift);
1037       }
1038     }
1039   }
1040   PetscCall(VecRestoreArrayRead(bb, &b));
1041   PetscCall(VecRestoreArray(xx, &x));
1042   PetscFunctionReturn(PETSC_SUCCESS);
1043 }
1044 
MatMultColumnRangeKernel_SeqDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end,PetscBool trans,PetscBool herm)1045 PETSC_INTERN PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm)
1046 {
1047   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1048   PetscScalar       *y, _DOne = 1.0, _DZero = 0.0;
1049   PetscBLASInt       m, n, _One             = 1;
1050   const PetscScalar *v = mat->v, *x;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
1054   PetscCall(PetscBLASIntCast(c_end - c_start, &n));
1055   PetscCall(VecGetArrayRead(xx, &x));
1056   PetscCall(VecGetArrayWrite(yy, &y));
1057   if (!m || !n) {
1058     PetscBLASInt i;
1059     if (trans)
1060       for (i = 0; i < n; i++) y[i] = 0.0;
1061     else
1062       for (i = 0; i < m; i++) y[i] = 0.0;
1063   } else {
1064     if (trans) {
1065       if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One));
1066       else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One));
1067     } else {
1068       PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One));
1069     }
1070     PetscCall(PetscLogFlops(2.0 * m * n - n));
1071   }
1072   PetscCall(VecRestoreArrayRead(xx, &x));
1073   PetscCall(VecRestoreArrayWrite(yy, &y));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
MatMultHermitianTransposeColumnRange_SeqDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end)1077 PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
1078 {
1079   PetscFunctionBegin;
1080   PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE));
1081   PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083 
MatMult_SeqDense(Mat A,Vec xx,Vec yy)1084 PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy)
1085 {
1086   PetscFunctionBegin;
1087   PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE));
1088   PetscFunctionReturn(PETSC_SUCCESS);
1089 }
1090 
MatMultTranspose_SeqDense(Mat A,Vec xx,Vec yy)1091 PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1092 {
1093   PetscFunctionBegin;
1094   PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE));
1095   PetscFunctionReturn(PETSC_SUCCESS);
1096 }
1097 
MatMultHermitianTranspose_SeqDense(Mat A,Vec xx,Vec yy)1098 PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1099 {
1100   PetscFunctionBegin;
1101   PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
MatMultAddColumnRangeKernel_SeqDense(Mat A,Vec xx,Vec zz,Vec yy,PetscInt c_start,PetscInt c_end,PetscBool trans,PetscBool herm)1105 PETSC_INTERN PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm)
1106 {
1107   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1108   const PetscScalar *v   = mat->v, *x;
1109   PetscScalar       *y, _DOne = 1.0;
1110   PetscBLASInt       m, n, _One = 1;
1111 
1112   PetscFunctionBegin;
1113   PetscCall(PetscBLASIntCast(A->rmap->n, &m));
1114   PetscCall(PetscBLASIntCast(c_end - c_start, &n));
1115   PetscCall(VecCopy(zz, yy));
1116   if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS);
1117   PetscCall(VecGetArray(yy, &y));
1118   PetscCall(VecGetArrayRead(xx, &x));
1119   if (trans) {
1120     if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One));
1121     else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One));
1122   } else {
1123     PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One));
1124   }
1125   PetscCall(VecRestoreArrayRead(xx, &x));
1126   PetscCall(VecRestoreArray(yy, &y));
1127   PetscCall(PetscLogFlops(2.0 * m * n));
1128   PetscFunctionReturn(PETSC_SUCCESS);
1129 }
1130 
MatMultColumnRange_SeqDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end)1131 PetscErrorCode MatMultColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
1132 {
1133   PetscFunctionBegin;
1134   PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
MatMultAddColumnRange_SeqDense(Mat A,Vec xx,Vec zz,Vec yy,PetscInt c_start,PetscInt c_end)1138 PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end)
1139 {
1140   PetscFunctionBegin;
1141   PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A,Vec xx,Vec zz,Vec yy,PetscInt c_start,PetscInt c_end)1145 PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end)
1146 {
1147   PetscFunctionBegin;
1148   PetscMPIInt rank;
1149   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
1150   PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE));
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
MatMultAdd_SeqDense(Mat A,Vec xx,Vec zz,Vec yy)1154 PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1155 {
1156   PetscFunctionBegin;
1157   PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
MatMultTransposeAdd_SeqDense(Mat A,Vec xx,Vec zz,Vec yy)1161 PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1162 {
1163   PetscFunctionBegin;
1164   PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
MatMultHermitianTransposeAdd_SeqDense(Mat A,Vec xx,Vec zz,Vec yy)1168 PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1169 {
1170   PetscFunctionBegin;
1171   PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
MatGetRow_SeqDense(Mat A,PetscInt row,PetscInt * ncols,PetscInt ** cols,PetscScalar ** vals)1175 static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1176 {
1177   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1178   PetscInt      i;
1179 
1180   PetscFunctionBegin;
1181   if (ncols) *ncols = A->cmap->n;
1182   if (cols) {
1183     PetscCall(PetscMalloc1(A->cmap->n, cols));
1184     for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i;
1185   }
1186   if (vals) {
1187     const PetscScalar *v;
1188 
1189     PetscCall(MatDenseGetArrayRead(A, &v));
1190     PetscCall(PetscMalloc1(A->cmap->n, vals));
1191     v += row;
1192     for (i = 0; i < A->cmap->n; i++) {
1193       (*vals)[i] = *v;
1194       v += mat->lda;
1195     }
1196     PetscCall(MatDenseRestoreArrayRead(A, &v));
1197   }
1198   PetscFunctionReturn(PETSC_SUCCESS);
1199 }
1200 
MatRestoreRow_SeqDense(Mat A,PetscInt row,PetscInt * ncols,PetscInt ** cols,PetscScalar ** vals)1201 static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1202 {
1203   PetscFunctionBegin;
1204   if (cols) PetscCall(PetscFree(*cols));
1205   if (vals) PetscCall(PetscFree(*vals));
1206   PetscFunctionReturn(PETSC_SUCCESS);
1207 }
1208 
MatSetValues_SeqDense(Mat A,PetscInt m,const PetscInt indexm[],PetscInt n,const PetscInt indexn[],const PetscScalar v[],InsertMode addv)1209 static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv)
1210 {
1211   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1212   PetscScalar  *av;
1213   PetscInt      i, j, idx = 0;
1214 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1215   PetscOffloadMask oldf;
1216 #endif
1217 
1218   PetscFunctionBegin;
1219   PetscCall(MatDenseGetArray(A, &av));
1220   if (!mat->roworiented) {
1221     if (addv == INSERT_VALUES) {
1222       for (j = 0; j < n; j++) {
1223         if (indexn[j] < 0) {
1224           idx += m;
1225           continue;
1226         }
1227         PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1228         for (i = 0; i < m; i++) {
1229           if (indexm[i] < 0) {
1230             idx++;
1231             continue;
1232           }
1233           PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1234           av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0);
1235         }
1236       }
1237     } else {
1238       for (j = 0; j < n; j++) {
1239         if (indexn[j] < 0) {
1240           idx += m;
1241           continue;
1242         }
1243         PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1244         for (i = 0; i < m; i++) {
1245           if (indexm[i] < 0) {
1246             idx++;
1247             continue;
1248           }
1249           PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1250           av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0);
1251         }
1252       }
1253     }
1254   } else {
1255     if (addv == INSERT_VALUES) {
1256       for (i = 0; i < m; i++) {
1257         if (indexm[i] < 0) {
1258           idx += n;
1259           continue;
1260         }
1261         PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1262         for (j = 0; j < n; j++) {
1263           if (indexn[j] < 0) {
1264             idx++;
1265             continue;
1266           }
1267           PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1268           av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0);
1269         }
1270       }
1271     } else {
1272       for (i = 0; i < m; i++) {
1273         if (indexm[i] < 0) {
1274           idx += n;
1275           continue;
1276         }
1277         PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
1278         for (j = 0; j < n; j++) {
1279           if (indexn[j] < 0) {
1280             idx++;
1281             continue;
1282           }
1283           PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
1284           av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0);
1285         }
1286       }
1287     }
1288   }
1289   /* hack to prevent unneeded copy to the GPU while returning the array */
1290 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1291   oldf           = A->offloadmask;
1292   A->offloadmask = PETSC_OFFLOAD_GPU;
1293 #endif
1294   PetscCall(MatDenseRestoreArray(A, &av));
1295 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1296   A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU);
1297 #endif
1298   PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300 
MatGetValues_SeqDense(Mat A,PetscInt m,const PetscInt indexm[],PetscInt n,const PetscInt indexn[],PetscScalar v[])1301 static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[])
1302 {
1303   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1304   const PetscScalar *vv;
1305   PetscInt           i, j;
1306 
1307   PetscFunctionBegin;
1308   PetscCall(MatDenseGetArrayRead(A, &vv));
1309   /* row-oriented output */
1310   for (i = 0; i < m; i++) {
1311     if (indexm[i] < 0) {
1312       v += n;
1313       continue;
1314     }
1315     PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n);
1316     for (j = 0; j < n; j++) {
1317       if (indexn[j] < 0) {
1318         v++;
1319         continue;
1320       }
1321       PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n);
1322       *v++ = vv[indexn[j] * mat->lda + indexm[i]];
1323     }
1324   }
1325   PetscCall(MatDenseRestoreArrayRead(A, &vv));
1326   PetscFunctionReturn(PETSC_SUCCESS);
1327 }
1328 
MatView_Dense_Binary(Mat mat,PetscViewer viewer)1329 PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer)
1330 {
1331   PetscBool          skipHeader;
1332   PetscViewerFormat  format;
1333   PetscInt           header[4], M, N, m, lda, i, j;
1334   PetscCount         k;
1335   const PetscScalar *v;
1336   PetscScalar       *vwork;
1337 
1338   PetscFunctionBegin;
1339   PetscCall(PetscViewerSetUp(viewer));
1340   PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader));
1341   PetscCall(PetscViewerGetFormat(viewer, &format));
1342   if (skipHeader) format = PETSC_VIEWER_NATIVE;
1343 
1344   PetscCall(MatGetSize(mat, &M, &N));
1345 
1346   /* write matrix header */
1347   header[0] = MAT_FILE_CLASSID;
1348   header[1] = M;
1349   header[2] = N;
1350   header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N;
1351   if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1352 
1353   PetscCall(MatGetLocalSize(mat, &m, NULL));
1354   if (format != PETSC_VIEWER_NATIVE) {
1355     PetscInt nnz = m * N, *iwork;
1356     /* store row lengths for each row */
1357     PetscCall(PetscMalloc1(nnz, &iwork));
1358     for (i = 0; i < m; i++) iwork[i] = N;
1359     PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1360     /* store column indices (zero start index) */
1361     for (k = 0, i = 0; i < m; i++)
1362       for (j = 0; j < N; j++, k++) iwork[k] = j;
1363     PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1364     PetscCall(PetscFree(iwork));
1365   }
1366   /* store matrix values as a dense matrix in row major order */
1367   PetscCall(PetscMalloc1(m * N, &vwork));
1368   PetscCall(MatDenseGetArrayRead(mat, &v));
1369   PetscCall(MatDenseGetLDA(mat, &lda));
1370   for (k = 0, i = 0; i < m; i++)
1371     for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j];
1372   PetscCall(MatDenseRestoreArrayRead(mat, &v));
1373   PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1374   PetscCall(PetscFree(vwork));
1375   PetscFunctionReturn(PETSC_SUCCESS);
1376 }
1377 
MatLoad_Dense_Binary(Mat mat,PetscViewer viewer)1378 PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer)
1379 {
1380   PetscBool    skipHeader;
1381   PetscInt     header[4], M, N, m, nz, lda, i, j, k;
1382   PetscInt     rows, cols;
1383   PetscScalar *v, *vwork;
1384 
1385   PetscFunctionBegin;
1386   PetscCall(PetscViewerSetUp(viewer));
1387   PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader));
1388 
1389   if (!skipHeader) {
1390     PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
1391     PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
1392     M = header[1];
1393     N = header[2];
1394     PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
1395     PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
1396     nz = header[3];
1397     PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz);
1398   } else {
1399     PetscCall(MatGetSize(mat, &M, &N));
1400     PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix");
1401     nz = MATRIX_BINARY_FORMAT_DENSE;
1402   }
1403 
1404   /* setup global sizes if not set */
1405   if (mat->rmap->N < 0) mat->rmap->N = M;
1406   if (mat->cmap->N < 0) mat->cmap->N = N;
1407   PetscCall(MatSetUp(mat));
1408   /* check if global sizes are correct */
1409   PetscCall(MatGetSize(mat, &rows, &cols));
1410   PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
1411 
1412   PetscCall(MatGetSize(mat, NULL, &N));
1413   PetscCall(MatGetLocalSize(mat, &m, NULL));
1414   PetscCall(MatDenseGetArray(mat, &v));
1415   PetscCall(MatDenseGetLDA(mat, &lda));
1416   if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */
1417     PetscCount nnz = (size_t)m * N;
1418     /* read in matrix values */
1419     PetscCall(PetscMalloc1(nnz, &vwork));
1420     PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1421     /* store values in column major order */
1422     for (j = 0; j < N; j++)
1423       for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j];
1424     PetscCall(PetscFree(vwork));
1425   } else { /* matrix in file is sparse format */
1426     PetscInt nnz = 0, *rlens, *icols;
1427     /* read in row lengths */
1428     PetscCall(PetscMalloc1(m, &rlens));
1429     PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1430     for (i = 0; i < m; i++) nnz += rlens[i];
1431     /* read in column indices and values */
1432     PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork));
1433     PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1434     PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1435     /* store values in column major order */
1436     for (k = 0, i = 0; i < m; i++)
1437       for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k];
1438     PetscCall(PetscFree(rlens));
1439     PetscCall(PetscFree2(icols, vwork));
1440   }
1441   PetscCall(MatDenseRestoreArray(mat, &v));
1442   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
1443   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
1444   PetscFunctionReturn(PETSC_SUCCESS);
1445 }
1446 
MatLoad_SeqDense(Mat newMat,PetscViewer viewer)1447 static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer)
1448 {
1449   PetscBool isbinary, ishdf5;
1450 
1451   PetscFunctionBegin;
1452   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1453   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1454   /* force binary viewer to load .info file if it has not yet done so */
1455   PetscCall(PetscViewerSetUp(viewer));
1456   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1457   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1458   if (isbinary) {
1459     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1460   } else if (ishdf5) {
1461 #if defined(PETSC_HAVE_HDF5)
1462     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
1463 #else
1464     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
1465 #endif
1466   } else {
1467     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1468   }
1469   PetscFunctionReturn(PETSC_SUCCESS);
1470 }
1471 
MatView_SeqDense_ASCII(Mat A,PetscViewer viewer)1472 static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer)
1473 {
1474   Mat_SeqDense     *a = (Mat_SeqDense *)A->data;
1475   PetscInt          i, j;
1476   const char       *name;
1477   PetscScalar      *v, *av;
1478   PetscViewerFormat format;
1479 #if defined(PETSC_USE_COMPLEX)
1480   PetscBool allreal = PETSC_TRUE;
1481 #endif
1482 
1483   PetscFunctionBegin;
1484   PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av));
1485   PetscCall(PetscViewerGetFormat(viewer, &format));
1486   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1487     PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */
1488   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1489     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1490     for (i = 0; i < A->rmap->n; i++) {
1491       v = av + i;
1492       PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1493       for (j = 0; j < A->cmap->n; j++) {
1494 #if defined(PETSC_USE_COMPLEX)
1495         if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) {
1496           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v)));
1497         } else if (PetscRealPart(*v)) {
1498           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v)));
1499         }
1500 #else
1501         if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v));
1502 #endif
1503         v += a->lda;
1504       }
1505       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1506     }
1507     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1508   } else {
1509     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1510 #if defined(PETSC_USE_COMPLEX)
1511     /* determine if matrix has all real values */
1512     for (j = 0; j < A->cmap->n; j++) {
1513       v = av + j * a->lda;
1514       for (i = 0; i < A->rmap->n; i++) {
1515         if (PetscImaginaryPart(v[i])) {
1516           allreal = PETSC_FALSE;
1517           break;
1518         }
1519       }
1520     }
1521 #endif
1522     if (format == PETSC_VIEWER_ASCII_MATLAB) {
1523       PetscCall(PetscObjectGetName((PetscObject)A, &name));
1524       PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n));
1525       PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n));
1526       PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name));
1527     }
1528 
1529     for (i = 0; i < A->rmap->n; i++) {
1530       v = av + i;
1531       for (j = 0; j < A->cmap->n; j++) {
1532 #if defined(PETSC_USE_COMPLEX)
1533         if (allreal) {
1534           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v)));
1535         } else {
1536           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v)));
1537         }
1538 #else
1539         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v));
1540 #endif
1541         v += a->lda;
1542       }
1543       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1544     }
1545     if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n"));
1546     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1547   }
1548   PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av));
1549   PetscCall(PetscViewerFlush(viewer));
1550   PetscFunctionReturn(PETSC_SUCCESS);
1551 }
1552 
1553 #include <petscdraw.h>
MatView_SeqDense_Draw_Zoom(PetscDraw draw,void * Aa)1554 static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa)
1555 {
1556   Mat                A = (Mat)Aa;
1557   PetscInt           m = A->rmap->n, n = A->cmap->n, i, j;
1558   int                color = PETSC_DRAW_WHITE;
1559   const PetscScalar *v;
1560   PetscViewer        viewer;
1561   PetscReal          xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1562   PetscViewerFormat  format;
1563 
1564   PetscFunctionBegin;
1565   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1566   PetscCall(PetscViewerGetFormat(viewer, &format));
1567   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1568 
1569   /* Loop over matrix elements drawing boxes */
1570   PetscCall(MatDenseGetArrayRead(A, &v));
1571   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1572     PetscDrawCollectiveBegin(draw);
1573     /* Blue for negative and Red for positive */
1574     for (j = 0; j < n; j++) {
1575       x_l = j;
1576       x_r = x_l + 1.0;
1577       for (i = 0; i < m; i++) {
1578         y_l = m - i - 1.0;
1579         y_r = y_l + 1.0;
1580         if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED;
1581         else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE;
1582         else continue;
1583         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1584       }
1585     }
1586     PetscDrawCollectiveEnd(draw);
1587   } else {
1588     /* use contour shading to indicate magnitude of values */
1589     /* first determine max of all nonzero values */
1590     PetscReal minv = 0.0, maxv = 0.0;
1591     PetscDraw popup;
1592 
1593     for (i = 0; i < m * n; i++) {
1594       if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]);
1595     }
1596     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1597     PetscCall(PetscDrawGetPopup(draw, &popup));
1598     PetscCall(PetscDrawScalePopup(popup, minv, maxv));
1599 
1600     PetscDrawCollectiveBegin(draw);
1601     for (j = 0; j < n; j++) {
1602       x_l = j;
1603       x_r = x_l + 1.0;
1604       for (i = 0; i < m; i++) {
1605         y_l   = m - i - 1.0;
1606         y_r   = y_l + 1.0;
1607         color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv);
1608         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1609       }
1610     }
1611     PetscDrawCollectiveEnd(draw);
1612   }
1613   PetscCall(MatDenseRestoreArrayRead(A, &v));
1614   PetscFunctionReturn(PETSC_SUCCESS);
1615 }
1616 
MatView_SeqDense_Draw(Mat A,PetscViewer viewer)1617 static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer)
1618 {
1619   PetscDraw draw;
1620   PetscBool isnull;
1621   PetscReal xr, yr, xl, yl, h, w;
1622 
1623   PetscFunctionBegin;
1624   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1625   PetscCall(PetscDrawIsNull(draw, &isnull));
1626   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1627 
1628   xr = A->cmap->n;
1629   yr = A->rmap->n;
1630   h  = yr / 10.0;
1631   w  = xr / 10.0;
1632   xr += w;
1633   yr += h;
1634   xl = -w;
1635   yl = -h;
1636   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
1637   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
1638   PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A));
1639   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
1640   PetscCall(PetscDrawSave(draw));
1641   PetscFunctionReturn(PETSC_SUCCESS);
1642 }
1643 
MatView_SeqDense(Mat A,PetscViewer viewer)1644 PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer)
1645 {
1646   PetscBool isascii, isbinary, isdraw;
1647 
1648   PetscFunctionBegin;
1649   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1650   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1651   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1652   if (isascii) PetscCall(MatView_SeqDense_ASCII(A, viewer));
1653   else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer));
1654   else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer));
1655   PetscFunctionReturn(PETSC_SUCCESS);
1656 }
1657 
MatDensePlaceArray_SeqDense(Mat A,const PetscScalar * array)1658 static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array)
1659 {
1660   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1661 
1662   PetscFunctionBegin;
1663   PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1664   PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1665   PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first");
1666   a->unplacedarray       = a->v;
1667   a->unplaced_user_alloc = a->user_alloc;
1668   a->v                   = (PetscScalar *)array;
1669   a->user_alloc          = PETSC_TRUE;
1670 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1671   A->offloadmask = PETSC_OFFLOAD_CPU;
1672 #endif
1673   PetscFunctionReturn(PETSC_SUCCESS);
1674 }
1675 
MatDenseResetArray_SeqDense(Mat A)1676 static PetscErrorCode MatDenseResetArray_SeqDense(Mat A)
1677 {
1678   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1679 
1680   PetscFunctionBegin;
1681   PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1682   PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1683   a->v             = a->unplacedarray;
1684   a->user_alloc    = a->unplaced_user_alloc;
1685   a->unplacedarray = NULL;
1686 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1687   A->offloadmask = PETSC_OFFLOAD_CPU;
1688 #endif
1689   PetscFunctionReturn(PETSC_SUCCESS);
1690 }
1691 
MatDenseReplaceArray_SeqDense(Mat A,const PetscScalar * array)1692 static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array)
1693 {
1694   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1695 
1696   PetscFunctionBegin;
1697   PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1698   PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1699   if (!a->user_alloc) PetscCall(PetscFree(a->v));
1700   a->v          = (PetscScalar *)array;
1701   a->user_alloc = PETSC_FALSE;
1702 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1703   A->offloadmask = PETSC_OFFLOAD_CPU;
1704 #endif
1705   PetscFunctionReturn(PETSC_SUCCESS);
1706 }
1707 
MatDestroy_SeqDense(Mat mat)1708 PetscErrorCode MatDestroy_SeqDense(Mat mat)
1709 {
1710   Mat_SeqDense *l = (Mat_SeqDense *)mat->data;
1711 
1712   PetscFunctionBegin;
1713   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n));
1714   PetscCall(VecDestroy(&l->qrrhs));
1715   PetscCall(PetscFree(l->tau));
1716   PetscCall(PetscFree(l->pivots));
1717   PetscCall(PetscFree(l->fwork));
1718   if (!l->user_alloc) PetscCall(PetscFree(l->v));
1719   if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray));
1720   PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1721   PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1722   PetscCall(VecDestroy(&l->cvec));
1723   PetscCall(MatDestroy(&l->cmat));
1724   PetscCall(PetscFree(mat->data));
1725 
1726   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL));
1728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL));
1729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL));
1730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
1731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
1732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
1733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
1734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
1735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
1736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
1737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
1738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
1739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
1740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
1741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL));
1742 #if defined(PETSC_HAVE_ELEMENTAL)
1743   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL));
1744 #endif
1745 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
1746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL));
1747 #endif
1748 #if defined(PETSC_HAVE_CUDA)
1749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL));
1750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL));
1751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL));
1752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL));
1753 #endif
1754 #if defined(PETSC_HAVE_HIP)
1755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL));
1756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL));
1757   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL));
1758   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL));
1759 #endif
1760   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL));
1761   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL));
1762   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL));
1763   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL));
1764   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL));
1765 
1766   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
1767   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
1768   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
1769   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
1770   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
1771   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
1772   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
1773   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
1774   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
1775   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
1776   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
1777   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
1778   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
1779   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
1780   PetscFunctionReturn(PETSC_SUCCESS);
1781 }
1782 
MatTranspose_SeqDense(Mat A,MatReuse reuse,Mat * matout)1783 static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout)
1784 {
1785   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1786   PetscInt      k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n;
1787   PetscScalar  *v, tmp;
1788 
1789   PetscFunctionBegin;
1790   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1791   if (reuse == MAT_INPLACE_MATRIX) {
1792     if (m == n) { /* in place transpose */
1793       PetscCall(MatDenseGetArray(A, &v));
1794       for (j = 0; j < m; j++) {
1795         for (k = 0; k < j; k++) {
1796           tmp          = v[j + k * M];
1797           v[j + k * M] = v[k + j * M];
1798           v[k + j * M] = tmp;
1799         }
1800       }
1801       PetscCall(MatDenseRestoreArray(A, &v));
1802     } else { /* reuse memory, temporary allocates new memory */
1803       PetscScalar *v2;
1804       PetscLayout  tmplayout;
1805 
1806       PetscCall(PetscMalloc1((size_t)m * n, &v2));
1807       PetscCall(MatDenseGetArray(A, &v));
1808       for (j = 0; j < n; j++) {
1809         for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M];
1810       }
1811       PetscCall(PetscArraycpy(v, v2, (size_t)m * n));
1812       PetscCall(PetscFree(v2));
1813       PetscCall(MatDenseRestoreArray(A, &v));
1814       /* cleanup size dependent quantities */
1815       PetscCall(VecDestroy(&mat->cvec));
1816       PetscCall(MatDestroy(&mat->cmat));
1817       PetscCall(PetscFree(mat->pivots));
1818       PetscCall(PetscFree(mat->fwork));
1819       /* swap row/col layouts */
1820       PetscCall(PetscBLASIntCast(n, &mat->lda));
1821       tmplayout = A->rmap;
1822       A->rmap   = A->cmap;
1823       A->cmap   = tmplayout;
1824     }
1825   } else { /* out-of-place transpose */
1826     Mat           tmat;
1827     Mat_SeqDense *tmatd;
1828     PetscScalar  *v2;
1829     PetscInt      M2;
1830 
1831     if (reuse == MAT_INITIAL_MATRIX) {
1832       PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat));
1833       PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n));
1834       PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name));
1835       PetscCall(MatSeqDenseSetPreallocation(tmat, NULL));
1836     } else tmat = *matout;
1837 
1838     PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v));
1839     PetscCall(MatDenseGetArray(tmat, &v2));
1840     tmatd = (Mat_SeqDense *)tmat->data;
1841     M2    = tmatd->lda;
1842     for (j = 0; j < n; j++) {
1843       for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M];
1844     }
1845     PetscCall(MatDenseRestoreArray(tmat, &v2));
1846     PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v));
1847     PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
1848     PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
1849     *matout = tmat;
1850   }
1851   PetscFunctionReturn(PETSC_SUCCESS);
1852 }
1853 
MatEqual_SeqDense(Mat A1,Mat A2,PetscBool * flg)1854 static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg)
1855 {
1856   Mat_SeqDense      *mat1 = (Mat_SeqDense *)A1->data;
1857   Mat_SeqDense      *mat2 = (Mat_SeqDense *)A2->data;
1858   PetscInt           i;
1859   const PetscScalar *v1, *v2;
1860 
1861   PetscFunctionBegin;
1862   if (A1->rmap->n != A2->rmap->n) {
1863     *flg = PETSC_FALSE;
1864     PetscFunctionReturn(PETSC_SUCCESS);
1865   }
1866   if (A1->cmap->n != A2->cmap->n) {
1867     *flg = PETSC_FALSE;
1868     PetscFunctionReturn(PETSC_SUCCESS);
1869   }
1870   PetscCall(MatDenseGetArrayRead(A1, &v1));
1871   PetscCall(MatDenseGetArrayRead(A2, &v2));
1872   for (i = 0; i < A1->cmap->n; i++) {
1873     PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg));
1874     if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS);
1875     v1 += mat1->lda;
1876     v2 += mat2->lda;
1877   }
1878   PetscCall(MatDenseRestoreArrayRead(A1, &v1));
1879   PetscCall(MatDenseRestoreArrayRead(A2, &v2));
1880   *flg = PETSC_TRUE;
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
MatGetDiagonal_SeqDense(Mat A,Vec v)1884 PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v)
1885 {
1886   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1887   PetscInt           i, n, len;
1888   PetscScalar       *x;
1889   const PetscScalar *vv;
1890 
1891   PetscFunctionBegin;
1892   PetscCall(VecGetSize(v, &n));
1893   PetscCall(VecGetArray(v, &x));
1894   len = PetscMin(A->rmap->n, A->cmap->n);
1895   PetscCall(MatDenseGetArrayRead(A, &vv));
1896   PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
1897   for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i];
1898   PetscCall(MatDenseRestoreArrayRead(A, &vv));
1899   PetscCall(VecRestoreArray(v, &x));
1900   PetscFunctionReturn(PETSC_SUCCESS);
1901 }
1902 
MatDiagonalScale_SeqDense(Mat A,Vec ll,Vec rr)1903 static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr)
1904 {
1905   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1906   const PetscScalar *l, *r;
1907   PetscScalar        x, *v, *vv;
1908   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n;
1909 
1910   PetscFunctionBegin;
1911   PetscCall(MatDenseGetArray(A, &vv));
1912   if (ll) {
1913     PetscCall(VecGetSize(ll, &m));
1914     PetscCall(VecGetArrayRead(ll, &l));
1915     PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size");
1916     for (i = 0; i < m; i++) {
1917       x = l[i];
1918       v = vv + i;
1919       for (j = 0; j < n; j++) {
1920         (*v) *= x;
1921         v += mat->lda;
1922       }
1923     }
1924     PetscCall(VecRestoreArrayRead(ll, &l));
1925     PetscCall(PetscLogFlops(1.0 * n * m));
1926   }
1927   if (rr) {
1928     PetscCall(VecGetSize(rr, &n));
1929     PetscCall(VecGetArrayRead(rr, &r));
1930     PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size");
1931     for (i = 0; i < n; i++) {
1932       x = r[i];
1933       v = vv + i * mat->lda;
1934       for (j = 0; j < m; j++) (*v++) *= x;
1935     }
1936     PetscCall(VecRestoreArrayRead(rr, &r));
1937     PetscCall(PetscLogFlops(1.0 * n * m));
1938   }
1939   PetscCall(MatDenseRestoreArray(A, &vv));
1940   PetscFunctionReturn(PETSC_SUCCESS);
1941 }
1942 
MatNorm_SeqDense(Mat A,NormType type,PetscReal * nrm)1943 PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm)
1944 {
1945   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1946   PetscScalar  *v, *vv;
1947   PetscReal     sum = 0.0;
1948   PetscInt      lda, m = A->rmap->n, i, j;
1949 
1950   PetscFunctionBegin;
1951   PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv));
1952   PetscCall(MatDenseGetLDA(A, &lda));
1953   v = vv;
1954   if (type == NORM_FROBENIUS) {
1955     if (lda > m) {
1956       for (j = 0; j < A->cmap->n; j++) {
1957         v = vv + j * lda;
1958         for (i = 0; i < m; i++) {
1959           sum += PetscRealPart(PetscConj(*v) * (*v));
1960           v++;
1961         }
1962       }
1963     } else {
1964 #if defined(PETSC_USE_REAL___FP16)
1965       PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n;
1966       PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one));
1967     }
1968 #else
1969       for (i = 0; i < A->cmap->n * A->rmap->n; i++) {
1970         sum += PetscRealPart(PetscConj(*v) * (*v));
1971         v++;
1972       }
1973     }
1974     *nrm = PetscSqrtReal(sum);
1975 #endif
1976     PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n));
1977   } else if (type == NORM_1) {
1978     *nrm = 0.0;
1979     for (j = 0; j < A->cmap->n; j++) {
1980       v   = vv + j * mat->lda;
1981       sum = 0.0;
1982       for (i = 0; i < A->rmap->n; i++) {
1983         sum += PetscAbsScalar(*v);
1984         v++;
1985       }
1986       if (sum > *nrm) *nrm = sum;
1987     }
1988     PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n));
1989   } else if (type == NORM_INFINITY) {
1990     *nrm = 0.0;
1991     for (j = 0; j < A->rmap->n; j++) {
1992       v   = vv + j;
1993       sum = 0.0;
1994       for (i = 0; i < A->cmap->n; i++) {
1995         sum += PetscAbsScalar(*v);
1996         v += mat->lda;
1997       }
1998       if (sum > *nrm) *nrm = sum;
1999     }
2000     PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n));
2001   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm");
2002   PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv));
2003   PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005 
MatSetOption_SeqDense(Mat A,MatOption op,PetscBool flg)2006 static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg)
2007 {
2008   Mat_SeqDense *aij = (Mat_SeqDense *)A->data;
2009 
2010   PetscFunctionBegin;
2011   switch (op) {
2012   case MAT_ROW_ORIENTED:
2013     aij->roworiented = flg;
2014     break;
2015   default:
2016     break;
2017   }
2018   PetscFunctionReturn(PETSC_SUCCESS);
2019 }
2020 
MatZeroEntries_SeqDense(Mat A)2021 PetscErrorCode MatZeroEntries_SeqDense(Mat A)
2022 {
2023   Mat_SeqDense *l   = (Mat_SeqDense *)A->data;
2024   PetscInt      lda = l->lda, m = A->rmap->n, n = A->cmap->n, j;
2025   PetscScalar  *v;
2026 
2027   PetscFunctionBegin;
2028   PetscCall(MatDenseGetArrayWrite(A, &v));
2029   if (lda > m) {
2030     for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m));
2031   } else {
2032     PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n)));
2033   }
2034   PetscCall(MatDenseRestoreArrayWrite(A, &v));
2035   PetscFunctionReturn(PETSC_SUCCESS);
2036 }
2037 
MatZeroRows_SeqDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)2038 static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
2039 {
2040   Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
2041   PetscInt           m = l->lda, n = A->cmap->n, i, j;
2042   PetscScalar       *slot, *bb, *v;
2043   const PetscScalar *xx;
2044 
2045   PetscFunctionBegin;
2046   if (PetscDefined(USE_DEBUG)) {
2047     for (i = 0; i < N; i++) {
2048       PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed");
2049       PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n);
2050     }
2051   }
2052   if (!N) PetscFunctionReturn(PETSC_SUCCESS);
2053 
2054   /* fix right-hand side if needed */
2055   if (x && b) {
2056     PetscCall(VecGetArrayRead(x, &xx));
2057     PetscCall(VecGetArray(b, &bb));
2058     for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
2059     PetscCall(VecRestoreArrayRead(x, &xx));
2060     PetscCall(VecRestoreArray(b, &bb));
2061   }
2062 
2063   PetscCall(MatDenseGetArray(A, &v));
2064   for (i = 0; i < N; i++) {
2065     slot = v + rows[i];
2066     for (j = 0; j < n; j++) {
2067       *slot = 0.0;
2068       slot += m;
2069     }
2070   }
2071   if (diag != 0.0) {
2072     PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
2073     for (i = 0; i < N; i++) {
2074       slot  = v + (m + 1) * rows[i];
2075       *slot = diag;
2076     }
2077   }
2078   PetscCall(MatDenseRestoreArray(A, &v));
2079   PetscFunctionReturn(PETSC_SUCCESS);
2080 }
2081 
MatDenseGetLDA_SeqDense(Mat A,PetscInt * lda)2082 static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda)
2083 {
2084   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2085 
2086   PetscFunctionBegin;
2087   *lda = mat->lda;
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
MatDenseGetArray_SeqDense(Mat A,PetscScalar ** array)2091 PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array)
2092 {
2093   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2094 
2095   PetscFunctionBegin;
2096   PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2097   *array = mat->v;
2098   PetscFunctionReturn(PETSC_SUCCESS);
2099 }
2100 
MatDenseRestoreArray_SeqDense(Mat A,PetscScalar ** array)2101 PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array)
2102 {
2103   PetscFunctionBegin;
2104   if (array) *array = NULL;
2105   PetscFunctionReturn(PETSC_SUCCESS);
2106 }
2107 
2108 /*@
2109   MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()`
2110 
2111   Not Collective
2112 
2113   Input Parameter:
2114 . A - a `MATDENSE` or `MATDENSECUDA` matrix
2115 
2116   Output Parameter:
2117 . lda - the leading dimension
2118 
2119   Level: intermediate
2120 
2121 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()`
2122 @*/
MatDenseGetLDA(Mat A,PetscInt * lda)2123 PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda)
2124 {
2125   PetscFunctionBegin;
2126   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2127   PetscAssertPointer(lda, 2);
2128   MatCheckPreallocated(A, 1);
2129   PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda));
2130   PetscFunctionReturn(PETSC_SUCCESS);
2131 }
2132 
2133 /*@
2134   MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix
2135 
2136   Collective if the matrix layouts have not yet been setup
2137 
2138   Input Parameters:
2139 + A   - a `MATDENSE` or `MATDENSECUDA` matrix
2140 - lda - the leading dimension
2141 
2142   Level: intermediate
2143 
2144 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()`
2145 @*/
MatDenseSetLDA(Mat A,PetscInt lda)2146 PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda)
2147 {
2148   PetscFunctionBegin;
2149   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2150   PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda));
2151   PetscFunctionReturn(PETSC_SUCCESS);
2152 }
2153 
2154 /*@C
2155   MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored
2156 
2157   Logically Collective
2158 
2159   Input Parameter:
2160 . A - a dense matrix
2161 
2162   Output Parameter:
2163 . array - pointer to the data
2164 
2165   Level: intermediate
2166 
2167 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2168 @*/
MatDenseGetArray(Mat A,PetscScalar * array[])2169 PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[]) PeNS
2170 {
2171   PetscFunctionBegin;
2172   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2173   PetscAssertPointer(array, 2);
2174   PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2175   PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177 
2178 /*@C
2179   MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()`
2180 
2181   Logically Collective
2182 
2183   Input Parameters:
2184 + A     - a dense matrix
2185 - array - pointer to the data (may be `NULL`)
2186 
2187   Level: intermediate
2188 
2189 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2190 @*/
MatDenseRestoreArray(Mat A,PetscScalar * array[])2191 PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[]) PeNS
2192 {
2193   PetscFunctionBegin;
2194   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2195   if (array) PetscAssertPointer(array, 2);
2196   PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2197   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2198 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2199   A->offloadmask = PETSC_OFFLOAD_CPU;
2200 #endif
2201   PetscFunctionReturn(PETSC_SUCCESS);
2202 }
2203 
2204 /*@C
2205   MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored
2206 
2207   Not Collective
2208 
2209   Input Parameter:
2210 . A - a dense matrix
2211 
2212   Output Parameter:
2213 . array - pointer to the data
2214 
2215   Level: intermediate
2216 
2217 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2218 @*/
MatDenseGetArrayRead(Mat A,const PetscScalar * array[])2219 PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[]) PeNS
2220 {
2221   PetscFunctionBegin;
2222   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2223   PetscAssertPointer(array, 2);
2224   PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2225   PetscFunctionReturn(PETSC_SUCCESS);
2226 }
2227 
2228 /*@C
2229   MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()`
2230 
2231   Not Collective
2232 
2233   Input Parameters:
2234 + A     - a dense matrix
2235 - array - pointer to the data (may be `NULL`)
2236 
2237   Level: intermediate
2238 
2239 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2240 @*/
MatDenseRestoreArrayRead(Mat A,const PetscScalar * array[])2241 PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[]) PeNS
2242 {
2243   PetscFunctionBegin;
2244   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2245   if (array) PetscAssertPointer(array, 2);
2246   PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2247   PetscFunctionReturn(PETSC_SUCCESS);
2248 }
2249 
2250 /*@C
2251   MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored
2252 
2253   Not Collective
2254 
2255   Input Parameter:
2256 . A - a dense matrix
2257 
2258   Output Parameter:
2259 . array - pointer to the data
2260 
2261   Level: intermediate
2262 
2263 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2264 @*/
MatDenseGetArrayWrite(Mat A,PetscScalar * array[])2265 PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[]) PeNS
2266 {
2267   PetscFunctionBegin;
2268   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2269   PetscAssertPointer(array, 2);
2270   PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2271   PetscFunctionReturn(PETSC_SUCCESS);
2272 }
2273 
2274 /*@C
2275   MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()`
2276 
2277   Not Collective
2278 
2279   Input Parameters:
2280 + A     - a dense matrix
2281 - array - pointer to the data (may be `NULL`)
2282 
2283   Level: intermediate
2284 
2285 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2286 @*/
MatDenseRestoreArrayWrite(Mat A,PetscScalar * array[])2287 PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[]) PeNS
2288 {
2289   PetscFunctionBegin;
2290   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2291   if (array) PetscAssertPointer(array, 2);
2292   PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2293   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2294 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2295   A->offloadmask = PETSC_OFFLOAD_CPU;
2296 #endif
2297   PetscFunctionReturn(PETSC_SUCCESS);
2298 }
2299 
2300 /*@C
2301   MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored
2302 
2303   Logically Collective
2304 
2305   Input Parameter:
2306 . A - a dense matrix
2307 
2308   Output Parameters:
2309 + array - pointer to the data
2310 - mtype - memory type of the returned pointer
2311 
2312   Level: intermediate
2313 
2314   Note:
2315   If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
2316   an array on device is always returned and is guaranteed to contain the matrix's latest data.
2317 
2318 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`,
2319    `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
2320 @*/
MatDenseGetArrayAndMemType(Mat A,PetscScalar * array[],PetscMemType * mtype)2321 PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype)
2322 {
2323   PetscBool isMPI;
2324 
2325   PetscFunctionBegin;
2326   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2327   PetscAssertPointer(array, 2);
2328   PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
2329   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2330   if (isMPI) {
2331     /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */
2332     PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
2333   } else {
2334     PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *);
2335 
2336     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr));
2337     if (fptr) {
2338       PetscCall((*fptr)(A, array, mtype));
2339     } else {
2340       PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2341       if (mtype) *mtype = PETSC_MEMTYPE_HOST;
2342     }
2343   }
2344   PetscFunctionReturn(PETSC_SUCCESS);
2345 }
2346 
2347 /*@C
2348   MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()`
2349 
2350   Logically Collective
2351 
2352   Input Parameters:
2353 + A     - a dense matrix
2354 - array - pointer to the data
2355 
2356   Level: intermediate
2357 
2358 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2359 @*/
MatDenseRestoreArrayAndMemType(Mat A,PetscScalar * array[])2360 PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[])
2361 {
2362   PetscBool isMPI;
2363 
2364   PetscFunctionBegin;
2365   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2366   if (array) PetscAssertPointer(array, 2);
2367   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2368   if (isMPI) {
2369     PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array));
2370   } else {
2371     PetscErrorCode (*fptr)(Mat, PetscScalar **);
2372 
2373     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr));
2374     if (fptr) {
2375       PetscCall((*fptr)(A, array));
2376     } else {
2377       PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2378     }
2379     if (array) *array = NULL;
2380   }
2381   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2382   PetscFunctionReturn(PETSC_SUCCESS);
2383 }
2384 
2385 /*@C
2386   MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored
2387 
2388   Logically Collective
2389 
2390   Input Parameter:
2391 . A - a dense matrix
2392 
2393   Output Parameters:
2394 + array - pointer to the data
2395 - mtype - memory type of the returned pointer
2396 
2397   Level: intermediate
2398 
2399   Note:
2400   If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
2401   an array on device is always returned and is guaranteed to contain the matrix's latest data.
2402 
2403 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`,
2404    `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
2405 @*/
MatDenseGetArrayReadAndMemType(Mat A,const PetscScalar * array[],PetscMemType * mtype)2406 PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype)
2407 {
2408   PetscBool isMPI;
2409 
2410   PetscFunctionBegin;
2411   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2412   PetscAssertPointer(array, 2);
2413   PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
2414   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2415   if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */
2416     PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
2417   } else {
2418     PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *);
2419 
2420     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr));
2421     if (fptr) {
2422       PetscCall((*fptr)(A, array, mtype));
2423     } else {
2424       PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2425       if (mtype) *mtype = PETSC_MEMTYPE_HOST;
2426     }
2427   }
2428   PetscFunctionReturn(PETSC_SUCCESS);
2429 }
2430 
2431 /*@C
2432   MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()`
2433 
2434   Logically Collective
2435 
2436   Input Parameters:
2437 + A     - a dense matrix
2438 - array - pointer to the data
2439 
2440   Level: intermediate
2441 
2442 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2443 @*/
MatDenseRestoreArrayReadAndMemType(Mat A,const PetscScalar * array[])2444 PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[])
2445 {
2446   PetscBool isMPI;
2447 
2448   PetscFunctionBegin;
2449   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2450   if (array) PetscAssertPointer(array, 2);
2451   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2452   if (isMPI) {
2453     PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array));
2454   } else {
2455     PetscErrorCode (*fptr)(Mat, const PetscScalar **);
2456 
2457     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr));
2458     if (fptr) {
2459       PetscCall((*fptr)(A, array));
2460     } else {
2461       PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
2462     }
2463     if (array) *array = NULL;
2464   }
2465   PetscFunctionReturn(PETSC_SUCCESS);
2466 }
2467 
2468 /*@C
2469   MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored
2470 
2471   Logically Collective
2472 
2473   Input Parameter:
2474 . A - a dense matrix
2475 
2476   Output Parameters:
2477 + array - pointer to the data
2478 - mtype - memory type of the returned pointer
2479 
2480   Level: intermediate
2481 
2482   Note:
2483   If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
2484   an array on device is always returned and is guaranteed to contain the matrix's latest data.
2485 
2486 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`,
2487   `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
2488 @*/
MatDenseGetArrayWriteAndMemType(Mat A,PetscScalar * array[],PetscMemType * mtype)2489 PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype)
2490 {
2491   PetscBool isMPI;
2492 
2493   PetscFunctionBegin;
2494   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2495   PetscAssertPointer(array, 2);
2496   PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
2497   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2498   if (isMPI) {
2499     PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
2500   } else {
2501     PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *);
2502 
2503     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr));
2504     if (fptr) {
2505       PetscCall((*fptr)(A, array, mtype));
2506     } else {
2507       PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2508       if (mtype) *mtype = PETSC_MEMTYPE_HOST;
2509     }
2510   }
2511   PetscFunctionReturn(PETSC_SUCCESS);
2512 }
2513 
2514 /*@C
2515   MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()`
2516 
2517   Logically Collective
2518 
2519   Input Parameters:
2520 + A     - a dense matrix
2521 - array - pointer to the data
2522 
2523   Level: intermediate
2524 
2525 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2526 @*/
MatDenseRestoreArrayWriteAndMemType(Mat A,PetscScalar * array[])2527 PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[])
2528 {
2529   PetscBool isMPI;
2530 
2531   PetscFunctionBegin;
2532   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2533   if (array) PetscAssertPointer(array, 2);
2534   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
2535   if (isMPI) {
2536     PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array));
2537   } else {
2538     PetscErrorCode (*fptr)(Mat, PetscScalar **);
2539 
2540     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr));
2541     if (fptr) {
2542       PetscCall((*fptr)(A, array));
2543     } else {
2544       PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2545     }
2546     if (array) *array = NULL;
2547   }
2548   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2549   PetscFunctionReturn(PETSC_SUCCESS);
2550 }
2551 
MatCreateSubMatrix_SeqDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat * B)2552 static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
2553 {
2554   Mat_SeqDense   *mat = (Mat_SeqDense *)A->data;
2555   PetscInt        i, j, nrows, ncols, ldb;
2556   const PetscInt *irow, *icol;
2557   PetscScalar    *av, *bv, *v = mat->v;
2558   Mat             newmat;
2559 
2560   PetscFunctionBegin;
2561   PetscCall(ISGetIndices(isrow, &irow));
2562   PetscCall(ISGetIndices(iscol, &icol));
2563   PetscCall(ISGetLocalSize(isrow, &nrows));
2564   PetscCall(ISGetLocalSize(iscol, &ncols));
2565 
2566   /* Check submatrixcall */
2567   if (scall == MAT_REUSE_MATRIX) {
2568     PetscInt n_cols, n_rows;
2569     PetscCall(MatGetSize(*B, &n_rows, &n_cols));
2570     if (n_rows != nrows || n_cols != ncols) {
2571       /* resize the result matrix to match number of requested rows/columns */
2572       PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols));
2573     }
2574     newmat = *B;
2575   } else {
2576     /* Create and fill new matrix */
2577     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
2578     PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols));
2579     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
2580     PetscCall(MatSeqDenseSetPreallocation(newmat, NULL));
2581   }
2582 
2583   /* Now extract the data pointers and do the copy,column at a time */
2584   PetscCall(MatDenseGetArray(newmat, &bv));
2585   PetscCall(MatDenseGetLDA(newmat, &ldb));
2586   for (i = 0; i < ncols; i++) {
2587     av = v + mat->lda * icol[i];
2588     for (j = 0; j < nrows; j++) bv[j] = av[irow[j]];
2589     bv += ldb;
2590   }
2591   PetscCall(MatDenseRestoreArray(newmat, &bv));
2592 
2593   /* Assemble the matrices so that the correct flags are set */
2594   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
2595   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
2596 
2597   /* Free work space */
2598   PetscCall(ISRestoreIndices(isrow, &irow));
2599   PetscCall(ISRestoreIndices(iscol, &icol));
2600   *B = newmat;
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
MatCreateSubMatrices_SeqDense(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * B[])2604 static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
2605 {
2606   PetscInt i;
2607 
2608   PetscFunctionBegin;
2609   if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B));
2610 
2611   for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i]));
2612   PetscFunctionReturn(PETSC_SUCCESS);
2613 }
2614 
MatCopy_SeqDense(Mat A,Mat B,MatStructure str)2615 PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str)
2616 {
2617   Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data;
2618   const PetscScalar *va;
2619   PetscScalar       *vb;
2620   PetscInt           lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j;
2621 
2622   PetscFunctionBegin;
2623   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2624   if (A->ops->copy != B->ops->copy) {
2625     PetscCall(MatCopy_Basic(A, B, str));
2626     PetscFunctionReturn(PETSC_SUCCESS);
2627   }
2628   PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)");
2629   PetscCall(MatDenseGetArrayRead(A, &va));
2630   PetscCall(MatDenseGetArray(B, &vb));
2631   if (lda1 > m || lda2 > m) {
2632     for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m));
2633   } else {
2634     PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n));
2635   }
2636   PetscCall(MatDenseRestoreArray(B, &vb));
2637   PetscCall(MatDenseRestoreArrayRead(A, &va));
2638   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2639   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2640   PetscFunctionReturn(PETSC_SUCCESS);
2641 }
2642 
MatSetUp_SeqDense(Mat A)2643 PetscErrorCode MatSetUp_SeqDense(Mat A)
2644 {
2645   PetscFunctionBegin;
2646   PetscCall(PetscLayoutSetUp(A->rmap));
2647   PetscCall(PetscLayoutSetUp(A->cmap));
2648   if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL));
2649   PetscFunctionReturn(PETSC_SUCCESS);
2650 }
2651 
MatConjugate_SeqDense(Mat A)2652 PetscErrorCode MatConjugate_SeqDense(Mat A)
2653 {
2654   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2655   PetscInt      i, j;
2656   PetscInt      min = PetscMin(A->rmap->n, A->cmap->n);
2657   PetscScalar  *aa;
2658 
2659   PetscFunctionBegin;
2660   PetscCall(MatDenseGetArray(A, &aa));
2661   for (j = 0; j < A->cmap->n; j++)
2662     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]);
2663   PetscCall(MatDenseRestoreArray(A, &aa));
2664   if (mat->tau)
2665     for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]);
2666   PetscFunctionReturn(PETSC_SUCCESS);
2667 }
2668 
MatRealPart_SeqDense(Mat A)2669 static PetscErrorCode MatRealPart_SeqDense(Mat A)
2670 {
2671   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2672   PetscInt      i, j;
2673   PetscScalar  *aa;
2674 
2675   PetscFunctionBegin;
2676   PetscCall(MatDenseGetArray(A, &aa));
2677   for (j = 0; j < A->cmap->n; j++) {
2678     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]);
2679   }
2680   PetscCall(MatDenseRestoreArray(A, &aa));
2681   PetscFunctionReturn(PETSC_SUCCESS);
2682 }
2683 
MatImaginaryPart_SeqDense(Mat A)2684 static PetscErrorCode MatImaginaryPart_SeqDense(Mat A)
2685 {
2686   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2687   PetscInt      i, j;
2688   PetscScalar  *aa;
2689 
2690   PetscFunctionBegin;
2691   PetscCall(MatDenseGetArray(A, &aa));
2692   for (j = 0; j < A->cmap->n; j++) {
2693     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]);
2694   }
2695   PetscCall(MatDenseRestoreArray(A, &aa));
2696   PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698 
MatMatMultSymbolic_SeqDense_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)2699 PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2700 {
2701   PetscInt  m = A->rmap->n, n = B->cmap->n;
2702   PetscBool cisdense = PETSC_FALSE;
2703 
2704   PetscFunctionBegin;
2705   PetscCall(MatSetSizes(C, m, n, m, n));
2706 #if defined(PETSC_HAVE_CUDA)
2707   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
2708 #endif
2709 #if defined(PETSC_HAVE_HIP)
2710   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
2711 #endif
2712   if (!cisdense) {
2713     PetscBool flg;
2714 
2715     PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
2716     PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
2717   }
2718   PetscCall(MatSetUp(C));
2719   PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721 
MatMatMultNumeric_SeqDense_SeqDense(Mat A,Mat B,Mat C)2722 PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2723 {
2724   Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data;
2725   PetscBLASInt       m, n, k;
2726   const PetscScalar *av, *bv;
2727   PetscScalar       *cv;
2728   PetscScalar        _DOne = 1.0, _DZero = 0.0;
2729 
2730   PetscFunctionBegin;
2731   PetscCall(PetscBLASIntCast(C->rmap->n, &m));
2732   PetscCall(PetscBLASIntCast(C->cmap->n, &n));
2733   PetscCall(PetscBLASIntCast(A->cmap->n, &k));
2734   if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
2735   PetscCall(MatDenseGetArrayRead(A, &av));
2736   PetscCall(MatDenseGetArrayRead(B, &bv));
2737   PetscCall(MatDenseGetArrayWrite(C, &cv));
2738   PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2739   PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
2740   PetscCall(MatDenseRestoreArrayRead(A, &av));
2741   PetscCall(MatDenseRestoreArrayRead(B, &bv));
2742   PetscCall(MatDenseRestoreArrayWrite(C, &cv));
2743   PetscFunctionReturn(PETSC_SUCCESS);
2744 }
2745 
MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)2746 PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2747 {
2748   PetscInt  m = A->rmap->n, n = B->rmap->n;
2749   PetscBool cisdense = PETSC_FALSE;
2750 
2751   PetscFunctionBegin;
2752   PetscCall(MatSetSizes(C, m, n, m, n));
2753 #if defined(PETSC_HAVE_CUDA)
2754   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
2755 #endif
2756 #if defined(PETSC_HAVE_HIP)
2757   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
2758 #endif
2759   if (!cisdense) {
2760     PetscBool flg;
2761 
2762     PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
2763     PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
2764   }
2765   PetscCall(MatSetUp(C));
2766   PetscFunctionReturn(PETSC_SUCCESS);
2767 }
2768 
MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A,Mat B,Mat C)2769 PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2770 {
2771   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2772   Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
2773   Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
2774   const PetscScalar *av, *bv;
2775   PetscScalar       *cv;
2776   PetscBLASInt       m, n, k;
2777   PetscScalar        _DOne = 1.0, _DZero = 0.0;
2778 
2779   PetscFunctionBegin;
2780   PetscCall(PetscBLASIntCast(C->rmap->n, &m));
2781   PetscCall(PetscBLASIntCast(C->cmap->n, &n));
2782   PetscCall(PetscBLASIntCast(A->cmap->n, &k));
2783   if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
2784   PetscCall(MatDenseGetArrayRead(A, &av));
2785   PetscCall(MatDenseGetArrayRead(B, &bv));
2786   PetscCall(MatDenseGetArrayWrite(C, &cv));
2787   PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2788   PetscCall(MatDenseRestoreArrayRead(A, &av));
2789   PetscCall(MatDenseRestoreArrayRead(B, &bv));
2790   PetscCall(MatDenseRestoreArrayWrite(C, &cv));
2791   PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
2792   PetscFunctionReturn(PETSC_SUCCESS);
2793 }
2794 
MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)2795 PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2796 {
2797   PetscInt  m = A->cmap->n, n = B->cmap->n;
2798   PetscBool cisdense = PETSC_FALSE;
2799 
2800   PetscFunctionBegin;
2801   PetscCall(MatSetSizes(C, m, n, m, n));
2802 #if defined(PETSC_HAVE_CUDA)
2803   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
2804 #endif
2805 #if defined(PETSC_HAVE_HIP)
2806   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
2807 #endif
2808   if (!cisdense) {
2809     PetscBool flg;
2810 
2811     PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
2812     PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
2813   }
2814   PetscCall(MatSetUp(C));
2815   PetscFunctionReturn(PETSC_SUCCESS);
2816 }
2817 
MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A,Mat B,Mat C)2818 PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2819 {
2820   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2821   Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
2822   Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
2823   const PetscScalar *av, *bv;
2824   PetscScalar       *cv;
2825   PetscBLASInt       m, n, k;
2826   PetscScalar        _DOne = 1.0, _DZero = 0.0;
2827 
2828   PetscFunctionBegin;
2829   PetscCall(PetscBLASIntCast(C->rmap->n, &m));
2830   PetscCall(PetscBLASIntCast(C->cmap->n, &n));
2831   PetscCall(PetscBLASIntCast(A->rmap->n, &k));
2832   if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
2833   PetscCall(MatDenseGetArrayRead(A, &av));
2834   PetscCall(MatDenseGetArrayRead(B, &bv));
2835   PetscCall(MatDenseGetArrayWrite(C, &cv));
2836   PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2837   PetscCall(MatDenseRestoreArrayRead(A, &av));
2838   PetscCall(MatDenseRestoreArrayRead(B, &bv));
2839   PetscCall(MatDenseRestoreArrayWrite(C, &cv));
2840   PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
2841   PetscFunctionReturn(PETSC_SUCCESS);
2842 }
2843 
MatProductSetFromOptions_SeqDense_AB(Mat C)2844 static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C)
2845 {
2846   PetscFunctionBegin;
2847   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense;
2848   C->ops->productsymbolic = MatProductSymbolic_AB;
2849   PetscFunctionReturn(PETSC_SUCCESS);
2850 }
2851 
MatProductSetFromOptions_SeqDense_AtB(Mat C)2852 static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C)
2853 {
2854   PetscFunctionBegin;
2855   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense;
2856   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2857   PetscFunctionReturn(PETSC_SUCCESS);
2858 }
2859 
MatProductSetFromOptions_SeqDense_ABt(Mat C)2860 static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C)
2861 {
2862   PetscFunctionBegin;
2863   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense;
2864   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2865   PetscFunctionReturn(PETSC_SUCCESS);
2866 }
2867 
MatProductSetFromOptions_SeqDense(Mat C)2868 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C)
2869 {
2870   Mat_Product *product = C->product;
2871 
2872   PetscFunctionBegin;
2873   switch (product->type) {
2874   case MATPRODUCT_AB:
2875     PetscCall(MatProductSetFromOptions_SeqDense_AB(C));
2876     break;
2877   case MATPRODUCT_AtB:
2878     PetscCall(MatProductSetFromOptions_SeqDense_AtB(C));
2879     break;
2880   case MATPRODUCT_ABt:
2881     PetscCall(MatProductSetFromOptions_SeqDense_ABt(C));
2882     break;
2883   default:
2884     break;
2885   }
2886   PetscFunctionReturn(PETSC_SUCCESS);
2887 }
2888 
MatGetRowMax_SeqDense(Mat A,Vec v,PetscInt idx[])2889 static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[])
2890 {
2891   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2892   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2893   PetscScalar       *x;
2894   const PetscScalar *aa;
2895 
2896   PetscFunctionBegin;
2897   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2898   PetscCall(VecGetArray(v, &x));
2899   PetscCall(VecGetLocalSize(v, &p));
2900   PetscCall(MatDenseGetArrayRead(A, &aa));
2901   PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2902   for (i = 0; i < m; i++) {
2903     x[i] = aa[i];
2904     if (idx) idx[i] = 0;
2905     for (j = 1; j < n; j++) {
2906       if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) {
2907         x[i] = aa[i + a->lda * j];
2908         if (idx) idx[i] = j;
2909       }
2910     }
2911   }
2912   PetscCall(MatDenseRestoreArrayRead(A, &aa));
2913   PetscCall(VecRestoreArray(v, &x));
2914   PetscFunctionReturn(PETSC_SUCCESS);
2915 }
2916 
MatGetRowMaxAbs_SeqDense(Mat A,Vec v,PetscInt idx[])2917 static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[])
2918 {
2919   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2920   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2921   PetscScalar       *x;
2922   PetscReal          atmp;
2923   const PetscScalar *aa;
2924 
2925   PetscFunctionBegin;
2926   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2927   PetscCall(VecGetArray(v, &x));
2928   PetscCall(VecGetLocalSize(v, &p));
2929   PetscCall(MatDenseGetArrayRead(A, &aa));
2930   PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2931   for (i = 0; i < m; i++) {
2932     x[i] = PetscAbsScalar(aa[i]);
2933     for (j = 1; j < n; j++) {
2934       atmp = PetscAbsScalar(aa[i + a->lda * j]);
2935       if (PetscAbsScalar(x[i]) < atmp) {
2936         x[i] = atmp;
2937         if (idx) idx[i] = j;
2938       }
2939     }
2940   }
2941   PetscCall(MatDenseRestoreArrayRead(A, &aa));
2942   PetscCall(VecRestoreArray(v, &x));
2943   PetscFunctionReturn(PETSC_SUCCESS);
2944 }
2945 
MatGetRowMin_SeqDense(Mat A,Vec v,PetscInt idx[])2946 static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[])
2947 {
2948   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2949   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2950   PetscScalar       *x;
2951   const PetscScalar *aa;
2952 
2953   PetscFunctionBegin;
2954   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2955   PetscCall(MatDenseGetArrayRead(A, &aa));
2956   PetscCall(VecGetArray(v, &x));
2957   PetscCall(VecGetLocalSize(v, &p));
2958   PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2959   for (i = 0; i < m; i++) {
2960     x[i] = aa[i];
2961     if (idx) idx[i] = 0;
2962     for (j = 1; j < n; j++) {
2963       if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) {
2964         x[i] = aa[i + a->lda * j];
2965         if (idx) idx[i] = j;
2966       }
2967     }
2968   }
2969   PetscCall(VecRestoreArray(v, &x));
2970   PetscCall(MatDenseRestoreArrayRead(A, &aa));
2971   PetscFunctionReturn(PETSC_SUCCESS);
2972 }
2973 
MatGetColumnVector_SeqDense(Mat A,Vec v,PetscInt col)2974 PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col)
2975 {
2976   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2977   PetscScalar       *x;
2978   const PetscScalar *aa;
2979 
2980   PetscFunctionBegin;
2981   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2982   PetscCall(MatDenseGetArrayRead(A, &aa));
2983   PetscCall(VecGetArray(v, &x));
2984   PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n));
2985   PetscCall(VecRestoreArray(v, &x));
2986   PetscCall(MatDenseRestoreArrayRead(A, &aa));
2987   PetscFunctionReturn(PETSC_SUCCESS);
2988 }
2989 
MatGetColumnReductions_SeqDense(Mat A,PetscInt type,PetscReal * reductions)2990 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions)
2991 {
2992   PetscInt           i, j, m, n;
2993   const PetscScalar *a;
2994 
2995   PetscFunctionBegin;
2996   PetscCall(MatGetSize(A, &m, &n));
2997   PetscCall(PetscArrayzero(reductions, n));
2998   PetscCall(MatDenseGetArrayRead(A, &a));
2999   if (type == NORM_2) {
3000     for (i = 0; i < n; i++) {
3001       for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]);
3002       a = PetscSafePointerPlusOffset(a, m);
3003     }
3004   } else if (type == NORM_1) {
3005     for (i = 0; i < n; i++) {
3006       for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]);
3007       a = PetscSafePointerPlusOffset(a, m);
3008     }
3009   } else if (type == NORM_INFINITY) {
3010     for (i = 0; i < n; i++) {
3011       for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]);
3012       a = PetscSafePointerPlusOffset(a, m);
3013     }
3014   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
3015     for (i = 0; i < n; i++) {
3016       for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]);
3017       a = PetscSafePointerPlusOffset(a, m);
3018     }
3019   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
3020     for (i = 0; i < n; i++) {
3021       for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]);
3022       a = PetscSafePointerPlusOffset(a, m);
3023     }
3024   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
3025   PetscCall(MatDenseRestoreArrayRead(A, &a));
3026   if (type == NORM_2) {
3027     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
3028   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
3029     for (i = 0; i < n; i++) reductions[i] /= m;
3030   }
3031   PetscFunctionReturn(PETSC_SUCCESS);
3032 }
3033 
MatSetRandom_SeqDense(Mat x,PetscRandom rctx)3034 PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx)
3035 {
3036   PetscScalar *a;
3037   PetscInt     lda, m, n, i, j;
3038 
3039   PetscFunctionBegin;
3040   PetscCall(MatGetSize(x, &m, &n));
3041   PetscCall(MatDenseGetLDA(x, &lda));
3042   PetscCall(MatDenseGetArrayWrite(x, &a));
3043   for (j = 0; j < n; j++) {
3044     for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i));
3045   }
3046   PetscCall(MatDenseRestoreArrayWrite(x, &a));
3047   PetscFunctionReturn(PETSC_SUCCESS);
3048 }
3049 
3050 /* vals is not const */
MatDenseGetColumn_SeqDense(Mat A,PetscInt col,PetscScalar ** vals)3051 static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals)
3052 {
3053   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3054   PetscScalar  *v;
3055 
3056   PetscFunctionBegin;
3057   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3058   PetscCall(MatDenseGetArray(A, &v));
3059   *vals = v + col * a->lda;
3060   PetscCall(MatDenseRestoreArray(A, &v));
3061   PetscFunctionReturn(PETSC_SUCCESS);
3062 }
3063 
MatDenseRestoreColumn_SeqDense(Mat A,PetscScalar ** vals)3064 static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals)
3065 {
3066   PetscFunctionBegin;
3067   if (vals) *vals = NULL; /* user cannot accidentally use the array later */
3068   PetscFunctionReturn(PETSC_SUCCESS);
3069 }
3070 
3071 static struct _MatOps MatOps_Values = {MatSetValues_SeqDense,
3072                                        MatGetRow_SeqDense,
3073                                        MatRestoreRow_SeqDense,
3074                                        MatMult_SeqDense,
3075                                        /*  4*/ MatMultAdd_SeqDense,
3076                                        MatMultTranspose_SeqDense,
3077                                        MatMultTransposeAdd_SeqDense,
3078                                        NULL,
3079                                        NULL,
3080                                        NULL,
3081                                        /* 10*/ NULL,
3082                                        MatLUFactor_SeqDense,
3083                                        MatCholeskyFactor_SeqDense,
3084                                        MatSOR_SeqDense,
3085                                        MatTranspose_SeqDense,
3086                                        /* 15*/ MatGetInfo_SeqDense,
3087                                        MatEqual_SeqDense,
3088                                        MatGetDiagonal_SeqDense,
3089                                        MatDiagonalScale_SeqDense,
3090                                        MatNorm_SeqDense,
3091                                        /* 20*/ NULL,
3092                                        NULL,
3093                                        MatSetOption_SeqDense,
3094                                        MatZeroEntries_SeqDense,
3095                                        /* 24*/ MatZeroRows_SeqDense,
3096                                        NULL,
3097                                        NULL,
3098                                        NULL,
3099                                        NULL,
3100                                        /* 29*/ MatSetUp_SeqDense,
3101                                        NULL,
3102                                        NULL,
3103                                        NULL,
3104                                        NULL,
3105                                        /* 34*/ MatDuplicate_SeqDense,
3106                                        NULL,
3107                                        NULL,
3108                                        NULL,
3109                                        NULL,
3110                                        /* 39*/ MatAXPY_SeqDense,
3111                                        MatCreateSubMatrices_SeqDense,
3112                                        NULL,
3113                                        MatGetValues_SeqDense,
3114                                        MatCopy_SeqDense,
3115                                        /* 44*/ MatGetRowMax_SeqDense,
3116                                        MatScale_SeqDense,
3117                                        MatShift_SeqDense,
3118                                        NULL,
3119                                        MatZeroRowsColumns_SeqDense,
3120                                        /* 49*/ MatSetRandom_SeqDense,
3121                                        NULL,
3122                                        NULL,
3123                                        NULL,
3124                                        NULL,
3125                                        /* 54*/ NULL,
3126                                        NULL,
3127                                        NULL,
3128                                        NULL,
3129                                        NULL,
3130                                        /* 59*/ MatCreateSubMatrix_SeqDense,
3131                                        MatDestroy_SeqDense,
3132                                        MatView_SeqDense,
3133                                        NULL,
3134                                        NULL,
3135                                        /* 64*/ NULL,
3136                                        NULL,
3137                                        NULL,
3138                                        NULL,
3139                                        MatGetRowMaxAbs_SeqDense,
3140                                        /* 69*/ NULL,
3141                                        NULL,
3142                                        NULL,
3143                                        NULL,
3144                                        NULL,
3145                                        /* 74*/ NULL,
3146                                        NULL,
3147                                        NULL,
3148                                        NULL,
3149                                        MatLoad_SeqDense,
3150                                        /* 79*/ MatIsSymmetric_SeqDense,
3151                                        MatIsHermitian_SeqDense,
3152                                        NULL,
3153                                        NULL,
3154                                        NULL,
3155                                        /* 84*/ NULL,
3156                                        MatMatMultNumeric_SeqDense_SeqDense,
3157                                        NULL,
3158                                        NULL,
3159                                        MatMatTransposeMultNumeric_SeqDense_SeqDense,
3160                                        /* 89*/ NULL,
3161                                        MatProductSetFromOptions_SeqDense,
3162                                        NULL,
3163                                        NULL,
3164                                        MatConjugate_SeqDense,
3165                                        /* 94*/ NULL,
3166                                        NULL,
3167                                        MatRealPart_SeqDense,
3168                                        MatImaginaryPart_SeqDense,
3169                                        NULL,
3170                                        /* 99*/ NULL,
3171                                        NULL,
3172                                        NULL,
3173                                        MatGetRowMin_SeqDense,
3174                                        MatGetColumnVector_SeqDense,
3175                                        /*104*/ NULL,
3176                                        NULL,
3177                                        NULL,
3178                                        NULL,
3179                                        NULL,
3180                                        /*109*/ NULL,
3181                                        NULL,
3182                                        MatMultHermitianTranspose_SeqDense,
3183                                        MatMultHermitianTransposeAdd_SeqDense,
3184                                        NULL,
3185                                        /*114*/ NULL,
3186                                        MatGetColumnReductions_SeqDense,
3187                                        NULL,
3188                                        NULL,
3189                                        NULL,
3190                                        /*119*/ NULL,
3191                                        NULL,
3192                                        MatTransposeMatMultNumeric_SeqDense_SeqDense,
3193                                        NULL,
3194                                        NULL,
3195                                        /*124*/ NULL,
3196                                        NULL,
3197                                        NULL,
3198                                        NULL,
3199                                        NULL,
3200                                        /*129*/ NULL,
3201                                        MatCreateMPIMatConcatenateSeqMat_SeqDense,
3202                                        NULL,
3203                                        NULL,
3204                                        NULL,
3205                                        /*134*/ NULL,
3206                                        NULL,
3207                                        NULL,
3208                                        NULL,
3209                                        NULL,
3210                                        /*139*/ NULL,
3211                                        NULL,
3212                                        NULL,
3213                                        NULL,
3214                                        NULL};
3215 
3216 /*@
3217   MatCreateSeqDense - Creates a `MATSEQDENSE` that
3218   is stored in column major order (the usual Fortran format).
3219 
3220   Collective
3221 
3222   Input Parameters:
3223 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3224 . m    - number of rows
3225 . n    - number of columns
3226 - data - optional location of matrix data in column major order.  Use `NULL` for PETSc
3227          to control all matrix memory allocation.
3228 
3229   Output Parameter:
3230 . A - the matrix
3231 
3232   Level: intermediate
3233 
3234   Note:
3235   The data input variable is intended primarily for Fortran programmers
3236   who wish to allocate their own matrix memory space.  Most users should
3237   set `data` = `NULL`.
3238 
3239   Developer Note:
3240   Many of the matrix operations for this variant use the BLAS and LAPACK routines.
3241 
3242 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
3243 @*/
MatCreateSeqDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscScalar data[],Mat * A)3244 PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A)
3245 {
3246   PetscFunctionBegin;
3247   PetscCall(MatCreate(comm, A));
3248   PetscCall(MatSetSizes(*A, m, n, m, n));
3249   PetscCall(MatSetType(*A, MATSEQDENSE));
3250   PetscCall(MatSeqDenseSetPreallocation(*A, data));
3251   PetscFunctionReturn(PETSC_SUCCESS);
3252 }
3253 
3254 /*@
3255   MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix
3256 
3257   Collective
3258 
3259   Input Parameters:
3260 + B    - the matrix
3261 - data - the array (or `NULL`)
3262 
3263   Level: intermediate
3264 
3265   Note:
3266   The data input variable is intended primarily for Fortran programmers
3267   who wish to allocate their own matrix memory space.  Most users should
3268   need not call this routine.
3269 
3270 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()`
3271 @*/
MatSeqDenseSetPreallocation(Mat B,PetscScalar data[])3272 PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[])
3273 {
3274   PetscFunctionBegin;
3275   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3276   PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data));
3277   PetscFunctionReturn(PETSC_SUCCESS);
3278 }
3279 
MatSeqDenseSetPreallocation_SeqDense(Mat B,PetscScalar * data)3280 PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data)
3281 {
3282   Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3283 
3284   PetscFunctionBegin;
3285   PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3286   B->preallocated = PETSC_TRUE;
3287 
3288   PetscCall(PetscLayoutSetUp(B->rmap));
3289   PetscCall(PetscLayoutSetUp(B->cmap));
3290 
3291   if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda));
3292 
3293   if (!data) { /* petsc-allocated storage */
3294     if (!b->user_alloc) PetscCall(PetscFree(b->v));
3295     PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v));
3296 
3297     b->user_alloc = PETSC_FALSE;
3298   } else { /* user-allocated storage */
3299     if (!b->user_alloc) PetscCall(PetscFree(b->v));
3300     b->v          = data;
3301     b->user_alloc = PETSC_TRUE;
3302   }
3303   B->assembled = PETSC_TRUE;
3304   PetscFunctionReturn(PETSC_SUCCESS);
3305 }
3306 
3307 #if defined(PETSC_HAVE_ELEMENTAL)
MatConvert_SeqDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)3308 PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3309 {
3310   Mat                mat_elemental;
3311   const PetscScalar *array;
3312   PetscScalar       *v_colwise;
3313   PetscInt           M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols;
3314 
3315   PetscFunctionBegin;
3316   PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols));
3317   PetscCall(MatDenseGetArrayRead(A, &array));
3318   /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
3319   k = 0;
3320   for (j = 0; j < N; j++) {
3321     cols[j] = j;
3322     for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++];
3323   }
3324   for (i = 0; i < M; i++) rows[i] = i;
3325   PetscCall(MatDenseRestoreArrayRead(A, &array));
3326 
3327   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
3328   PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
3329   PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
3330   PetscCall(MatSetUp(mat_elemental));
3331 
3332   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
3333   PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES));
3334   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
3335   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
3336   PetscCall(PetscFree3(v_colwise, rows, cols));
3337 
3338   if (reuse == MAT_INPLACE_MATRIX) {
3339     PetscCall(MatHeaderReplace(A, &mat_elemental));
3340   } else {
3341     *newmat = mat_elemental;
3342   }
3343   PetscFunctionReturn(PETSC_SUCCESS);
3344 }
3345 #endif
3346 
MatDenseSetLDA_SeqDense(Mat B,PetscInt lda)3347 PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda)
3348 {
3349   Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3350   PetscBool     data;
3351 
3352   PetscFunctionBegin;
3353   data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
3354   PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage");
3355   PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n);
3356   PetscCall(PetscBLASIntCast(lda, &b->lda));
3357   PetscFunctionReturn(PETSC_SUCCESS);
3358 }
3359 
MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3360 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3361 {
3362   PetscFunctionBegin;
3363   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat));
3364   PetscFunctionReturn(PETSC_SUCCESS);
3365 }
3366 
MatDenseCreateColumnVec_Private(Mat A,Vec * v)3367 PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v)
3368 {
3369   PetscBool   isstd, iskok, iscuda, iship;
3370   PetscMPIInt size;
3371 #if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)
3372   /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */
3373   const PetscScalar *a;
3374 #endif
3375 
3376   PetscFunctionBegin;
3377   *v = NULL;
3378   PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, ""));
3379   PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, ""));
3380   PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, ""));
3381   PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, ""));
3382   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3383   if (isstd) {
3384     if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v));
3385     else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v));
3386   } else if (iskok) {
3387     PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support");
3388 #if PetscDefined(HAVE_KOKKOS_KERNELS)
3389     if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v));
3390     else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v));
3391 #endif
3392   } else if (iscuda) {
3393     PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support");
3394 #if PetscDefined(HAVE_CUDA)
3395     PetscCall(MatDenseCUDAGetArrayRead(A, &a));
3396     if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v));
3397     else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v));
3398 #endif
3399   } else if (iship) {
3400     PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support");
3401 #if PetscDefined(HAVE_HIP)
3402     PetscCall(MatDenseHIPGetArrayRead(A, &a));
3403     if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v));
3404     else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v));
3405 #endif
3406   }
3407   PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype);
3408   PetscFunctionReturn(PETSC_SUCCESS);
3409 }
3410 
MatDenseGetColumnVec_SeqDense(Mat A,PetscInt col,Vec * v)3411 PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3412 {
3413   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3414 
3415   PetscFunctionBegin;
3416   PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3417   PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3418   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
3419   a->vecinuse = col + 1;
3420   PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse));
3421   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda));
3422   *v = a->cvec;
3423   PetscFunctionReturn(PETSC_SUCCESS);
3424 }
3425 
MatDenseRestoreColumnVec_SeqDense(Mat A,PetscInt col,Vec * v)3426 PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3427 {
3428   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3429 
3430   PetscFunctionBegin;
3431   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
3432   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
3433   VecCheckAssembled(a->cvec);
3434   a->vecinuse = 0;
3435   PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse));
3436   PetscCall(VecResetArray(a->cvec));
3437   if (v) *v = NULL;
3438   PetscFunctionReturn(PETSC_SUCCESS);
3439 }
3440 
MatDenseGetColumnVecRead_SeqDense(Mat A,PetscInt col,Vec * v)3441 PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3442 {
3443   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3444 
3445   PetscFunctionBegin;
3446   PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3447   PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3448   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
3449   a->vecinuse = col + 1;
3450   PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse));
3451   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda)));
3452   PetscCall(VecLockReadPush(a->cvec));
3453   *v = a->cvec;
3454   PetscFunctionReturn(PETSC_SUCCESS);
3455 }
3456 
MatDenseRestoreColumnVecRead_SeqDense(Mat A,PetscInt col,Vec * v)3457 PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3458 {
3459   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3460 
3461   PetscFunctionBegin;
3462   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
3463   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
3464   VecCheckAssembled(a->cvec);
3465   a->vecinuse = 0;
3466   PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse));
3467   PetscCall(VecLockReadPop(a->cvec));
3468   PetscCall(VecResetArray(a->cvec));
3469   if (v) *v = NULL;
3470   PetscFunctionReturn(PETSC_SUCCESS);
3471 }
3472 
MatDenseGetColumnVecWrite_SeqDense(Mat A,PetscInt col,Vec * v)3473 PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3474 {
3475   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3476 
3477   PetscFunctionBegin;
3478   PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3479   PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3480   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
3481   a->vecinuse = col + 1;
3482   PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse));
3483   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda)));
3484   *v = a->cvec;
3485   PetscFunctionReturn(PETSC_SUCCESS);
3486 }
3487 
MatDenseRestoreColumnVecWrite_SeqDense(Mat A,PetscInt col,Vec * v)3488 PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3489 {
3490   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3491 
3492   PetscFunctionBegin;
3493   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
3494   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
3495   VecCheckAssembled(a->cvec);
3496   a->vecinuse = 0;
3497   PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse));
3498   PetscCall(VecResetArray(a->cvec));
3499   if (v) *v = NULL;
3500   PetscFunctionReturn(PETSC_SUCCESS);
3501 }
3502 
MatDenseGetSubMatrix_SeqDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat * v)3503 PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3504 {
3505   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3506 
3507   PetscFunctionBegin;
3508   PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
3509   PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
3510   if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat));
3511   if (!a->cmat) {
3512     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat));
3513   } else {
3514     PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda)));
3515   }
3516   PetscCall(MatDenseSetLDA(a->cmat, a->lda));
3517   a->matinuse = cbegin + 1;
3518   *v          = a->cmat;
3519 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3520   A->offloadmask = PETSC_OFFLOAD_CPU;
3521 #endif
3522   PetscFunctionReturn(PETSC_SUCCESS);
3523 }
3524 
MatDenseRestoreSubMatrix_SeqDense(Mat A,Mat * v)3525 PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v)
3526 {
3527   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3528 
3529   PetscFunctionBegin;
3530   PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
3531   PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix");
3532   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
3533   a->matinuse = 0;
3534   PetscCall(MatDenseResetArray(a->cmat));
3535   *v = NULL;
3536 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3537   A->offloadmask = PETSC_OFFLOAD_CPU;
3538 #endif
3539   PetscFunctionReturn(PETSC_SUCCESS);
3540 }
3541 
3542 /*MC
3543    MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices.
3544 
3545    Options Database Key:
3546 . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()`
3547 
3548   Level: beginner
3549 
3550 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()`
3551 M*/
MatCreate_SeqDense(Mat B)3552 PetscErrorCode MatCreate_SeqDense(Mat B)
3553 {
3554   Mat_SeqDense *b;
3555   PetscMPIInt   size;
3556 
3557   PetscFunctionBegin;
3558   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3559   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3560 
3561   PetscCall(PetscNew(&b));
3562   B->data   = (void *)b;
3563   B->ops[0] = MatOps_Values;
3564 
3565   b->roworiented = PETSC_TRUE;
3566 
3567   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense));
3568   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense));
3569   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense));
3570   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense));
3571   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense));
3572   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense));
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense));
3574   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense));
3575   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense));
3576   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense));
3577   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense));
3578   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense));
3579   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ));
3580 #if defined(PETSC_HAVE_ELEMENTAL)
3581   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental));
3582 #endif
3583 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
3584   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK));
3585 #endif
3586 #if defined(PETSC_HAVE_CUDA)
3587   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA));
3588   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense));
3589   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense));
3590   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense));
3591 #endif
3592 #if defined(PETSC_HAVE_HIP)
3593   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP));
3594   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense));
3595   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense));
3596   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense));
3597 #endif
3598   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense));
3599   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
3600   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense));
3601   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense));
3602   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense));
3603 
3604   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense));
3605   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense));
3606   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense));
3607   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense));
3608   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense));
3609   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense));
3610   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense));
3611   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense));
3612   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense));
3613   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense));
3614   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultColumnRange_C", MatMultColumnRange_SeqDense));
3615   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense));
3616   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense));
3617   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense));
3618   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE));
3619   PetscFunctionReturn(PETSC_SUCCESS);
3620 }
3621 
3622 /*@C
3623   MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding.
3624 
3625   Not Collective
3626 
3627   Input Parameters:
3628 + A   - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3629 - col - column index
3630 
3631   Output Parameter:
3632 . vals - pointer to the data
3633 
3634   Level: intermediate
3635 
3636   Note:
3637   Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec`
3638 
3639 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()`
3640 @*/
MatDenseGetColumn(Mat A,PetscInt col,PetscScalar * vals[])3641 PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[])
3642 {
3643   PetscFunctionBegin;
3644   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3645   PetscValidLogicalCollectiveInt(A, col, 2);
3646   PetscAssertPointer(vals, 3);
3647   PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals));
3648   PetscFunctionReturn(PETSC_SUCCESS);
3649 }
3650 
3651 /*@C
3652   MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`.
3653 
3654   Not Collective
3655 
3656   Input Parameters:
3657 + A    - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3658 - vals - pointer to the data (may be `NULL`)
3659 
3660   Level: intermediate
3661 
3662 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()`
3663 @*/
MatDenseRestoreColumn(Mat A,PetscScalar * vals[])3664 PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[])
3665 {
3666   PetscFunctionBegin;
3667   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3668   PetscAssertPointer(vals, 2);
3669   PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals));
3670   PetscFunctionReturn(PETSC_SUCCESS);
3671 }
3672 
3673 /*@
3674   MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`.
3675 
3676   Collective
3677 
3678   Input Parameters:
3679 + A   - the `Mat` object
3680 - col - the column index
3681 
3682   Output Parameter:
3683 . v - the vector
3684 
3685   Level: intermediate
3686 
3687   Notes:
3688   The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed.
3689 
3690   Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access.
3691 
3692 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()`
3693 @*/
MatDenseGetColumnVec(Mat A,PetscInt col,Vec * v)3694 PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v)
3695 {
3696   PetscFunctionBegin;
3697   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3698   PetscValidType(A, 1);
3699   PetscValidLogicalCollectiveInt(A, col, 2);
3700   PetscAssertPointer(v, 3);
3701   PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3702   PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3703   PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3704   PetscFunctionReturn(PETSC_SUCCESS);
3705 }
3706 
3707 /*@
3708   MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`.
3709 
3710   Collective
3711 
3712   Input Parameters:
3713 + A   - the `Mat` object
3714 . col - the column index
3715 - v   - the `Vec` object (may be `NULL`)
3716 
3717   Level: intermediate
3718 
3719 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3720 @*/
MatDenseRestoreColumnVec(Mat A,PetscInt col,Vec * v)3721 PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v)
3722 {
3723   PetscFunctionBegin;
3724   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3725   PetscValidType(A, 1);
3726   PetscValidLogicalCollectiveInt(A, col, 2);
3727   if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3);
3728   PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3729   PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3730   PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3731   PetscFunctionReturn(PETSC_SUCCESS);
3732 }
3733 
3734 /*@
3735   MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`.
3736 
3737   Collective
3738 
3739   Input Parameters:
3740 + A   - the `Mat` object
3741 - col - the column index
3742 
3743   Output Parameter:
3744 . v - the vector
3745 
3746   Level: intermediate
3747 
3748   Notes:
3749   The vector is owned by PETSc and users cannot modify it.
3750 
3751   Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed.
3752 
3753   Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access.
3754 
3755 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3756 @*/
MatDenseGetColumnVecRead(Mat A,PetscInt col,Vec * v)3757 PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v)
3758 {
3759   PetscFunctionBegin;
3760   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3761   PetscValidType(A, 1);
3762   PetscValidLogicalCollectiveInt(A, col, 2);
3763   PetscAssertPointer(v, 3);
3764   PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3765   PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3766   PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3767   PetscFunctionReturn(PETSC_SUCCESS);
3768 }
3769 
3770 /*@
3771   MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`.
3772 
3773   Collective
3774 
3775   Input Parameters:
3776 + A   - the `Mat` object
3777 . col - the column index
3778 - v   - the `Vec` object (may be `NULL`)
3779 
3780   Level: intermediate
3781 
3782 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()`
3783 @*/
MatDenseRestoreColumnVecRead(Mat A,PetscInt col,Vec * v)3784 PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v)
3785 {
3786   PetscFunctionBegin;
3787   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3788   PetscValidType(A, 1);
3789   PetscValidLogicalCollectiveInt(A, col, 2);
3790   if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3);
3791   PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3792   PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3793   PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3794   PetscFunctionReturn(PETSC_SUCCESS);
3795 }
3796 
3797 /*@
3798   MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`.
3799 
3800   Collective
3801 
3802   Input Parameters:
3803 + A   - the `Mat` object
3804 - col - the column index
3805 
3806   Output Parameter:
3807 . v - the vector
3808 
3809   Level: intermediate
3810 
3811   Notes:
3812   The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed.
3813 
3814   Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access.
3815 
3816 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3817 @*/
MatDenseGetColumnVecWrite(Mat A,PetscInt col,Vec * v)3818 PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v)
3819 {
3820   PetscFunctionBegin;
3821   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3822   PetscValidType(A, 1);
3823   PetscValidLogicalCollectiveInt(A, col, 2);
3824   PetscAssertPointer(v, 3);
3825   PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3826   PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3827   PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3828   PetscFunctionReturn(PETSC_SUCCESS);
3829 }
3830 
3831 /*@
3832   MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`.
3833 
3834   Collective
3835 
3836   Input Parameters:
3837 + A   - the `Mat` object
3838 . col - the column index
3839 - v   - the `Vec` object (may be `NULL`)
3840 
3841   Level: intermediate
3842 
3843 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`
3844 @*/
MatDenseRestoreColumnVecWrite(Mat A,PetscInt col,Vec * v)3845 PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v)
3846 {
3847   PetscFunctionBegin;
3848   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3849   PetscValidType(A, 1);
3850   PetscValidLogicalCollectiveInt(A, col, 2);
3851   if (v) PetscValidHeaderSpecific(*v, VEC_CLASSID, 3);
3852   PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3853   PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
3854   PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3855   PetscFunctionReturn(PETSC_SUCCESS);
3856 }
3857 
3858 /*@
3859   MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`.
3860 
3861   Collective
3862 
3863   Input Parameters:
3864 + A      - the `Mat` object
3865 . rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0)
3866 . rend   - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`)
3867 . cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0)
3868 - cend   - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`)
3869 
3870   Output Parameter:
3871 . v - the matrix
3872 
3873   Level: intermediate
3874 
3875   Notes:
3876   The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed.
3877 
3878   The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows.
3879 
3880 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()`
3881 @*/
MatDenseGetSubMatrix(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat * v)3882 PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3883 {
3884   PetscFunctionBegin;
3885   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3886   PetscValidType(A, 1);
3887   PetscValidLogicalCollectiveInt(A, rbegin, 2);
3888   PetscValidLogicalCollectiveInt(A, rend, 3);
3889   PetscValidLogicalCollectiveInt(A, cbegin, 4);
3890   PetscValidLogicalCollectiveInt(A, cend, 5);
3891   PetscAssertPointer(v, 6);
3892   if (rbegin == PETSC_DECIDE) rbegin = 0;
3893   if (rend == PETSC_DECIDE) rend = A->rmap->N;
3894   if (cbegin == PETSC_DECIDE) cbegin = 0;
3895   if (cend == PETSC_DECIDE) cend = A->cmap->N;
3896   PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
3897   PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N);
3898   PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N);
3899   PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N);
3900   PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N);
3901   PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v));
3902   PetscFunctionReturn(PETSC_SUCCESS);
3903 }
3904 
3905 /*@
3906   MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`.
3907 
3908   Collective
3909 
3910   Input Parameters:
3911 + A - the `Mat` object
3912 - v - the `Mat` object (cannot be `NULL`)
3913 
3914   Level: intermediate
3915 
3916 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()`
3917 @*/
MatDenseRestoreSubMatrix(Mat A,Mat * v)3918 PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v)
3919 {
3920   PetscFunctionBegin;
3921   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3922   PetscValidType(A, 1);
3923   PetscAssertPointer(v, 2);
3924   PetscValidHeaderSpecific(*v, MAT_CLASSID, 2);
3925   PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v));
3926   PetscFunctionReturn(PETSC_SUCCESS);
3927 }
3928 
3929 #include <petscblaslapack.h>
3930 #include <petsc/private/kernels/blockinvert.h>
3931 
MatSeqDenseInvert(Mat A)3932 PetscErrorCode MatSeqDenseInvert(Mat A)
3933 {
3934   PetscInt        m;
3935   const PetscReal shift = 0.0;
3936   PetscBool       allowzeropivot, zeropivotdetected = PETSC_FALSE;
3937   PetscScalar    *values;
3938 
3939   PetscFunctionBegin;
3940   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3941   PetscCall(MatDenseGetArray(A, &values));
3942   PetscCall(MatGetLocalSize(A, &m, NULL));
3943   allowzeropivot = PetscNot(A->erroriffailure);
3944   /* factor and invert each block */
3945   switch (m) {
3946   case 1:
3947     values[0] = (PetscScalar)1.0 / (values[0] + shift);
3948     break;
3949   case 2:
3950     PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected));
3951     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3952     break;
3953   case 3:
3954     PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected));
3955     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3956     break;
3957   case 4:
3958     PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected));
3959     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3960     break;
3961   case 5: {
3962     PetscScalar work[25];
3963     PetscInt    ipvt[5];
3964 
3965     PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
3966     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3967   } break;
3968   case 6:
3969     PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected));
3970     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3971     break;
3972   case 7:
3973     PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected));
3974     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3975     break;
3976   default: {
3977     PetscInt    *v_pivots, *IJ, j;
3978     PetscScalar *v_work;
3979 
3980     PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ));
3981     for (j = 0; j < m; j++) IJ[j] = j;
3982     PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
3983     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3984     PetscCall(PetscFree3(v_work, v_pivots, IJ));
3985   }
3986   }
3987   PetscCall(MatDenseRestoreArray(A, &values));
3988   PetscFunctionReturn(PETSC_SUCCESS);
3989 }
3990