xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4cc2b5b5fe4ffd09e5956b56d7cdc4f43e324103)
1ed3cc1f0SBarry Smith /*
2ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
347d993e7Ssuyashtn    Portions of this code are under:
447d993e7Ssuyashtn    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5ed3cc1f0SBarry Smith */
6ed3cc1f0SBarry Smith 
7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
9baa3c1c6SHong Zhang #include <petscblaslapack.h>
108965ea79SLois Curfman McInnes 
11ab92ecdeSBarry Smith /*@
1211a5261eSBarry Smith   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13ab92ecdeSBarry Smith   matrix that represents the operator. For sequential matrices it returns itself.
14ab92ecdeSBarry Smith 
15ab92ecdeSBarry Smith   Input Parameter:
162ef1f0ffSBarry Smith . A - the sequential or MPI `MATDENSE` matrix
17ab92ecdeSBarry Smith 
18ab92ecdeSBarry Smith   Output Parameter:
19ab92ecdeSBarry Smith . B - the inner matrix
20ab92ecdeSBarry Smith 
218e6c10adSSatish Balay   Level: intermediate
228e6c10adSSatish Balay 
231cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24ab92ecdeSBarry Smith @*/
25d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26d71ae5a4SJacob Faibussowitsch {
27ab92ecdeSBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28ace3abfcSBarry Smith   PetscBool     flg;
29ab92ecdeSBarry Smith 
30ab92ecdeSBarry Smith   PetscFunctionBegin;
31d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
324f572ea9SToby Isaac   PetscAssertPointer(B, 2);
339566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
342205254eSKarl Rupp   if (flg) *B = mat->A;
35f140bc17SStefano Zampini   else {
369566063dSJacob Faibussowitsch     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
3728b400f6SJacob Faibussowitsch     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38f140bc17SStefano Zampini     *B = A;
39f140bc17SStefano Zampini   }
403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41ab92ecdeSBarry Smith }
42ab92ecdeSBarry Smith 
4366976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44d71ae5a4SJacob Faibussowitsch {
45a76f77c3SStefano Zampini   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46a76f77c3SStefano Zampini   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47a76f77c3SStefano Zampini 
48a76f77c3SStefano Zampini   PetscFunctionBegin;
499566063dSJacob Faibussowitsch   PetscCall(MatCopy(Amat->A, Bmat->A, s));
503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
51a76f77c3SStefano Zampini }
52a76f77c3SStefano Zampini 
53d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54d71ae5a4SJacob Faibussowitsch {
552f605a99SJose E. Roman   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
562f605a99SJose E. Roman   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
572f605a99SJose E. Roman   PetscScalar  *v;
582f605a99SJose E. Roman 
592f605a99SJose E. Roman   PetscFunctionBegin;
602f605a99SJose E. Roman   PetscCall(MatDenseGetArray(mat->A, &v));
612f605a99SJose E. Roman   PetscCall(MatDenseGetLDA(mat->A, &lda));
622f605a99SJose E. Roman   rend2 = PetscMin(rend, A->cmap->N);
632f605a99SJose E. Roman   if (rend2 > rstart) {
642f605a99SJose E. Roman     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
652f605a99SJose E. Roman     PetscCall(PetscLogFlops(rend2 - rstart));
662f605a99SJose E. Roman   }
672f605a99SJose E. Roman   PetscCall(MatDenseRestoreArray(mat->A, &v));
683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
692f605a99SJose E. Roman }
702f605a99SJose E. Roman 
7166976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72d71ae5a4SJacob Faibussowitsch {
73ba8c8a56SBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74d0f46423SBarry Smith   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75ba8c8a56SBarry Smith 
76ba8c8a56SBarry Smith   PetscFunctionBegin;
77aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78ba8c8a56SBarry Smith   lrow = row - rstart;
799566063dSJacob Faibussowitsch   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81ba8c8a56SBarry Smith }
82ba8c8a56SBarry Smith 
8366976f2fSJacob Faibussowitsch static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84d71ae5a4SJacob Faibussowitsch {
85637a0070SStefano Zampini   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86637a0070SStefano Zampini   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87ba8c8a56SBarry Smith 
88ba8c8a56SBarry Smith   PetscFunctionBegin;
89aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90637a0070SStefano Zampini   lrow = row - rstart;
919566063dSJacob Faibussowitsch   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
93ba8c8a56SBarry Smith }
94ba8c8a56SBarry Smith 
9566976f2fSJacob Faibussowitsch static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
96d71ae5a4SJacob Faibussowitsch {
970de54da6SSatish Balay   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
98d0f46423SBarry Smith   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
9987828ca2SBarry Smith   PetscScalar  *array;
1000de54da6SSatish Balay   MPI_Comm      comm;
1014b6ae80fSStefano Zampini   PetscBool     flg;
10211bd1e4dSLisandro Dalcin   Mat           B;
1030de54da6SSatish Balay 
1040de54da6SSatish Balay   PetscFunctionBegin;
1059566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A, &flg));
10628b400f6SJacob Faibussowitsch   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
1079566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
108616b8fbbSStefano Zampini   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
109a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA)
1109566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
11128b400f6SJacob Faibussowitsch     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
112a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP)
11347d993e7Ssuyashtn     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
11447d993e7Ssuyashtn     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
11547d993e7Ssuyashtn #endif
116f4f49eeaSPierre Jolivet     PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
1179566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &B));
1189566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, m, m, m, m));
1199566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
1209566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
1219566063dSJacob Faibussowitsch     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
1229566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
1239566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
12411bd1e4dSLisandro Dalcin     *a = B;
1259566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
1262205254eSKarl Rupp   } else *a = B;
1273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1280de54da6SSatish Balay }
1290de54da6SSatish Balay 
13066976f2fSJacob Faibussowitsch static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
131d71ae5a4SJacob Faibussowitsch {
13239b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
133d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
134ace3abfcSBarry Smith   PetscBool     roworiented = A->roworiented;
1358965ea79SLois Curfman McInnes 
1363a40ed3dSBarry Smith   PetscFunctionBegin;
1378965ea79SLois Curfman McInnes   for (i = 0; i < m; i++) {
1385ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
13908401ef6SPierre Jolivet     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
1408965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1418965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
14239b7565bSBarry Smith       if (roworiented) {
143a80ceddfSStefano Zampini         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
1443a40ed3dSBarry Smith       } else {
1458965ea79SLois Curfman McInnes         for (j = 0; j < n; j++) {
1465ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
14708401ef6SPierre Jolivet           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
148a80ceddfSStefano Zampini           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
14939b7565bSBarry Smith         }
1508965ea79SLois Curfman McInnes       }
1512205254eSKarl Rupp     } else if (!A->donotstash) {
1525080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
15339b7565bSBarry Smith       if (roworiented) {
154a80ceddfSStefano Zampini         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
155d36fbae8SSatish Balay       } else {
156a80ceddfSStefano Zampini         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
15739b7565bSBarry Smith       }
158b49de8d1SLois Curfman McInnes     }
159b49de8d1SLois Curfman McInnes   }
1603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
161b49de8d1SLois Curfman McInnes }
162b49de8d1SLois Curfman McInnes 
16366976f2fSJacob Faibussowitsch static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164d71ae5a4SJacob Faibussowitsch {
165b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167b49de8d1SLois Curfman McInnes 
1683a40ed3dSBarry Smith   PetscFunctionBegin;
169b49de8d1SLois Curfman McInnes   for (i = 0; i < m; i++) {
17054c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
17154c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
173b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
174b49de8d1SLois Curfman McInnes       for (j = 0; j < n; j++) {
17554c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
17654c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
1779566063dSJacob Faibussowitsch         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178b49de8d1SLois Curfman McInnes       }
179e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
1808965ea79SLois Curfman McInnes   }
1813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1828965ea79SLois Curfman McInnes }
1838965ea79SLois Curfman McInnes 
184d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185d71ae5a4SJacob Faibussowitsch {
18649a6ff4bSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
18749a6ff4bSBarry Smith 
18849a6ff4bSBarry Smith   PetscFunctionBegin;
1899566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, lda));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19149a6ff4bSBarry Smith }
19249a6ff4bSBarry Smith 
193d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
194d71ae5a4SJacob Faibussowitsch {
195ad16ce7aSStefano Zampini   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
19647d993e7Ssuyashtn   MatType       mtype = MATSEQDENSE;
197ad16ce7aSStefano Zampini 
198ad16ce7aSStefano Zampini   PetscFunctionBegin;
19917359960SJose E. Roman   if (!a->A) {
20028b400f6SJacob Faibussowitsch     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2019566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->rmap));
2029566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->cmap));
2039566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
2049566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
205a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA)
20647d993e7Ssuyashtn     PetscBool iscuda;
2079566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
20847d993e7Ssuyashtn     if (iscuda) mtype = MATSEQDENSECUDA;
209a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP)
21047d993e7Ssuyashtn     PetscBool iship;
21147d993e7Ssuyashtn     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
21247d993e7Ssuyashtn     if (iship) mtype = MATSEQDENSEHIP;
21347d993e7Ssuyashtn #endif
21447d993e7Ssuyashtn     PetscCall(MatSetType(a->A, mtype));
21517359960SJose E. Roman   }
2169566063dSJacob Faibussowitsch   PetscCall(MatDenseSetLDA(a->A, lda));
2173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
218ad16ce7aSStefano Zampini }
219ad16ce7aSStefano Zampini 
220d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221d71ae5a4SJacob Faibussowitsch {
222ff14e315SSatish Balay   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223ff14e315SSatish Balay 
2243a40ed3dSBarry Smith   PetscFunctionBegin;
22528b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2269566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, array));
2273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
228ff14e315SSatish Balay }
229ff14e315SSatish Balay 
230f1e99121SPierre Jolivet static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
231d71ae5a4SJacob Faibussowitsch {
2328572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2338572280aSBarry Smith 
2348572280aSBarry Smith   PetscFunctionBegin;
23528b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236f1e99121SPierre Jolivet   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
2373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2388572280aSBarry Smith }
2398572280aSBarry Smith 
240d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241d71ae5a4SJacob Faibussowitsch {
2426947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2436947451fSStefano Zampini 
2446947451fSStefano Zampini   PetscFunctionBegin;
24528b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2469566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, array));
2473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2486947451fSStefano Zampini }
2496947451fSStefano Zampini 
250d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251d71ae5a4SJacob Faibussowitsch {
252d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253d3042a70SBarry Smith 
254d3042a70SBarry Smith   PetscFunctionBegin;
25528b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
25628b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2579566063dSJacob Faibussowitsch   PetscCall(MatDensePlaceArray(a->A, array));
2583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259d3042a70SBarry Smith }
260d3042a70SBarry Smith 
261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262d71ae5a4SJacob Faibussowitsch {
263d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264d3042a70SBarry Smith 
265d3042a70SBarry Smith   PetscFunctionBegin;
26628b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
26728b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2689566063dSJacob Faibussowitsch   PetscCall(MatDenseResetArray(a->A));
2693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
270d3042a70SBarry Smith }
271d3042a70SBarry Smith 
272d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273d71ae5a4SJacob Faibussowitsch {
274d5ea218eSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275d5ea218eSStefano Zampini 
276d5ea218eSStefano Zampini   PetscFunctionBegin;
27728b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
27828b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2799566063dSJacob Faibussowitsch   PetscCall(MatDenseReplaceArray(a->A, array));
2803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
281d5ea218eSStefano Zampini }
282d5ea218eSStefano Zampini 
283d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
284d71ae5a4SJacob Faibussowitsch {
285ca3fa75bSLois Curfman McInnes   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
286637a0070SStefano Zampini   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
2875d0c19d7SBarry Smith   const PetscInt    *irow, *icol;
288637a0070SStefano Zampini   const PetscScalar *v;
289637a0070SStefano Zampini   PetscScalar       *bv;
290ca3fa75bSLois Curfman McInnes   Mat                newmat;
2914aa3045dSJed Brown   IS                 iscol_local;
29242a884f0SBarry Smith   MPI_Comm           comm_is, comm_mat;
293ca3fa75bSLois Curfman McInnes 
294ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
2959566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
2969566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
29708401ef6SPierre Jolivet   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
29842a884f0SBarry Smith 
2999566063dSJacob Faibussowitsch   PetscCall(ISAllGather(iscol, &iscol_local));
3009566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &irow));
3019566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &icol));
3029566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &nrows));
3039566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &ncols));
3049566063dSJacob Faibussowitsch   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
305ca3fa75bSLois Curfman McInnes 
306ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
307da81f932SPierre Jolivet      to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
3087eba5e9cSLois Curfman McInnes      original matrix! */
309ca3fa75bSLois Curfman McInnes 
3109566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
3119566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
312ca3fa75bSLois Curfman McInnes 
313ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
314ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
315e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
3167eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
317ca3fa75bSLois Curfman McInnes     newmat = *B;
318ca3fa75bSLois Curfman McInnes   } else {
319ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
3209566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
3219566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
3229566063dSJacob Faibussowitsch     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
3239566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
324ca3fa75bSLois Curfman McInnes   }
325ca3fa75bSLois Curfman McInnes 
326ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
327ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense *)newmat->data;
3289566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(newmatd->A, &bv));
3299566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mat->A, &v));
3309566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mat->A, &lda));
3314aa3045dSJed Brown   for (i = 0; i < Ncols; i++) {
332637a0070SStefano Zampini     const PetscScalar *av = v + lda * icol[i];
333ad540459SPierre Jolivet     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
334ca3fa75bSLois Curfman McInnes   }
3359566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
3369566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
337ca3fa75bSLois Curfman McInnes 
338ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
3399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
3409566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
341ca3fa75bSLois Curfman McInnes 
342ca3fa75bSLois Curfman McInnes   /* Free work space */
3439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &irow));
3449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &icol));
3459566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&iscol_local));
346ca3fa75bSLois Curfman McInnes   *B = newmat;
3473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
348ca3fa75bSLois Curfman McInnes }
349ca3fa75bSLois Curfman McInnes 
35066976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351d71ae5a4SJacob Faibussowitsch {
35273a71a0fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
35373a71a0fSBarry Smith 
3543a40ed3dSBarry Smith   PetscFunctionBegin;
3559566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, array));
3563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
357ff14e315SSatish Balay }
358ff14e315SSatish Balay 
359f1e99121SPierre Jolivet static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
360d71ae5a4SJacob Faibussowitsch {
3618572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3628572280aSBarry Smith 
3638572280aSBarry Smith   PetscFunctionBegin;
364f1e99121SPierre Jolivet   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
3653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3668572280aSBarry Smith }
3678572280aSBarry Smith 
36866976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369d71ae5a4SJacob Faibussowitsch {
3706947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3716947451fSStefano Zampini 
3726947451fSStefano Zampini   PetscFunctionBegin;
3739566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
3743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3756947451fSStefano Zampini }
3766947451fSStefano Zampini 
37766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378d71ae5a4SJacob Faibussowitsch {
37939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
38013f74950SBarry Smith   PetscInt      nstash, reallocs;
3818965ea79SLois Curfman McInnes 
3823a40ed3dSBarry Smith   PetscFunctionBegin;
3833ba16761SJacob Faibussowitsch   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
3848965ea79SLois Curfman McInnes 
3859566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
3869566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
3879566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
3883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3898965ea79SLois Curfman McInnes }
3908965ea79SLois Curfman McInnes 
39166976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
392d71ae5a4SJacob Faibussowitsch {
39339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
39413f74950SBarry Smith   PetscInt      i, *row, *col, flg, j, rstart, ncols;
39513f74950SBarry Smith   PetscMPIInt   n;
39687828ca2SBarry Smith   PetscScalar  *val;
3978965ea79SLois Curfman McInnes 
3983a40ed3dSBarry Smith   PetscFunctionBegin;
399910cf402Sprj-   if (!mdn->donotstash && !mat->nooffprocentries) {
4008965ea79SLois Curfman McInnes     /*  wait on receives */
4017ef1d9bdSSatish Balay     while (1) {
4029566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
4037ef1d9bdSSatish Balay       if (!flg) break;
4048965ea79SLois Curfman McInnes 
4057ef1d9bdSSatish Balay       for (i = 0; i < n;) {
4067ef1d9bdSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
4072205254eSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
4082205254eSKarl Rupp           if (row[j] != rstart) break;
4092205254eSKarl Rupp         }
4107ef1d9bdSSatish Balay         if (j < n) ncols = j - i;
4117ef1d9bdSSatish Balay         else ncols = n - i;
4127ef1d9bdSSatish Balay         /* Now assemble all these values with a single function call */
4139566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
4147ef1d9bdSSatish Balay         i = j;
4158965ea79SLois Curfman McInnes       }
4167ef1d9bdSSatish Balay     }
4179566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
418910cf402Sprj-   }
4198965ea79SLois Curfman McInnes 
4209566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mdn->A, mode));
4219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mdn->A, mode));
4223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4238965ea79SLois Curfman McInnes }
4248965ea79SLois Curfman McInnes 
42566976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426d71ae5a4SJacob Faibussowitsch {
42739ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
4283a40ed3dSBarry Smith 
4293a40ed3dSBarry Smith   PetscFunctionBegin;
4309566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
4313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4328965ea79SLois Curfman McInnes }
4338965ea79SLois Curfman McInnes 
43466976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
435d71ae5a4SJacob Faibussowitsch {
43639ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
437637a0070SStefano Zampini   PetscInt      i, len, *lrows;
438637a0070SStefano Zampini 
439637a0070SStefano Zampini   PetscFunctionBegin;
440637a0070SStefano Zampini   /* get locally owned rows */
4419566063dSJacob Faibussowitsch   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
442dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
443637a0070SStefano Zampini   if (x && b) {
44497b48c8fSBarry Smith     const PetscScalar *xx;
44597b48c8fSBarry Smith     PetscScalar       *bb;
4468965ea79SLois Curfman McInnes 
4479566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
4489566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(b, &bb));
449637a0070SStefano Zampini     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
4509566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
4519566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(b, &bb));
45297b48c8fSBarry Smith   }
4539566063dSJacob Faibussowitsch   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
454e2eb51b1SBarry Smith   if (diag != 0.0) {
455637a0070SStefano Zampini     Vec d;
456b9679d65SBarry Smith 
4579566063dSJacob Faibussowitsch     PetscCall(MatCreateVecs(A, NULL, &d));
4589566063dSJacob Faibussowitsch     PetscCall(VecSet(d, diag));
4599566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
4609566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&d));
461b9679d65SBarry Smith   }
4629566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
4633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4648965ea79SLois Curfman McInnes }
4658965ea79SLois Curfman McInnes 
4660be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
4670be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
4680be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
4690be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
4700be0d8bdSHansol Suh 
47166976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
472d71ae5a4SJacob Faibussowitsch {
47339ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
474637a0070SStefano Zampini   const PetscScalar *ax;
475637a0070SStefano Zampini   PetscScalar       *ay;
476d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
477c456f294SBarry Smith 
4783a40ed3dSBarry Smith   PetscFunctionBegin;
4796f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
4809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
4819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
4829566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
4839566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
4849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
4859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
4869566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
4873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4888965ea79SLois Curfman McInnes }
4898965ea79SLois Curfman McInnes 
4900be0d8bdSHansol Suh static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
4910be0d8bdSHansol Suh {
4920be0d8bdSHansol Suh   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
4930be0d8bdSHansol Suh   const PetscScalar *ax;
4940be0d8bdSHansol Suh   PetscScalar       *ay;
4950be0d8bdSHansol Suh   PetscMemType       axmtype, aymtype;
4960be0d8bdSHansol Suh 
4970be0d8bdSHansol Suh   PetscFunctionBegin;
4980be0d8bdSHansol Suh   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
4990be0d8bdSHansol Suh   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
5000be0d8bdSHansol Suh   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
5010be0d8bdSHansol Suh   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
5020be0d8bdSHansol Suh   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
5030be0d8bdSHansol Suh   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
5040be0d8bdSHansol Suh   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
5050be0d8bdSHansol Suh   PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
5060be0d8bdSHansol Suh   PetscFunctionReturn(PETSC_SUCCESS);
5070be0d8bdSHansol Suh }
5080be0d8bdSHansol Suh 
50966976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
510d71ae5a4SJacob Faibussowitsch {
51139ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
512637a0070SStefano Zampini   const PetscScalar *ax;
513637a0070SStefano Zampini   PetscScalar       *ay;
514d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
515c456f294SBarry Smith 
5163a40ed3dSBarry Smith   PetscFunctionBegin;
5176f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
5189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
5199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
5209566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
5219566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
5229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
5239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
5249566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
5253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5268965ea79SLois Curfman McInnes }
5278965ea79SLois Curfman McInnes 
5280be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
5290be0d8bdSHansol Suh {
5300be0d8bdSHansol Suh   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
5310be0d8bdSHansol Suh   const PetscScalar *ax;
5320be0d8bdSHansol Suh   PetscScalar       *ay;
5330be0d8bdSHansol Suh   PetscMemType       axmtype, aymtype;
5340be0d8bdSHansol Suh 
5350be0d8bdSHansol Suh   PetscFunctionBegin;
5360be0d8bdSHansol Suh   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
5370be0d8bdSHansol Suh   PetscCall(VecSet(yy, 0.0));
5380be0d8bdSHansol Suh   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
5390be0d8bdSHansol Suh   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
5400be0d8bdSHansol Suh   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
5410be0d8bdSHansol Suh   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
5420be0d8bdSHansol Suh   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
5430be0d8bdSHansol Suh   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
5440be0d8bdSHansol Suh   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
5450be0d8bdSHansol Suh   PetscFunctionReturn(PETSC_SUCCESS);
5460be0d8bdSHansol Suh }
5470be0d8bdSHansol Suh 
548459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
549d71ae5a4SJacob Faibussowitsch {
550096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
551637a0070SStefano Zampini   const PetscScalar *ax;
552637a0070SStefano Zampini   PetscScalar       *ay;
553d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
5544d86920dSPierre Jolivet 
5553a40ed3dSBarry Smith   PetscFunctionBegin;
5566f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
5579566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
558459e8d23SBlanca Mellado Pinto   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
559459e8d23SBlanca Mellado Pinto   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
5609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
5619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
5629566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
5639566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
5649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
5659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
5663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
567096963f5SLois Curfman McInnes }
568096963f5SLois Curfman McInnes 
5690be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
5700be0d8bdSHansol Suh {
5710be0d8bdSHansol Suh   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
5720be0d8bdSHansol Suh   const PetscScalar *ax;
5730be0d8bdSHansol Suh   PetscScalar       *ay;
5740be0d8bdSHansol Suh   PetscMemType       axmtype, aymtype;
5750be0d8bdSHansol Suh 
5760be0d8bdSHansol Suh   PetscFunctionBegin;
5770be0d8bdSHansol Suh   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
5780be0d8bdSHansol Suh   PetscCall(VecCopy(yy, zz));
5790be0d8bdSHansol Suh   PetscMPIInt rank;
5800be0d8bdSHansol Suh   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
5810be0d8bdSHansol Suh   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
5820be0d8bdSHansol Suh   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
5830be0d8bdSHansol Suh   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
5840be0d8bdSHansol Suh   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
5850be0d8bdSHansol Suh   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
5860be0d8bdSHansol Suh   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
5870be0d8bdSHansol Suh   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
5880be0d8bdSHansol Suh   PetscFunctionReturn(PETSC_SUCCESS);
5890be0d8bdSHansol Suh }
5900be0d8bdSHansol Suh 
591459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
592d71ae5a4SJacob Faibussowitsch {
593096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
594637a0070SStefano Zampini   const PetscScalar *ax;
595637a0070SStefano Zampini   PetscScalar       *ay;
596d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
597096963f5SLois Curfman McInnes 
5983a40ed3dSBarry Smith   PetscFunctionBegin;
5996f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
6009566063dSJacob Faibussowitsch   PetscCall(VecCopy(yy, zz));
601459e8d23SBlanca Mellado Pinto   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
602459e8d23SBlanca Mellado Pinto   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
6039566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
6049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
6059566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
6069566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
6079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
6089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
6093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
610096963f5SLois Curfman McInnes }
611096963f5SLois Curfman McInnes 
612459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
613459e8d23SBlanca Mellado Pinto {
614459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
615459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
616459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
617459e8d23SBlanca Mellado Pinto }
618459e8d23SBlanca Mellado Pinto 
619459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
620459e8d23SBlanca Mellado Pinto {
621459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
622459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
623459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
624459e8d23SBlanca Mellado Pinto }
625459e8d23SBlanca Mellado Pinto 
626459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
627459e8d23SBlanca Mellado Pinto {
628459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
629459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
630459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
631459e8d23SBlanca Mellado Pinto }
632459e8d23SBlanca Mellado Pinto 
633459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
634459e8d23SBlanca Mellado Pinto {
635459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
636459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
637459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
638459e8d23SBlanca Mellado Pinto }
639459e8d23SBlanca Mellado Pinto 
640d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
641d71ae5a4SJacob Faibussowitsch {
64239ddd567SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
64379560b15SJacob Faibussowitsch   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
64479560b15SJacob Faibussowitsch   PetscScalar       *x;
645637a0070SStefano Zampini   const PetscScalar *av;
646ed3cc1f0SBarry Smith 
6473a40ed3dSBarry Smith   PetscFunctionBegin;
6489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
64979560b15SJacob Faibussowitsch   PetscCall(VecGetSize(v, &ng));
65079560b15SJacob Faibussowitsch   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
65179560b15SJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &nl));
652d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
653d0f46423SBarry Smith   radd = A->rmap->rstart * m;
6549566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
6559566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
656ad540459SPierre Jolivet   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
6579566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
6580be0d8bdSHansol Suh   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
6599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
6603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6618965ea79SLois Curfman McInnes }
6628965ea79SLois Curfman McInnes 
66366976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIDense(Mat mat)
664d71ae5a4SJacob Faibussowitsch {
6653501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
666ed3cc1f0SBarry Smith 
6673a40ed3dSBarry Smith   PetscFunctionBegin;
6683ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
6699566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
67028b400f6SJacob Faibussowitsch   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
67128b400f6SJacob Faibussowitsch   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
6729566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->A));
6739566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->lvec));
6749566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&mdn->Mvctx));
6759566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->cvec));
6769566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->cmat));
67701b82886SBarry Smith 
6789566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
6799566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
6808baccfbdSHong Zhang 
6819566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
6829566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
6839566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
6849566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
6859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
6869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
6879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
6889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
6899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
6909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
6919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
6929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
6939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
6948baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
6959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
6968baccfbdSHong Zhang #endif
697d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
6989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
699d24d4204SJose E. Roman #endif
7009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
7019566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
7029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
7036718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
7049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
7059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
7062e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
7072e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
7082e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
7092e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
7102e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
7112e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
7122e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
7132e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
7142e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
7152e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
7162e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
7172e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
7182e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
7192e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
7202e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
7213d9668e3SJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
7226718818eSStefano Zampini #endif
72347d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
72447d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
72547d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
72647d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
72747d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
72847d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
72947d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
73047d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
73147d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
73247d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
73347d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
73447d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
73547d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
73647d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
73747d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
73847d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
73947d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
74047d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
7413d9668e3SJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
74247d993e7Ssuyashtn #endif
7439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
7449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
7459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
7469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
7479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
7489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
7499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
7509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
7519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
7529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
7530be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
7540be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
7550be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
756f8103e6bSStefano Zampini 
757f8103e6bSStefano Zampini   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
7583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7598965ea79SLois Curfman McInnes }
76039ddd567SLois Curfman McInnes 
7619804daf3SBarry Smith #include <petscdraw.h>
762d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
763d71ae5a4SJacob Faibussowitsch {
76439ddd567SLois Curfman McInnes   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
765616b8fbbSStefano Zampini   PetscMPIInt       rank;
76619fd82e9SBarry Smith   PetscViewerType   vtype;
767ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
768b0a32e0cSBarry Smith   PetscViewer       sviewer;
769f3ef73ceSBarry Smith   PetscViewerFormat format;
7708965ea79SLois Curfman McInnes 
7713a40ed3dSBarry Smith   PetscFunctionBegin;
7729566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7739566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
7749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
77532077d6dSBarry Smith   if (iascii) {
7769566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetType(viewer, &vtype));
7779566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
778456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
7794e220ebcSLois Curfman McInnes       MatInfo info;
7809566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
7819566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
7829371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
7839371c9d4SSatish Balay                                                    (PetscInt)info.memory));
7849566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
7859566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
7866f08ad05SPierre Jolivet       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
7873ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
788fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
7893ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
7908965ea79SLois Curfman McInnes     }
791f1af5d2fSBarry Smith   } else if (isdraw) {
792b0a32e0cSBarry Smith     PetscDraw draw;
793ace3abfcSBarry Smith     PetscBool isnull;
794f1af5d2fSBarry Smith 
7959566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
7969566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
7973ba16761SJacob Faibussowitsch     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
798f1af5d2fSBarry Smith   }
79977ed5343SBarry Smith 
8007da1fb6eSBarry Smith   {
8018965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
8028965ea79SLois Curfman McInnes     Mat          A;
803d0f46423SBarry Smith     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
804ba8c8a56SBarry Smith     PetscInt    *cols;
805ba8c8a56SBarry Smith     PetscScalar *vals;
8068965ea79SLois Curfman McInnes 
8079566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
808dd400576SPatrick Sanan     if (rank == 0) {
8099566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
8103a40ed3dSBarry Smith     } else {
8119566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
8128965ea79SLois Curfman McInnes     }
8137adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
8149566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPIDENSE));
8159566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
8168965ea79SLois Curfman McInnes 
81739ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
81839ddd567SLois Curfman McInnes        but it's quick for now */
81951022da4SBarry Smith     A->insertmode = INSERT_VALUES;
8202205254eSKarl Rupp 
8212205254eSKarl Rupp     row = mat->rmap->rstart;
8222205254eSKarl Rupp     m   = mdn->A->rmap->n;
8238965ea79SLois Curfman McInnes     for (i = 0; i < m; i++) {
8249566063dSJacob Faibussowitsch       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
8259566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
8269566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
82739ddd567SLois Curfman McInnes       row++;
8288965ea79SLois Curfman McInnes     }
8298965ea79SLois Curfman McInnes 
8309566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
8319566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
8329566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
833dd400576SPatrick Sanan     if (rank == 0) {
834f4f49eeaSPierre Jolivet       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
835f4f49eeaSPierre Jolivet       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
8368965ea79SLois Curfman McInnes     }
8379566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
8389566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
8398965ea79SLois Curfman McInnes   }
8403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8418965ea79SLois Curfman McInnes }
8428965ea79SLois Curfman McInnes 
84366976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
844d71ae5a4SJacob Faibussowitsch {
845ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw, issocket;
8468965ea79SLois Curfman McInnes 
847433994e6SBarry Smith   PetscFunctionBegin;
8489566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
8499566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
8509566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
8519566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
8520f5bd95cSBarry Smith 
85332077d6dSBarry Smith   if (iascii || issocket || isdraw) {
8549566063dSJacob Faibussowitsch     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
8551baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
8563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8578965ea79SLois Curfman McInnes }
8588965ea79SLois Curfman McInnes 
85966976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
860d71ae5a4SJacob Faibussowitsch {
8613501a2bdSLois Curfman McInnes   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
8623501a2bdSLois Curfman McInnes   Mat            mdn = mat->A;
8633966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
8648965ea79SLois Curfman McInnes 
8653a40ed3dSBarry Smith   PetscFunctionBegin;
8664e220ebcSLois Curfman McInnes   info->block_size = 1.0;
8672205254eSKarl Rupp 
8689566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
8692205254eSKarl Rupp 
8709371c9d4SSatish Balay   isend[0] = info->nz_used;
8719371c9d4SSatish Balay   isend[1] = info->nz_allocated;
8729371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
8739371c9d4SSatish Balay   isend[3] = info->memory;
8749371c9d4SSatish Balay   isend[4] = info->mallocs;
8758965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
8764e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8774e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8784e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8794e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8804e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
8818965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
8821c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
8832205254eSKarl Rupp 
8844e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8854e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8864e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8874e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8884e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8898965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
8901c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
8912205254eSKarl Rupp 
8924e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8934e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8944e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8954e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8964e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8978965ea79SLois Curfman McInnes   }
8984e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8994e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9004e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9028965ea79SLois Curfman McInnes }
9038965ea79SLois Curfman McInnes 
90466976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
905d71ae5a4SJacob Faibussowitsch {
90639ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
9078965ea79SLois Curfman McInnes 
9083a40ed3dSBarry Smith   PetscFunctionBegin;
90912c028f9SKris Buschelman   switch (op) {
910512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
91112c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
91212c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
91343674050SBarry Smith     MatCheckPreallocated(A, 1);
9149566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
91512c028f9SKris Buschelman     break;
91612c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
91743674050SBarry Smith     MatCheckPreallocated(A, 1);
9184e0d8c25SBarry Smith     a->roworiented = flg;
9199566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
92012c028f9SKris Buschelman     break;
9218c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
92213fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
92312c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
924d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
925d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
926d71ae5a4SJacob Faibussowitsch     break;
927d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
928d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
929d71ae5a4SJacob Faibussowitsch     break;
93077e54ba9SKris Buschelman   case MAT_SYMMETRIC:
93177e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
9329a4540c5SBarry Smith   case MAT_HERMITIAN:
9339a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
934b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
935b94d7dedSBarry Smith   case MAT_SPD:
936600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
9375d7aebe8SStefano Zampini   case MAT_IGNORE_ZERO_ENTRIES:
938b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
939b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
9409566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
94177e54ba9SKris Buschelman     break;
942d71ae5a4SJacob Faibussowitsch   default:
943d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
9443a40ed3dSBarry Smith   }
9453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9468965ea79SLois Curfman McInnes }
9478965ea79SLois Curfman McInnes 
94866976f2fSJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
949d71ae5a4SJacob Faibussowitsch {
9505b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
951637a0070SStefano Zampini   const PetscScalar *l;
952637a0070SStefano Zampini   PetscScalar        x, *v, *vv, *r;
953637a0070SStefano Zampini   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
9545b2fa520SLois Curfman McInnes 
9555b2fa520SLois Curfman McInnes   PetscFunctionBegin;
9569566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(mdn->A, &vv));
9579566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mdn->A, &lda));
9589566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &s2, &s3));
9595b2fa520SLois Curfman McInnes   if (ll) {
9609566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll, &s2a));
96108401ef6SPierre Jolivet     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
9629566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(ll, &l));
9635b2fa520SLois Curfman McInnes     for (i = 0; i < m; i++) {
9645b2fa520SLois Curfman McInnes       x = l[i];
965637a0070SStefano Zampini       v = vv + i;
9669371c9d4SSatish Balay       for (j = 0; j < n; j++) {
9679371c9d4SSatish Balay         (*v) *= x;
9689371c9d4SSatish Balay         v += lda;
9699371c9d4SSatish Balay       }
9705b2fa520SLois Curfman McInnes     }
9719566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(ll, &l));
9729566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
9735b2fa520SLois Curfman McInnes   }
9745b2fa520SLois Curfman McInnes   if (rr) {
975637a0070SStefano Zampini     const PetscScalar *ar;
976637a0070SStefano Zampini 
9779566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr, &s3a));
97808401ef6SPierre Jolivet     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
9799566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(rr, &ar));
9806f08ad05SPierre Jolivet     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
9819566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mdn->lvec, &r));
9829566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
9839566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
9849566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(rr, &ar));
9855b2fa520SLois Curfman McInnes     for (i = 0; i < n; i++) {
9865b2fa520SLois Curfman McInnes       x = r[i];
987637a0070SStefano Zampini       v = vv + i * lda;
9882205254eSKarl Rupp       for (j = 0; j < m; j++) (*v++) *= x;
9895b2fa520SLois Curfman McInnes     }
9909566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mdn->lvec, &r));
9919566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
9925b2fa520SLois Curfman McInnes   }
9939566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
9943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9955b2fa520SLois Curfman McInnes }
9965b2fa520SLois Curfman McInnes 
99766976f2fSJacob Faibussowitsch static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
998d71ae5a4SJacob Faibussowitsch {
9993501a2bdSLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
100013f74950SBarry Smith   PetscInt           i, j;
1001616b8fbbSStefano Zampini   PetscMPIInt        size;
1002329f5518SBarry Smith   PetscReal          sum = 0.0;
1003637a0070SStefano Zampini   const PetscScalar *av, *v;
10043501a2bdSLois Curfman McInnes 
10053a40ed3dSBarry Smith   PetscFunctionBegin;
10069566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1007637a0070SStefano Zampini   v = av;
10089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1009616b8fbbSStefano Zampini   if (size == 1) {
10109566063dSJacob Faibussowitsch     PetscCall(MatNorm(mdn->A, type, nrm));
10113501a2bdSLois Curfman McInnes   } else {
10123501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
1013d0f46423SBarry Smith       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
10149371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
10159371c9d4SSatish Balay         v++;
10163501a2bdSLois Curfman McInnes       }
10171c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
10188f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
10199566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
10203a40ed3dSBarry Smith     } else if (type == NORM_1) {
1021329f5518SBarry Smith       PetscReal *tmp, *tmp2;
10229566063dSJacob Faibussowitsch       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
1023064f8208SBarry Smith       *nrm = 0.0;
1024637a0070SStefano Zampini       v    = av;
1025d0f46423SBarry Smith       for (j = 0; j < mdn->A->cmap->n; j++) {
1026d0f46423SBarry Smith         for (i = 0; i < mdn->A->rmap->n; i++) {
10279371c9d4SSatish Balay           tmp[j] += PetscAbsScalar(*v);
10289371c9d4SSatish Balay           v++;
10293501a2bdSLois Curfman McInnes         }
10303501a2bdSLois Curfman McInnes       }
10311c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1032d0f46423SBarry Smith       for (j = 0; j < A->cmap->N; j++) {
1033064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
10343501a2bdSLois Curfman McInnes       }
10359566063dSJacob Faibussowitsch       PetscCall(PetscFree2(tmp, tmp2));
10369566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
10373a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
1038329f5518SBarry Smith       PetscReal ntemp;
10399566063dSJacob Faibussowitsch       PetscCall(MatNorm(mdn->A, type, &ntemp));
10401c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1041ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
10423501a2bdSLois Curfman McInnes   }
10439566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
10443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10453501a2bdSLois Curfman McInnes }
10463501a2bdSLois Curfman McInnes 
104766976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1048d71ae5a4SJacob Faibussowitsch {
10493501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10503501a2bdSLois Curfman McInnes   Mat           B;
1051d0f46423SBarry Smith   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1052637a0070SStefano Zampini   PetscInt      j, i, lda;
105387828ca2SBarry Smith   PetscScalar  *v;
10543501a2bdSLois Curfman McInnes 
10553a40ed3dSBarry Smith   PetscFunctionBegin;
10567fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1057cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
10589566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
10599566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
10609566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
10619566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1062637a0070SStefano Zampini   } else B = *matout;
10633501a2bdSLois Curfman McInnes 
10649371c9d4SSatish Balay   m = a->A->rmap->n;
10659371c9d4SSatish Balay   n = a->A->cmap->n;
10669566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
10679566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
10689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rwork));
10693501a2bdSLois Curfman McInnes   for (i = 0; i < m; i++) rwork[i] = rstart + i;
10701acff37aSSatish Balay   for (j = 0; j < n; j++) {
10719566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
10728e3a54c0SPierre Jolivet     v = PetscSafePointerPlusOffset(v, lda);
10733501a2bdSLois Curfman McInnes   }
10749566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
10759566063dSJacob Faibussowitsch   PetscCall(PetscFree(rwork));
10769566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
10779566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1078cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
10793501a2bdSLois Curfman McInnes     *matout = B;
10803501a2bdSLois Curfman McInnes   } else {
10819566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &B));
10823501a2bdSLois Curfman McInnes   }
10833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1084096963f5SLois Curfman McInnes }
1085096963f5SLois Curfman McInnes 
10866849ba73SBarry Smith static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
108752c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
10888965ea79SLois Curfman McInnes 
108966976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_MPIDense(Mat A)
1090d71ae5a4SJacob Faibussowitsch {
1091273d9f13SBarry Smith   PetscFunctionBegin;
10929566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
10939566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
109448a46eb9SPierre Jolivet   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
10953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1096273d9f13SBarry Smith }
1097273d9f13SBarry Smith 
109866976f2fSJacob Faibussowitsch static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1099d71ae5a4SJacob Faibussowitsch {
1100488007eeSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1101488007eeSBarry Smith 
1102488007eeSBarry Smith   PetscFunctionBegin;
11039566063dSJacob Faibussowitsch   PetscCall(MatAXPY(A->A, alpha, B->A, str));
11043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1105488007eeSBarry Smith }
1106488007eeSBarry Smith 
110766976f2fSJacob Faibussowitsch static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1108d71ae5a4SJacob Faibussowitsch {
1109ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1110ba337c44SJed Brown 
1111ba337c44SJed Brown   PetscFunctionBegin;
11129566063dSJacob Faibussowitsch   PetscCall(MatConjugate(a->A));
11133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1114ba337c44SJed Brown }
1115ba337c44SJed Brown 
111666976f2fSJacob Faibussowitsch static PetscErrorCode MatRealPart_MPIDense(Mat A)
1117d71ae5a4SJacob Faibussowitsch {
1118ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1119ba337c44SJed Brown 
1120ba337c44SJed Brown   PetscFunctionBegin;
11219566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
11223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1123ba337c44SJed Brown }
1124ba337c44SJed Brown 
112566976f2fSJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1126d71ae5a4SJacob Faibussowitsch {
1127ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1128ba337c44SJed Brown 
1129ba337c44SJed Brown   PetscFunctionBegin;
11309566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
11313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1132ba337c44SJed Brown }
1133ba337c44SJed Brown 
1134d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1135d71ae5a4SJacob Faibussowitsch {
1136637a0070SStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
113749a6ff4bSBarry Smith 
113849a6ff4bSBarry Smith   PetscFunctionBegin;
113928b400f6SJacob Faibussowitsch   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
114028b400f6SJacob Faibussowitsch   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
11419566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
11423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
114349a6ff4bSBarry Smith }
114449a6ff4bSBarry Smith 
1145857cbf51SRichard Tran Mills PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
114652c5f739Sprj- 
114766976f2fSJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1148d71ae5a4SJacob Faibussowitsch {
1149a873a8cdSSam Reynolds   PetscInt      i, m, n;
11500716a85fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
11510716a85fSBarry Smith   PetscReal    *work;
11520716a85fSBarry Smith 
11530716a85fSBarry Smith   PetscFunctionBegin;
11549566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
11559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &work));
1156857cbf51SRichard Tran Mills   if (type == REDUCTION_MEAN_REALPART) {
11579566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1158857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
11599566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1160a873a8cdSSam Reynolds   } else {
11619566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1162a873a8cdSSam Reynolds   }
1163857cbf51SRichard Tran Mills   if (type == NORM_2) {
11640716a85fSBarry Smith     for (i = 0; i < n; i++) work[i] *= work[i];
11650716a85fSBarry Smith   }
1166857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
11671c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
11680716a85fSBarry Smith   } else {
11691c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
11700716a85fSBarry Smith   }
11719566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
1172857cbf51SRichard Tran Mills   if (type == NORM_2) {
1173a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1174857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1175a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] /= m;
11760716a85fSBarry Smith   }
11773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11780716a85fSBarry Smith }
11790716a85fSBarry Smith 
1180d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1181d71ae5a4SJacob Faibussowitsch {
118273a71a0fSBarry Smith   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
118373a71a0fSBarry Smith 
118473a71a0fSBarry Smith   PetscFunctionBegin;
11859566063dSJacob Faibussowitsch   PetscCall(MatSetRandom(d->A, rctx));
11863faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
11873faff063SStefano Zampini   x->offloadmask = d->A->offloadmask;
11883faff063SStefano Zampini #endif
11893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119073a71a0fSBarry Smith }
119173a71a0fSBarry Smith 
1192d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1193d71ae5a4SJacob Faibussowitsch {
11943b49f96aSBarry Smith   PetscFunctionBegin;
11953b49f96aSBarry Smith   *missing = PETSC_FALSE;
11963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11973b49f96aSBarry Smith }
11983b49f96aSBarry Smith 
11994222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1200cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12016718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1202a1f9a5e6SStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12036718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
12046718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
12055d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1206cc48ffa7SToby Isaac 
120709dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
120809dc0095SBarry Smith                                        MatGetRow_MPIDense,
120909dc0095SBarry Smith                                        MatRestoreRow_MPIDense,
121009dc0095SBarry Smith                                        MatMult_MPIDense,
121197304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPIDense,
12127c922b88SBarry Smith                                        MatMultTranspose_MPIDense,
12137c922b88SBarry Smith                                        MatMultTransposeAdd_MPIDense,
1214f4259b30SLisandro Dalcin                                        NULL,
1215f4259b30SLisandro Dalcin                                        NULL,
1216f4259b30SLisandro Dalcin                                        NULL,
1217f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1218f4259b30SLisandro Dalcin                                        NULL,
1219f4259b30SLisandro Dalcin                                        NULL,
1220f4259b30SLisandro Dalcin                                        NULL,
122109dc0095SBarry Smith                                        MatTranspose_MPIDense,
122297304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPIDense,
12236e4ee0c6SHong Zhang                                        MatEqual_MPIDense,
122409dc0095SBarry Smith                                        MatGetDiagonal_MPIDense,
12255b2fa520SLois Curfman McInnes                                        MatDiagonalScale_MPIDense,
122609dc0095SBarry Smith                                        MatNorm_MPIDense,
122797304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPIDense,
122809dc0095SBarry Smith                                        MatAssemblyEnd_MPIDense,
122909dc0095SBarry Smith                                        MatSetOption_MPIDense,
123009dc0095SBarry Smith                                        MatZeroEntries_MPIDense,
1231d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_MPIDense,
1232f4259b30SLisandro Dalcin                                        NULL,
1233f4259b30SLisandro Dalcin                                        NULL,
1234f4259b30SLisandro Dalcin                                        NULL,
1235f4259b30SLisandro Dalcin                                        NULL,
12364994cf47SJed Brown                                        /* 29*/ MatSetUp_MPIDense,
1237f4259b30SLisandro Dalcin                                        NULL,
1238f4259b30SLisandro Dalcin                                        NULL,
1239c56a70eeSBarry Smith                                        MatGetDiagonalBlock_MPIDense,
1240f4259b30SLisandro Dalcin                                        NULL,
1241d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPIDense,
1242f4259b30SLisandro Dalcin                                        NULL,
1243f4259b30SLisandro Dalcin                                        NULL,
1244f4259b30SLisandro Dalcin                                        NULL,
1245f4259b30SLisandro Dalcin                                        NULL,
1246d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPIDense,
12477dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIDense,
1248f4259b30SLisandro Dalcin                                        NULL,
124909dc0095SBarry Smith                                        MatGetValues_MPIDense,
1250a76f77c3SStefano Zampini                                        MatCopy_MPIDense,
1251f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
125209dc0095SBarry Smith                                        MatScale_MPIDense,
12532f605a99SJose E. Roman                                        MatShift_MPIDense,
1254f4259b30SLisandro Dalcin                                        NULL,
1255f4259b30SLisandro Dalcin                                        NULL,
125673a71a0fSBarry Smith                                        /* 49*/ MatSetRandom_MPIDense,
1257f4259b30SLisandro Dalcin                                        NULL,
1258f4259b30SLisandro Dalcin                                        NULL,
1259f4259b30SLisandro Dalcin                                        NULL,
1260f4259b30SLisandro Dalcin                                        NULL,
1261f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1262f4259b30SLisandro Dalcin                                        NULL,
1263f4259b30SLisandro Dalcin                                        NULL,
1264f4259b30SLisandro Dalcin                                        NULL,
1265f4259b30SLisandro Dalcin                                        NULL,
12667dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1267b9b97703SBarry Smith                                        MatDestroy_MPIDense,
1268b9b97703SBarry Smith                                        MatView_MPIDense,
1269f4259b30SLisandro Dalcin                                        NULL,
1270f4259b30SLisandro Dalcin                                        NULL,
1271f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1272f4259b30SLisandro Dalcin                                        NULL,
1273f4259b30SLisandro Dalcin                                        NULL,
1274f4259b30SLisandro Dalcin                                        NULL,
1275f4259b30SLisandro Dalcin                                        NULL,
1276f4259b30SLisandro Dalcin                                        /* 69*/ NULL,
1277f4259b30SLisandro Dalcin                                        NULL,
1278f4259b30SLisandro Dalcin                                        NULL,
1279f4259b30SLisandro Dalcin                                        NULL,
1280f4259b30SLisandro Dalcin                                        NULL,
1281f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1282f4259b30SLisandro Dalcin                                        NULL,
1283f4259b30SLisandro Dalcin                                        NULL,
1284f4259b30SLisandro Dalcin                                        NULL,
1285f4259b30SLisandro Dalcin                                        NULL,
1286f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1287f4259b30SLisandro Dalcin                                        NULL,
1288f4259b30SLisandro Dalcin                                        NULL,
1289f4259b30SLisandro Dalcin                                        NULL,
12905bba2384SShri Abhyankar                                        /* 83*/ MatLoad_MPIDense,
1291f4259b30SLisandro Dalcin                                        NULL,
1292f4259b30SLisandro Dalcin                                        NULL,
1293f4259b30SLisandro Dalcin                                        NULL,
1294f4259b30SLisandro Dalcin                                        NULL,
1295f4259b30SLisandro Dalcin                                        NULL,
1296f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1297f4259b30SLisandro Dalcin                                        NULL,
1298f4259b30SLisandro Dalcin                                        NULL,
1299f4259b30SLisandro Dalcin                                        NULL,
1300f4259b30SLisandro Dalcin                                        NULL,
1301f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1302f4259b30SLisandro Dalcin                                        NULL,
13036718818eSStefano Zampini                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1304cc48ffa7SToby Isaac                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1305f4259b30SLisandro Dalcin                                        NULL,
13064222ddf1SHong Zhang                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1307f4259b30SLisandro Dalcin                                        NULL,
1308f4259b30SLisandro Dalcin                                        NULL,
1309ba337c44SJed Brown                                        MatConjugate_MPIDense,
1310f4259b30SLisandro Dalcin                                        NULL,
1311f4259b30SLisandro Dalcin                                        /*104*/ NULL,
1312ba337c44SJed Brown                                        MatRealPart_MPIDense,
1313ba337c44SJed Brown                                        MatImaginaryPart_MPIDense,
1314f4259b30SLisandro Dalcin                                        NULL,
1315f4259b30SLisandro Dalcin                                        NULL,
1316f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1317f4259b30SLisandro Dalcin                                        NULL,
1318f4259b30SLisandro Dalcin                                        NULL,
131949a6ff4bSBarry Smith                                        MatGetColumnVector_MPIDense,
13203b49f96aSBarry Smith                                        MatMissingDiagonal_MPIDense,
1321f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1322f4259b30SLisandro Dalcin                                        NULL,
1323f4259b30SLisandro Dalcin                                        NULL,
1324f4259b30SLisandro Dalcin                                        NULL,
1325f4259b30SLisandro Dalcin                                        NULL,
1326f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1327f4259b30SLisandro Dalcin                                        NULL,
1328459e8d23SBlanca Mellado Pinto                                        MatMultHermitianTranspose_MPIDense,
1329459e8d23SBlanca Mellado Pinto                                        MatMultHermitianTransposeAdd_MPIDense,
1330f4259b30SLisandro Dalcin                                        NULL,
1331f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1332a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIDense,
1333f4259b30SLisandro Dalcin                                        NULL,
1334f4259b30SLisandro Dalcin                                        NULL,
1335f4259b30SLisandro Dalcin                                        NULL,
1336f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1337f4259b30SLisandro Dalcin                                        NULL,
13386718818eSStefano Zampini                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1339cb20be35SHong Zhang                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1340f4259b30SLisandro Dalcin                                        NULL,
1341f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1342f4259b30SLisandro Dalcin                                        NULL,
1343f4259b30SLisandro Dalcin                                        NULL,
1344f4259b30SLisandro Dalcin                                        NULL,
1345f4259b30SLisandro Dalcin                                        NULL,
1346f4259b30SLisandro Dalcin                                        /*139*/ NULL,
1347f4259b30SLisandro Dalcin                                        NULL,
1348f4259b30SLisandro Dalcin                                        NULL,
1349f4259b30SLisandro Dalcin                                        NULL,
1350f4259b30SLisandro Dalcin                                        NULL,
13514222ddf1SHong Zhang                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1352f4259b30SLisandro Dalcin                                        /*145*/ NULL,
1353f4259b30SLisandro Dalcin                                        NULL,
135499a7f59eSMark Adams                                        NULL,
135599a7f59eSMark Adams                                        NULL,
13567fb60732SBarry Smith                                        NULL,
1357dec0b466SHong Zhang                                        /*150*/ NULL,
1358eede4a3fSMark Adams                                        NULL,
1359*4cc2b5b5SPierre Jolivet                                        NULL,
1360dec0b466SHong Zhang                                        NULL};
13618965ea79SLois Curfman McInnes 
136266976f2fSJacob Faibussowitsch static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1363d71ae5a4SJacob Faibussowitsch {
1364637a0070SStefano Zampini   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
136547d993e7Ssuyashtn   MatType       mtype = MATSEQDENSE;
1366a23d5eceSKris Buschelman 
1367a23d5eceSKris Buschelman   PetscFunctionBegin;
136828b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
13699566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
13709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
1371637a0070SStefano Zampini   if (!a->A) {
13729566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
13739566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1374637a0070SStefano Zampini   }
137547d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA)
137647d993e7Ssuyashtn   PetscBool iscuda;
13779566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
137847d993e7Ssuyashtn   if (iscuda) mtype = MATSEQDENSECUDA;
137947d993e7Ssuyashtn #endif
138047d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
138147d993e7Ssuyashtn   PetscBool iship;
138247d993e7Ssuyashtn   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
138347d993e7Ssuyashtn   if (iship) mtype = MATSEQDENSEHIP;
138447d993e7Ssuyashtn #endif
138547d993e7Ssuyashtn   PetscCall(MatSetType(a->A, mtype));
13869566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
138747d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
13883faff063SStefano Zampini   mat->offloadmask = a->A->offloadmask;
13893faff063SStefano Zampini #endif
1390637a0070SStefano Zampini   mat->preallocated = PETSC_TRUE;
13916f08ad05SPierre Jolivet   mat->assembled    = PETSC_TRUE;
13923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1393a23d5eceSKris Buschelman }
1394a23d5eceSKris Buschelman 
1395d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1396d71ae5a4SJacob Faibussowitsch {
13974e29119aSPierre Jolivet   Mat B, C;
13984e29119aSPierre Jolivet 
13994e29119aSPierre Jolivet   PetscFunctionBegin;
14009566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
14019566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
14029566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&C));
14034e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14044e29119aSPierre Jolivet     C = *newmat;
14054e29119aSPierre Jolivet   } else C = NULL;
14069566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14079566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14084e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
14099566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
14104e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
14113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14124e29119aSPierre Jolivet }
14134e29119aSPierre Jolivet 
141466976f2fSJacob Faibussowitsch static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1415d71ae5a4SJacob Faibussowitsch {
14164e29119aSPierre Jolivet   Mat B, C;
14174e29119aSPierre Jolivet 
14184e29119aSPierre Jolivet   PetscFunctionBegin;
14199566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLocalMatrix(A, &C));
14209566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
14214e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14224e29119aSPierre Jolivet     C = *newmat;
14234e29119aSPierre Jolivet   } else C = NULL;
14249566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14259566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14264e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
14279566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
14284e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
14293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14304e29119aSPierre Jolivet }
14314e29119aSPierre Jolivet 
143265b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1433d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1434d71ae5a4SJacob Faibussowitsch {
1435f709e8d7SPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
14368ea901baSHong Zhang   Mat           mat_elemental;
143732d7a744SHong Zhang   PetscScalar  *v;
1438f709e8d7SPierre Jolivet   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
14398ea901baSHong Zhang 
14408baccfbdSHong Zhang   PetscFunctionBegin;
1441378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1442378336b6SHong Zhang     mat_elemental = *newmat;
14439566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(*newmat));
1444378336b6SHong Zhang   } else {
14459566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
14469566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
14479566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
14489566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_elemental));
14499566063dSJacob Faibussowitsch     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1450378336b6SHong Zhang   }
1451378336b6SHong Zhang 
14529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &rows, N, &cols));
145332d7a744SHong Zhang   for (i = 0; i < N; i++) cols[i] = i;
145432d7a744SHong Zhang   for (i = 0; i < m; i++) rows[i] = rstart + i;
14558ea901baSHong Zhang 
1456637a0070SStefano Zampini   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
14579566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(A, &v));
1458f709e8d7SPierre Jolivet   PetscCall(MatDenseGetLDA(a->A, &lda));
1459f709e8d7SPierre Jolivet   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1460f709e8d7SPierre Jolivet   else {
1461f709e8d7SPierre Jolivet     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1462f709e8d7SPierre Jolivet   }
14639566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
14649566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
14659566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(A, &v));
14669566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, cols));
14678ea901baSHong Zhang 
1468511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
14699566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &mat_elemental));
14708ea901baSHong Zhang   } else {
14718ea901baSHong Zhang     *newmat = mat_elemental;
14728ea901baSHong Zhang   }
14733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14748baccfbdSHong Zhang }
147565b80a83SHong Zhang #endif
14768baccfbdSHong Zhang 
1477d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1478d71ae5a4SJacob Faibussowitsch {
147986aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
148086aefd0dSHong Zhang 
148186aefd0dSHong Zhang   PetscFunctionBegin;
14829566063dSJacob Faibussowitsch   PetscCall(MatDenseGetColumn(mat->A, col, vals));
14833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
148486aefd0dSHong Zhang }
148586aefd0dSHong Zhang 
1486d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1487d71ae5a4SJacob Faibussowitsch {
148886aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
148986aefd0dSHong Zhang 
149086aefd0dSHong Zhang   PetscFunctionBegin;
14919566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreColumn(mat->A, vals));
14923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
149386aefd0dSHong Zhang }
149486aefd0dSHong Zhang 
1495d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1496d71ae5a4SJacob Faibussowitsch {
149794e2cb23SJakub Kruzik   Mat_MPIDense *mat;
149894e2cb23SJakub Kruzik   PetscInt      m, nloc, N;
149994e2cb23SJakub Kruzik 
150094e2cb23SJakub Kruzik   PetscFunctionBegin;
15019566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
15029566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
150394e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
150494e2cb23SJakub Kruzik     PetscInt sum;
150594e2cb23SJakub Kruzik 
150648a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
150794e2cb23SJakub Kruzik     /* Check sum(n) = N */
15081c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
150908401ef6SPierre Jolivet     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
151094e2cb23SJakub Kruzik 
15119566063dSJacob Faibussowitsch     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
15129566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
151394e2cb23SJakub Kruzik   }
151494e2cb23SJakub Kruzik 
151594e2cb23SJakub Kruzik   /* numeric phase */
151694e2cb23SJakub Kruzik   mat = (Mat_MPIDense *)(*outmat)->data;
15179566063dSJacob Faibussowitsch   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
15183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
151994e2cb23SJakub Kruzik }
152094e2cb23SJakub Kruzik 
1521d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1522d71ae5a4SJacob Faibussowitsch {
15236947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15246947451fSStefano Zampini   PetscInt      lda;
15256947451fSStefano Zampini 
15266947451fSStefano Zampini   PetscFunctionBegin;
152728b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
152828b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1529d16ceb75SStefano Zampini   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
15306947451fSStefano Zampini   a->vecinuse = col + 1;
15319566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
15329566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
15338e3a54c0SPierre Jolivet   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
15346947451fSStefano Zampini   *v = a->cvec;
15353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15366947451fSStefano Zampini }
15376947451fSStefano Zampini 
1538d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1539d71ae5a4SJacob Faibussowitsch {
15406947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15416947451fSStefano Zampini 
15426947451fSStefano Zampini   PetscFunctionBegin;
154328b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
154428b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
15456947451fSStefano Zampini   a->vecinuse = 0;
15469566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
15479566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1548742765d3SMatthew Knepley   if (v) *v = NULL;
15493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15506947451fSStefano Zampini }
15516947451fSStefano Zampini 
1552d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1553d71ae5a4SJacob Faibussowitsch {
15546947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15556947451fSStefano Zampini   PetscInt      lda;
15566947451fSStefano Zampini 
15576947451fSStefano Zampini   PetscFunctionBegin;
155828b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
155928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1560d16ceb75SStefano Zampini   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
15616947451fSStefano Zampini   a->vecinuse = col + 1;
15629566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
15639566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
15648e3a54c0SPierre Jolivet   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
15659566063dSJacob Faibussowitsch   PetscCall(VecLockReadPush(a->cvec));
15666947451fSStefano Zampini   *v = a->cvec;
15673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15686947451fSStefano Zampini }
15696947451fSStefano Zampini 
1570d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1571d71ae5a4SJacob Faibussowitsch {
15726947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15736947451fSStefano Zampini 
15746947451fSStefano Zampini   PetscFunctionBegin;
15755f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
15765f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
15776947451fSStefano Zampini   a->vecinuse = 0;
15789566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
15799566063dSJacob Faibussowitsch   PetscCall(VecLockReadPop(a->cvec));
15809566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1581742765d3SMatthew Knepley   if (v) *v = NULL;
15823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15836947451fSStefano Zampini }
15846947451fSStefano Zampini 
1585d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1586d71ae5a4SJacob Faibussowitsch {
15876947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15886947451fSStefano Zampini   PetscInt      lda;
15896947451fSStefano Zampini 
15906947451fSStefano Zampini   PetscFunctionBegin;
15915f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
15925f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1593d16ceb75SStefano Zampini   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
15946947451fSStefano Zampini   a->vecinuse = col + 1;
15959566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
15969566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
15978e3a54c0SPierre Jolivet   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
15986947451fSStefano Zampini   *v = a->cvec;
15993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16006947451fSStefano Zampini }
16016947451fSStefano Zampini 
1602d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1603d71ae5a4SJacob Faibussowitsch {
16046947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16056947451fSStefano Zampini 
16066947451fSStefano Zampini   PetscFunctionBegin;
16075f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
16085f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
16096947451fSStefano Zampini   a->vecinuse = 0;
16109566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
16119566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1612742765d3SMatthew Knepley   if (v) *v = NULL;
16133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16146947451fSStefano Zampini }
16156947451fSStefano Zampini 
161666976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1617d71ae5a4SJacob Faibussowitsch {
16185ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1619616b8fbbSStefano Zampini   Mat_MPIDense *c;
1620616b8fbbSStefano Zampini   MPI_Comm      comm;
1621a2748737SPierre Jolivet   PetscInt      pbegin, pend;
16225ea7661aSPierre Jolivet 
16235ea7661aSPierre Jolivet   PetscFunctionBegin;
16249566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
16255f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
16265f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1627a2748737SPierre Jolivet   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1628a2748737SPierre Jolivet   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
16295ea7661aSPierre Jolivet   if (!a->cmat) {
16309566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &a->cmat));
16319566063dSJacob Faibussowitsch     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1632a2748737SPierre Jolivet     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1633a2748737SPierre Jolivet     else {
1634a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1635a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1636a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1637a2748737SPierre Jolivet     }
16389566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
16399566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1640a2748737SPierre Jolivet   } else {
1641a2748737SPierre Jolivet     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1642a2748737SPierre Jolivet     if (same && a->cmat->rmap->N != A->rmap->N) {
1643a2748737SPierre Jolivet       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1644a2748737SPierre Jolivet       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1645a2748737SPierre Jolivet     }
1646a2748737SPierre Jolivet     if (!same) {
1647a2748737SPierre Jolivet       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1648a2748737SPierre Jolivet       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1649a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1650a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1651a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1652a2748737SPierre Jolivet     }
1653a2748737SPierre Jolivet     if (cend - cbegin != a->cmat->cmap->N) {
16549566063dSJacob Faibussowitsch       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
16559566063dSJacob Faibussowitsch       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
16569566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
16579566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
16585ea7661aSPierre Jolivet     }
1659a2748737SPierre Jolivet   }
1660616b8fbbSStefano Zampini   c = (Mat_MPIDense *)a->cmat->data;
16615f80ce2aSJacob Faibussowitsch   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1662a2748737SPierre Jolivet   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
16633faff063SStefano Zampini 
1664616b8fbbSStefano Zampini   a->cmat->preallocated = PETSC_TRUE;
1665616b8fbbSStefano Zampini   a->cmat->assembled    = PETSC_TRUE;
16663faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
16673faff063SStefano Zampini   a->cmat->offloadmask = c->A->offloadmask;
16683faff063SStefano Zampini #endif
16695ea7661aSPierre Jolivet   a->matinuse = cbegin + 1;
16705ea7661aSPierre Jolivet   *v          = a->cmat;
16713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16725ea7661aSPierre Jolivet }
16735ea7661aSPierre Jolivet 
167466976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1675d71ae5a4SJacob Faibussowitsch {
16765ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1677616b8fbbSStefano Zampini   Mat_MPIDense *c;
16785ea7661aSPierre Jolivet 
16795ea7661aSPierre Jolivet   PetscFunctionBegin;
16805f80ce2aSJacob Faibussowitsch   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
16815f80ce2aSJacob Faibussowitsch   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
16825f80ce2aSJacob Faibussowitsch   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
16835ea7661aSPierre Jolivet   a->matinuse = 0;
1684616b8fbbSStefano Zampini   c           = (Mat_MPIDense *)a->cmat->data;
16859566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1686742765d3SMatthew Knepley   if (v) *v = NULL;
16873faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
16883faff063SStefano Zampini   A->offloadmask = a->A->offloadmask;
16893faff063SStefano Zampini #endif
16903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16915ea7661aSPierre Jolivet }
16925ea7661aSPierre Jolivet 
1693002bc6daSRichard Tran Mills /*MC
1694002bc6daSRichard Tran Mills    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1695002bc6daSRichard Tran Mills 
16962ef1f0ffSBarry Smith    Options Database Key:
169711a5261eSBarry Smith . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1698002bc6daSRichard Tran Mills 
1699002bc6daSRichard Tran Mills   Level: beginner
1700002bc6daSRichard Tran Mills 
17011cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1702002bc6daSRichard Tran Mills M*/
17034742e46bSJacob Faibussowitsch PetscErrorCode MatCreate_MPIDense(Mat mat)
1704d71ae5a4SJacob Faibussowitsch {
1705273d9f13SBarry Smith   Mat_MPIDense *a;
1706273d9f13SBarry Smith 
1707273d9f13SBarry Smith   PetscFunctionBegin;
17084dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&a));
1709b0a32e0cSBarry Smith   mat->data   = (void *)a;
1710aea10558SJacob Faibussowitsch   mat->ops[0] = MatOps_Values;
1711273d9f13SBarry Smith 
1712273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1713273d9f13SBarry Smith 
1714273d9f13SBarry Smith   /* build cache for off array entries formed */
1715273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
17162205254eSKarl Rupp 
17179566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1718273d9f13SBarry Smith 
1719273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1720f4259b30SLisandro Dalcin   a->lvec        = NULL;
1721f4259b30SLisandro Dalcin   a->Mvctx       = NULL;
1722273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1723273d9f13SBarry Smith 
17249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
17259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
17269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
17279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
17289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
17299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
17309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
17319566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
17329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
17339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
17349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
17359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
17369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
17379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
17389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
17399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
17409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
17419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
17429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
17439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
17449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
17458baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
17469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
17478baccfbdSHong Zhang #endif
1748d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
17499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1750d24d4204SJose E. Roman #endif
1751637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
17529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1753637a0070SStefano Zampini #endif
17549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
17559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
17569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
17576718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
17589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
17599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
17606718818eSStefano Zampini #endif
176147d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
176247d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
176347d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
176447d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
176547d993e7Ssuyashtn #endif
17669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
17679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
17680be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
17690be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
17700be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
17719566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
17723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1773273d9f13SBarry Smith }
1774273d9f13SBarry Smith 
1775209238afSKris Buschelman /*MC
1776002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1777209238afSKris Buschelman 
177811a5261eSBarry Smith    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
177911a5261eSBarry Smith    and `MATMPIDENSE` otherwise.
1780209238afSKris Buschelman 
17812ef1f0ffSBarry Smith    Options Database Key:
178211a5261eSBarry Smith . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1783209238afSKris Buschelman 
1784209238afSKris Buschelman   Level: beginner
1785209238afSKris Buschelman 
17861cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
17876947451fSStefano Zampini M*/
17886947451fSStefano Zampini 
17895d83a8b1SBarry Smith /*@
1790273d9f13SBarry Smith   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1791273d9f13SBarry Smith 
17928f490ea3SStefano Zampini   Collective
1793273d9f13SBarry Smith 
1794273d9f13SBarry Smith   Input Parameters:
1795fe59aa6dSJacob Faibussowitsch + B    - the matrix
17962ef1f0ffSBarry Smith - data - optional location of matrix data.  Set to `NULL` for PETSc
1797273d9f13SBarry Smith          to control all matrix memory allocation.
1798273d9f13SBarry Smith 
17992ef1f0ffSBarry Smith   Level: intermediate
18002ef1f0ffSBarry Smith 
1801273d9f13SBarry Smith   Notes:
18022ef1f0ffSBarry Smith   The dense format is fully compatible with standard Fortran
1803273d9f13SBarry Smith   storage by columns.
1804273d9f13SBarry Smith 
1805273d9f13SBarry Smith   The data input variable is intended primarily for Fortran programmers
1806273d9f13SBarry Smith   who wish to allocate their own matrix memory space.  Most users should
18072ef1f0ffSBarry Smith   set `data` to `NULL`.
1808273d9f13SBarry Smith 
18091cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1810273d9f13SBarry Smith @*/
1811d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1812d71ae5a4SJacob Faibussowitsch {
1813273d9f13SBarry Smith   PetscFunctionBegin;
1814d5ea218eSStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1815cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
18163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1817273d9f13SBarry Smith }
1818273d9f13SBarry Smith 
1819d3042a70SBarry Smith /*@
182011a5261eSBarry Smith   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1821d3042a70SBarry Smith   array provided by the user. This is useful to avoid copying an array
1822d3042a70SBarry Smith   into a matrix
1823d3042a70SBarry Smith 
1824d3042a70SBarry Smith   Not Collective
1825d3042a70SBarry Smith 
1826d3042a70SBarry Smith   Input Parameters:
1827d3042a70SBarry Smith + mat   - the matrix
1828d3042a70SBarry Smith - array - the array in column major order
1829d3042a70SBarry Smith 
18302ef1f0ffSBarry Smith   Level: developer
18312ef1f0ffSBarry Smith 
183211a5261eSBarry Smith   Note:
183311a5261eSBarry Smith   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1834d3042a70SBarry Smith   freed when the matrix is destroyed.
1835d3042a70SBarry Smith 
18361cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
183711a5261eSBarry Smith           `MatDenseReplaceArray()`
1838d3042a70SBarry Smith @*/
1839d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1840d71ae5a4SJacob Faibussowitsch {
1841d3042a70SBarry Smith   PetscFunctionBegin;
1842d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1843cac4c232SBarry Smith   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
18449566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
184547d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1846637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
1847637a0070SStefano Zampini #endif
18483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1849d3042a70SBarry Smith }
1850d3042a70SBarry Smith 
1851d3042a70SBarry Smith /*@
185211a5261eSBarry Smith   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1853d3042a70SBarry Smith 
1854d3042a70SBarry Smith   Not Collective
1855d3042a70SBarry Smith 
18562fe279fdSBarry Smith   Input Parameter:
1857d3042a70SBarry Smith . mat - the matrix
1858d3042a70SBarry Smith 
18592ef1f0ffSBarry Smith   Level: developer
18602ef1f0ffSBarry Smith 
186111a5261eSBarry Smith   Note:
186211a5261eSBarry Smith   You can only call this after a call to `MatDensePlaceArray()`
1863d3042a70SBarry Smith 
18641cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1865d3042a70SBarry Smith @*/
1866d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseResetArray(Mat mat)
1867d71ae5a4SJacob Faibussowitsch {
1868d3042a70SBarry Smith   PetscFunctionBegin;
1869d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1870cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
18719566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
18723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1873d3042a70SBarry Smith }
1874d3042a70SBarry Smith 
1875d5ea218eSStefano Zampini /*@
1876d5ea218eSStefano Zampini   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1877d5ea218eSStefano Zampini   array provided by the user. This is useful to avoid copying an array
1878d5ea218eSStefano Zampini   into a matrix
1879d5ea218eSStefano Zampini 
1880d5ea218eSStefano Zampini   Not Collective
1881d5ea218eSStefano Zampini 
1882d5ea218eSStefano Zampini   Input Parameters:
1883d5ea218eSStefano Zampini + mat   - the matrix
1884d5ea218eSStefano Zampini - array - the array in column major order
1885d5ea218eSStefano Zampini 
18862ef1f0ffSBarry Smith   Level: developer
18872ef1f0ffSBarry Smith 
188811a5261eSBarry Smith   Note:
188911a5261eSBarry Smith   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1890d5ea218eSStefano Zampini   freed by the user. It will be freed when the matrix is destroyed.
1891d5ea218eSStefano Zampini 
18921cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1893d5ea218eSStefano Zampini @*/
1894d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1895d71ae5a4SJacob Faibussowitsch {
1896d5ea218eSStefano Zampini   PetscFunctionBegin;
1897d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1898cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
18999566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
190047d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1901d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
1902d5ea218eSStefano Zampini #endif
19033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1904d5ea218eSStefano Zampini }
1905d5ea218eSStefano Zampini 
19065d83a8b1SBarry Smith /*@
190711a5261eSBarry Smith   MatCreateDense - Creates a matrix in `MATDENSE` format.
19088965ea79SLois Curfman McInnes 
1909d083f849SBarry Smith   Collective
1910db81eaa0SLois Curfman McInnes 
19118965ea79SLois Curfman McInnes   Input Parameters:
1912db81eaa0SLois Curfman McInnes + comm - MPI communicator
19132ef1f0ffSBarry Smith . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
19142ef1f0ffSBarry Smith . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
19152ef1f0ffSBarry Smith . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
19162ef1f0ffSBarry Smith . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
19172ef1f0ffSBarry Smith - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1918dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
19198965ea79SLois Curfman McInnes 
19208965ea79SLois Curfman McInnes   Output Parameter:
1921477f1c0bSLois Curfman McInnes . A - the matrix
19228965ea79SLois Curfman McInnes 
19232ef1f0ffSBarry Smith   Level: intermediate
19242ef1f0ffSBarry Smith 
1925b259b22eSLois Curfman McInnes   Notes:
19262ef1f0ffSBarry Smith   The dense format is fully compatible with standard Fortran
192739ddd567SLois Curfman McInnes   storage by columns.
192811a5261eSBarry Smith 
192911a5261eSBarry Smith   Although local portions of the matrix are stored in column-major
1930002bc6daSRichard Tran Mills   order, the matrix is partitioned across MPI ranks by row.
19318965ea79SLois Curfman McInnes 
193218f449edSLois Curfman McInnes   The data input variable is intended primarily for Fortran programmers
193318f449edSLois Curfman McInnes   who wish to allocate their own matrix memory space.  Most users should
19342ef1f0ffSBarry Smith   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
193518f449edSLois Curfman McInnes 
19368965ea79SLois Curfman McInnes   The user MUST specify either the local or global matrix dimensions
19378965ea79SLois Curfman McInnes   (possibly both).
19388965ea79SLois Curfman McInnes 
19391cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
19408965ea79SLois Curfman McInnes @*/
1941d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1942d71ae5a4SJacob Faibussowitsch {
19433a40ed3dSBarry Smith   PetscFunctionBegin;
19449566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
19459566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
19466f08ad05SPierre Jolivet   PetscCall(MatSetType(*A, MATDENSE));
19479566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(*A, data));
19486f08ad05SPierre Jolivet   PetscCall(MatMPIDenseSetPreallocation(*A, data));
19493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19508965ea79SLois Curfman McInnes }
19518965ea79SLois Curfman McInnes 
1952d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1953d71ae5a4SJacob Faibussowitsch {
19548965ea79SLois Curfman McInnes   Mat           mat;
19553501a2bdSLois Curfman McInnes   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
19568965ea79SLois Curfman McInnes 
19573a40ed3dSBarry Smith   PetscFunctionBegin;
1958f4259b30SLisandro Dalcin   *newmat = NULL;
19599566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
19609566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
19619566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1962834f8fabSBarry Smith   a = (Mat_MPIDense *)mat->data;
19635aa7edbeSHong Zhang 
1964d5f3da31SBarry Smith   mat->factortype   = A->factortype;
1965c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
1966273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
19678965ea79SLois Curfman McInnes 
1968e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
19693782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
1970e04c1aa4SHong Zhang 
19719566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
19729566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
19738965ea79SLois Curfman McInnes 
19749566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
197501b82886SBarry Smith 
19768965ea79SLois Curfman McInnes   *newmat = mat;
19773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19788965ea79SLois Curfman McInnes }
19798965ea79SLois Curfman McInnes 
198066976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1981d71ae5a4SJacob Faibussowitsch {
198287d5ce66SSatish Balay   PetscBool isbinary;
198387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
198487d5ce66SSatish Balay   PetscBool ishdf5;
198587d5ce66SSatish Balay #endif
1986eb91f321SVaclav Hapla 
1987eb91f321SVaclav Hapla   PetscFunctionBegin;
1988eb91f321SVaclav Hapla   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1989eb91f321SVaclav Hapla   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1990eb91f321SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
19919566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
19929566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
199387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
19949566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
199587d5ce66SSatish Balay #endif
1996eb91f321SVaclav Hapla   if (isbinary) {
19979566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1998eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
199987d5ce66SSatish Balay   } else if (ishdf5) {
20009566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2001eb91f321SVaclav Hapla #endif
200298921bdaSJacob Faibussowitsch   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
20033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2004eb91f321SVaclav Hapla }
2005eb91f321SVaclav Hapla 
2006d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2007d71ae5a4SJacob Faibussowitsch {
20086e4ee0c6SHong Zhang   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
20096e4ee0c6SHong Zhang   Mat           a, b;
201090ace30eSBarry Smith 
20116e4ee0c6SHong Zhang   PetscFunctionBegin;
20126e4ee0c6SHong Zhang   a = matA->A;
20136e4ee0c6SHong Zhang   b = matB->A;
20142cf15c64SPierre Jolivet   PetscCall(MatEqual(a, b, flag));
20152cf15c64SPierre Jolivet   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
20163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20176e4ee0c6SHong Zhang }
201890ace30eSBarry Smith 
201966976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2020d71ae5a4SJacob Faibussowitsch {
20216718818eSStefano Zampini   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2022baa3c1c6SHong Zhang 
2023baa3c1c6SHong Zhang   PetscFunctionBegin;
20249566063dSJacob Faibussowitsch   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
20259566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&atb->atb));
20269566063dSJacob Faibussowitsch   PetscCall(PetscFree(atb));
20273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2028baa3c1c6SHong Zhang }
2029baa3c1c6SHong Zhang 
203066976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2031d71ae5a4SJacob Faibussowitsch {
20326718818eSStefano Zampini   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2033cc48ffa7SToby Isaac 
2034cc48ffa7SToby Isaac   PetscFunctionBegin;
20359566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
20369566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
20379566063dSJacob Faibussowitsch   PetscCall(PetscFree(abt));
20383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2039cc48ffa7SToby Isaac }
2040cc48ffa7SToby Isaac 
2041d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2042d71ae5a4SJacob Faibussowitsch {
2043baa3c1c6SHong Zhang   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
20446718818eSStefano Zampini   Mat_TransMatMultDense *atb;
2045cb20be35SHong Zhang   MPI_Comm               comm;
20466718818eSStefano Zampini   PetscMPIInt            size, *recvcounts;
20476718818eSStefano Zampini   PetscScalar           *carray, *sendbuf;
2048637a0070SStefano Zampini   const PetscScalar     *atbarray;
2049a1f9a5e6SStefano Zampini   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2050e68c0b26SHong Zhang   const PetscInt        *ranges;
2051cb20be35SHong Zhang 
2052cb20be35SHong Zhang   PetscFunctionBegin;
20536718818eSStefano Zampini   MatCheckProduct(C, 3);
20545f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
20556718818eSStefano Zampini   atb        = (Mat_TransMatMultDense *)C->product->data;
20566718818eSStefano Zampini   recvcounts = atb->recvcounts;
20576718818eSStefano Zampini   sendbuf    = atb->sendbuf;
20586718818eSStefano Zampini 
20599566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
20609566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2061e68c0b26SHong Zhang 
2062c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
20639566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
2064cb20be35SHong Zhang 
20659566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
2066cb20be35SHong Zhang 
2067660d5466SHong Zhang   /* arrange atbarray into sendbuf */
20689566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
20699566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2070637a0070SStefano Zampini   for (proc = 0, k = 0; proc < size; proc++) {
2071baa3c1c6SHong Zhang     for (j = 0; j < cN; j++) {
2072a1f9a5e6SStefano Zampini       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2073cb20be35SHong Zhang     }
2074cb20be35SHong Zhang   }
20759566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2076637a0070SStefano Zampini 
2077c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
20789566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
20799566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
20809566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
208167af85e8SPierre Jolivet   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
20829566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
20839566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
20843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2085cb20be35SHong Zhang }
2086cb20be35SHong Zhang 
2087d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2088d71ae5a4SJacob Faibussowitsch {
2089cb20be35SHong Zhang   MPI_Comm               comm;
2090baa3c1c6SHong Zhang   PetscMPIInt            size;
2091660d5466SHong Zhang   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2092baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
209347d993e7Ssuyashtn   PetscBool              cisdense = PETSC_FALSE;
20940acaeb71SStefano Zampini   PetscInt               i;
20950acaeb71SStefano Zampini   const PetscInt        *ranges;
2096cb20be35SHong Zhang 
2097cb20be35SHong Zhang   PetscFunctionBegin;
20988f83a4d9SJacob Faibussowitsch   MatCheckProduct(C, 4);
20995f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
21009566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2101cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
210298921bdaSJacob Faibussowitsch     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2103cb20be35SHong Zhang   }
2104cb20be35SHong Zhang 
21054222ddf1SHong Zhang   /* create matrix product C */
21069566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
210747d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA)
21089566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
210947d993e7Ssuyashtn #endif
211047d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
211147d993e7Ssuyashtn   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
211247d993e7Ssuyashtn #endif
211348a46eb9SPierre Jolivet   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
21149566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2115baa3c1c6SHong Zhang 
21164222ddf1SHong Zhang   /* create data structure for reuse C */
21179566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
21189566063dSJacob Faibussowitsch   PetscCall(PetscNew(&atb));
21194222ddf1SHong Zhang   cM = C->rmap->N;
21209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
21219566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
21220acaeb71SStefano Zampini   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2123baa3c1c6SHong Zhang 
21246718818eSStefano Zampini   C->product->data    = atb;
21256718818eSStefano Zampini   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
21263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2127cb20be35SHong Zhang }
2128cb20be35SHong Zhang 
2129d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2130d71ae5a4SJacob Faibussowitsch {
2131cc48ffa7SToby Isaac   MPI_Comm               comm;
2132cc48ffa7SToby Isaac   PetscMPIInt            i, size;
2133cc48ffa7SToby Isaac   PetscInt               maxRows, bufsiz;
2134cc48ffa7SToby Isaac   PetscMPIInt            tag;
21354222ddf1SHong Zhang   PetscInt               alg;
2136cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
21374222ddf1SHong Zhang   Mat_Product           *product = C->product;
21384222ddf1SHong Zhang   PetscBool              flg;
2139cc48ffa7SToby Isaac 
2140cc48ffa7SToby Isaac   PetscFunctionBegin;
21416718818eSStefano Zampini   MatCheckProduct(C, 4);
21425f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
21434222ddf1SHong Zhang   /* check local size of A and B */
21445f80ce2aSJacob Faibussowitsch   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2145cc48ffa7SToby Isaac 
21469566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2147637a0070SStefano Zampini   alg = flg ? 0 : 1;
2148cc48ffa7SToby Isaac 
21494222ddf1SHong Zhang   /* setup matrix product C */
21509566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
21519566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATMPIDENSE));
21529566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
21539566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
21544222ddf1SHong Zhang 
21554222ddf1SHong Zhang   /* create data structure for reuse C */
21569566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
21579566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
21589566063dSJacob Faibussowitsch   PetscCall(PetscNew(&abt));
2159cc48ffa7SToby Isaac   abt->tag = tag;
2160faa55883SToby Isaac   abt->alg = alg;
2161faa55883SToby Isaac   switch (alg) {
21624222ddf1SHong Zhang   case 1: /* alg: "cyclic" */
2163cc48ffa7SToby Isaac     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2164cc48ffa7SToby Isaac     bufsiz = A->cmap->N * maxRows;
2165f4f49eeaSPierre Jolivet     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2166faa55883SToby Isaac     break;
21674222ddf1SHong Zhang   default: /* alg: "allgatherv" */
2168f4f49eeaSPierre Jolivet     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2169f4f49eeaSPierre Jolivet     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2170faa55883SToby Isaac     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2171faa55883SToby Isaac     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2172faa55883SToby Isaac     break;
2173faa55883SToby Isaac   }
2174cc48ffa7SToby Isaac 
21756718818eSStefano Zampini   C->product->data                = abt;
21766718818eSStefano Zampini   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
21774222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
21783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2179cc48ffa7SToby Isaac }
2180cc48ffa7SToby Isaac 
2181d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2182d71ae5a4SJacob Faibussowitsch {
2183cc48ffa7SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
21846718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2185cc48ffa7SToby Isaac   MPI_Comm               comm;
2186cc48ffa7SToby Isaac   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2187f4259b30SLisandro Dalcin   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2188cc48ffa7SToby Isaac   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2189cc48ffa7SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2190637a0070SStefano Zampini   const PetscScalar     *av, *bv;
219123fff9afSBarry Smith   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2192cc48ffa7SToby Isaac   MPI_Request            reqs[2];
2193cc48ffa7SToby Isaac   const PetscInt        *ranges;
2194cc48ffa7SToby Isaac 
2195cc48ffa7SToby Isaac   PetscFunctionBegin;
21966718818eSStefano Zampini   MatCheckProduct(C, 3);
21975f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
21986718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
21999566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
22009566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
22019566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
22029566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
22039566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
22049566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
22059566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
22069566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
22079566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &i));
22089566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &blda));
22099566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
22109566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
22119566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(B, &ranges));
2212cc48ffa7SToby Isaac   bn = B->rmap->n;
2213637a0070SStefano Zampini   if (blda == bn) {
2214637a0070SStefano Zampini     sendbuf = (PetscScalar *)bv;
2215cc48ffa7SToby Isaac   } else {
2216cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2217cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2218ad540459SPierre Jolivet       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2219cc48ffa7SToby Isaac     }
2220cc48ffa7SToby Isaac   }
2221cc48ffa7SToby Isaac   if (size > 1) {
2222cc48ffa7SToby Isaac     sendto   = (rank + size - 1) % size;
2223cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2224cc48ffa7SToby Isaac   } else {
2225cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2226cc48ffa7SToby Isaac   }
22279566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
22289566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2229cc48ffa7SToby Isaac   recvisfrom = rank;
2230cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2231cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2232cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2233cc48ffa7SToby Isaac     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2234cc48ffa7SToby Isaac 
2235cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2236cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2237cc48ffa7SToby Isaac       sendsiz = cK * bn;
2238cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2239cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
22409566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
22419566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2242cc48ffa7SToby Isaac     }
2243cc48ffa7SToby Isaac 
2244cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
22459566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2246792fecdfSBarry Smith     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2247cc48ffa7SToby Isaac 
2248cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2249cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
22509566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2251cc48ffa7SToby Isaac     }
2252cc48ffa7SToby Isaac     bn         = nextbn;
2253cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2254cc48ffa7SToby Isaac     sendbuf    = recvbuf;
2255cc48ffa7SToby Isaac   }
22569566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
22579566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
22589566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
225967af85e8SPierre Jolivet   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
22609566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
22619566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
22623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2263cc48ffa7SToby Isaac }
2264cc48ffa7SToby Isaac 
2265d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2266d71ae5a4SJacob Faibussowitsch {
2267faa55883SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
22686718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2269faa55883SToby Isaac   MPI_Comm               comm;
2270637a0070SStefano Zampini   PetscMPIInt            size;
2271637a0070SStefano Zampini   PetscScalar           *cv, *sendbuf, *recvbuf;
2272637a0070SStefano Zampini   const PetscScalar     *av, *bv;
2273637a0070SStefano Zampini   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2274faa55883SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2275637a0070SStefano Zampini   PetscBLASInt           cm, cn, ck, alda, clda;
2276faa55883SToby Isaac 
2277faa55883SToby Isaac   PetscFunctionBegin;
22786718818eSStefano Zampini   MatCheckProduct(C, 3);
22795f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
22806718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
22819566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
22829566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
22839566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
22849566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
22859566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
22869566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
22879566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
22889566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &blda));
22899566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
22909566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
2291faa55883SToby Isaac   /* copy transpose of B into buf[0] */
2292faa55883SToby Isaac   bn      = B->rmap->n;
2293faa55883SToby Isaac   sendbuf = abt->buf[0];
2294faa55883SToby Isaac   recvbuf = abt->buf[1];
2295faa55883SToby Isaac   for (k = 0, j = 0; j < bn; j++) {
2296ad540459SPierre Jolivet     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2297faa55883SToby Isaac   }
22989566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
22999566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
23009566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
23019566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
23029566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2303792fecdfSBarry Smith   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
23049566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
23059566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
23069566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
230767af85e8SPierre Jolivet   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
23089566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
23099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
23103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2311faa55883SToby Isaac }
2312faa55883SToby Isaac 
2313d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2314d71ae5a4SJacob Faibussowitsch {
23156718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2316faa55883SToby Isaac 
2317faa55883SToby Isaac   PetscFunctionBegin;
23186718818eSStefano Zampini   MatCheckProduct(C, 3);
23195f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
23206718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
2321faa55883SToby Isaac   switch (abt->alg) {
2322d71ae5a4SJacob Faibussowitsch   case 1:
2323d71ae5a4SJacob Faibussowitsch     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2324d71ae5a4SJacob Faibussowitsch     break;
2325d71ae5a4SJacob Faibussowitsch   default:
2326d71ae5a4SJacob Faibussowitsch     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2327d71ae5a4SJacob Faibussowitsch     break;
2328faa55883SToby Isaac   }
23293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2330faa55883SToby Isaac }
2331faa55883SToby Isaac 
233266976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2333d71ae5a4SJacob Faibussowitsch {
23346718818eSStefano Zampini   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2335320f2790SHong Zhang 
2336320f2790SHong Zhang   PetscFunctionBegin;
23379566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ce));
23389566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ae));
23399566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Be));
23409566063dSJacob Faibussowitsch   PetscCall(PetscFree(ab));
23413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2342320f2790SHong Zhang }
2343320f2790SHong Zhang 
23445d8c7819SPierre Jolivet static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2345d71ae5a4SJacob Faibussowitsch {
23466718818eSStefano Zampini   Mat_MatMultDense *ab;
23475d8c7819SPierre Jolivet   Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
2348320f2790SHong Zhang 
2349320f2790SHong Zhang   PetscFunctionBegin;
23506718818eSStefano Zampini   MatCheckProduct(C, 3);
23515f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
23526718818eSStefano Zampini   ab = (Mat_MatMultDense *)C->product->data;
23535d8c7819SPierre Jolivet   if (ab->Ae && ab->Ce) {
23545d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL)
23559566063dSJacob Faibussowitsch     PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
23569566063dSJacob Faibussowitsch     PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
23579566063dSJacob Faibussowitsch     PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
23589566063dSJacob Faibussowitsch     PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
23595d8c7819SPierre Jolivet #else
23605d8c7819SPierre Jolivet     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
23615d8c7819SPierre Jolivet #endif
23625d8c7819SPierre Jolivet   } else {
23635d8c7819SPierre Jolivet     const PetscScalar *read;
23645d8c7819SPierre Jolivet     PetscScalar       *write;
23655d8c7819SPierre Jolivet     PetscInt           lda;
23665d8c7819SPierre Jolivet 
23675d8c7819SPierre Jolivet     PetscCall(MatDenseGetLDA(B, &lda));
23685d8c7819SPierre Jolivet     PetscCall(MatDenseGetArrayRead(B, &read));
23695d8c7819SPierre Jolivet     PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
23705d8c7819SPierre Jolivet     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
23715d8c7819SPierre Jolivet     for (PetscInt i = 0; i < C->cmap->N; ++i) {
23725d8c7819SPierre Jolivet       PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
23735d8c7819SPierre Jolivet       PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
23745d8c7819SPierre Jolivet     }
23755d8c7819SPierre Jolivet     PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
23765d8c7819SPierre Jolivet     PetscCall(MatDenseRestoreArrayRead(B, &read));
2377f4f49eeaSPierre Jolivet     PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
23785d8c7819SPierre Jolivet   }
23793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2380320f2790SHong Zhang }
2381320f2790SHong Zhang 
2382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2383d71ae5a4SJacob Faibussowitsch {
23845d8c7819SPierre Jolivet   Mat_Product      *product = C->product;
23855d8c7819SPierre Jolivet   PetscInt          alg;
2386320f2790SHong Zhang   Mat_MatMultDense *ab;
23875d8c7819SPierre Jolivet   PetscBool         flg;
2388320f2790SHong Zhang 
2389320f2790SHong Zhang   PetscFunctionBegin;
23906718818eSStefano Zampini   MatCheckProduct(C, 4);
23915f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
23924222ddf1SHong Zhang   /* check local size of A and B */
23935d8c7819SPierre Jolivet   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
23945d8c7819SPierre Jolivet              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2395320f2790SHong Zhang 
23965d8c7819SPierre Jolivet   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
23975d8c7819SPierre Jolivet   alg = flg ? 0 : 1;
23984222ddf1SHong Zhang 
23994222ddf1SHong Zhang   /* setup C */
24005d8c7819SPierre Jolivet   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
24015d8c7819SPierre Jolivet   PetscCall(MatSetType(C, MATMPIDENSE));
24029566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2403320f2790SHong Zhang 
2404320f2790SHong Zhang   /* create data structure for reuse Cdense */
24059566063dSJacob Faibussowitsch   PetscCall(PetscNew(&ab));
24065d8c7819SPierre Jolivet 
24075d8c7819SPierre Jolivet   switch (alg) {
24085d8c7819SPierre Jolivet   case 1: /* alg: "elemental" */
24095d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL)
24105d8c7819SPierre Jolivet     /* create elemental matrices Ae and Be */
24115d8c7819SPierre Jolivet     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
24125d8c7819SPierre Jolivet     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
24135d8c7819SPierre Jolivet     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
24145d8c7819SPierre Jolivet     PetscCall(MatSetUp(ab->Ae));
24155d8c7819SPierre Jolivet     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
24165d8c7819SPierre Jolivet 
24175d8c7819SPierre Jolivet     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
24185d8c7819SPierre Jolivet     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
24195d8c7819SPierre Jolivet     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
24205d8c7819SPierre Jolivet     PetscCall(MatSetUp(ab->Be));
24215d8c7819SPierre Jolivet     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
24225d8c7819SPierre Jolivet 
24235d8c7819SPierre Jolivet     /* compute symbolic Ce = Ae*Be */
24245d8c7819SPierre Jolivet     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
24255d8c7819SPierre Jolivet     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
24265d8c7819SPierre Jolivet #else
24275d8c7819SPierre Jolivet     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
24285d8c7819SPierre Jolivet #endif
24295d8c7819SPierre Jolivet     break;
24305d8c7819SPierre Jolivet   default: /* alg: "petsc" */
24315d8c7819SPierre Jolivet     ab->Ae = NULL;
24325d8c7819SPierre Jolivet     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
24335d8c7819SPierre Jolivet     ab->Ce = NULL;
24345d8c7819SPierre Jolivet     break;
24355d8c7819SPierre Jolivet   }
24366718818eSStefano Zampini 
24376718818eSStefano Zampini   C->product->data       = ab;
24386718818eSStefano Zampini   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
24394222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
24403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2441320f2790SHong Zhang }
24422ef1f0ffSBarry Smith 
2443d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2444d71ae5a4SJacob Faibussowitsch {
24455d8c7819SPierre Jolivet   Mat_Product *product     = C->product;
24465d8c7819SPierre Jolivet   const char  *algTypes[2] = {"petsc", "elemental"};
24475d8c7819SPierre Jolivet   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
24485d8c7819SPierre Jolivet   PetscBool    flg = PETSC_FALSE;
24495d8c7819SPierre Jolivet 
24504d86920dSPierre Jolivet   PetscFunctionBegin;
24515d8c7819SPierre Jolivet   /* Set default algorithm */
24525d8c7819SPierre Jolivet   alg = 0; /* default is petsc */
24535d8c7819SPierre Jolivet   PetscCall(PetscStrcmp(product->alg, "default", &flg));
24545d8c7819SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
24555d8c7819SPierre Jolivet 
24565d8c7819SPierre Jolivet   /* Get runtime option */
24575d8c7819SPierre Jolivet   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
24585d8c7819SPierre Jolivet   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
24595d8c7819SPierre Jolivet   PetscOptionsEnd();
24605d8c7819SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
24615d8c7819SPierre Jolivet 
24624222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
24634222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
24643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2465320f2790SHong Zhang }
246686aefd0dSHong Zhang 
2467d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2468d71ae5a4SJacob Faibussowitsch {
24694222ddf1SHong Zhang   Mat_Product *product = C->product;
24704222ddf1SHong Zhang   Mat          A = product->A, B = product->B;
24714222ddf1SHong Zhang 
24724222ddf1SHong Zhang   PetscFunctionBegin;
247307e90273SPierre Jolivet   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
247407e90273SPierre Jolivet              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
24756718818eSStefano Zampini   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
24766718818eSStefano Zampini   C->ops->productsymbolic          = MatProductSymbolic_AtB;
24773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24784222ddf1SHong Zhang }
24794222ddf1SHong Zhang 
2480d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2481d71ae5a4SJacob Faibussowitsch {
24824222ddf1SHong Zhang   Mat_Product *product     = C->product;
24834222ddf1SHong Zhang   const char  *algTypes[2] = {"allgatherv", "cyclic"};
24844222ddf1SHong Zhang   PetscInt     alg, nalg = 2;
24854222ddf1SHong Zhang   PetscBool    flg = PETSC_FALSE;
24864222ddf1SHong Zhang 
24874222ddf1SHong Zhang   PetscFunctionBegin;
24884222ddf1SHong Zhang   /* Set default algorithm */
24894222ddf1SHong Zhang   alg = 0; /* default is allgatherv */
24909566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "default", &flg));
249148a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
24924222ddf1SHong Zhang 
24934222ddf1SHong Zhang   /* Get runtime option */
24944222ddf1SHong Zhang   if (product->api_user) {
2495d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
24969566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2497d0609cedSBarry Smith     PetscOptionsEnd();
24984222ddf1SHong Zhang   } else {
2499d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
25009566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2501d0609cedSBarry Smith     PetscOptionsEnd();
25024222ddf1SHong Zhang   }
250348a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
25044222ddf1SHong Zhang 
25054222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
25064222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
25073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25084222ddf1SHong Zhang }
25094222ddf1SHong Zhang 
25105d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2511d71ae5a4SJacob Faibussowitsch {
25124222ddf1SHong Zhang   Mat_Product *product = C->product;
25134222ddf1SHong Zhang 
25144222ddf1SHong Zhang   PetscFunctionBegin;
25154222ddf1SHong Zhang   switch (product->type) {
2516d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
2517d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2518d71ae5a4SJacob Faibussowitsch     break;
2519d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AtB:
2520d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2521d71ae5a4SJacob Faibussowitsch     break;
2522d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
2523d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2524d71ae5a4SJacob Faibussowitsch     break;
2525d71ae5a4SJacob Faibussowitsch   default:
2526d71ae5a4SJacob Faibussowitsch     break;
25274222ddf1SHong Zhang   }
25283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25294222ddf1SHong Zhang }
2530