1ed3cc1f0SBarry Smith /* 2ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 347d993e7Ssuyashtn Portions of this code are under: 447d993e7Ssuyashtn Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5ed3cc1f0SBarry Smith */ 6ed3cc1f0SBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 9baa3c1c6SHong Zhang #include <petscblaslapack.h> 108965ea79SLois Curfman McInnes 11ab92ecdeSBarry Smith /*@ 1211a5261eSBarry Smith MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 13ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 14ab92ecdeSBarry Smith 15ab92ecdeSBarry Smith Input Parameter: 162ef1f0ffSBarry Smith . A - the sequential or MPI `MATDENSE` matrix 17ab92ecdeSBarry Smith 18ab92ecdeSBarry Smith Output Parameter: 19ab92ecdeSBarry Smith . B - the inner matrix 20ab92ecdeSBarry Smith 218e6c10adSSatish Balay Level: intermediate 228e6c10adSSatish Balay 231cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 24ab92ecdeSBarry Smith @*/ 25d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 26d71ae5a4SJacob Faibussowitsch { 27ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 28ace3abfcSBarry Smith PetscBool flg; 29ab92ecdeSBarry Smith 30ab92ecdeSBarry Smith PetscFunctionBegin; 31d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 324f572ea9SToby Isaac PetscAssertPointer(B, 2); 339566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 342205254eSKarl Rupp if (flg) *B = mat->A; 35f140bc17SStefano Zampini else { 369566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 3728b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 38f140bc17SStefano Zampini *B = A; 39f140bc17SStefano Zampini } 403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41ab92ecdeSBarry Smith } 42ab92ecdeSBarry Smith 4366976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44d71ae5a4SJacob Faibussowitsch { 45a76f77c3SStefano Zampini Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 46a76f77c3SStefano Zampini Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 47a76f77c3SStefano Zampini 48a76f77c3SStefano Zampini PetscFunctionBegin; 499566063dSJacob Faibussowitsch PetscCall(MatCopy(Amat->A, Bmat->A, s)); 503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51a76f77c3SStefano Zampini } 52a76f77c3SStefano Zampini 53d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 54d71ae5a4SJacob Faibussowitsch { 552f605a99SJose E. Roman Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 562f605a99SJose E. Roman PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 572f605a99SJose E. Roman PetscScalar *v; 582f605a99SJose E. Roman 592f605a99SJose E. Roman PetscFunctionBegin; 602f605a99SJose E. Roman PetscCall(MatDenseGetArray(mat->A, &v)); 612f605a99SJose E. Roman PetscCall(MatDenseGetLDA(mat->A, &lda)); 622f605a99SJose E. Roman rend2 = PetscMin(rend, A->cmap->N); 632f605a99SJose E. Roman if (rend2 > rstart) { 642f605a99SJose E. Roman for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 652f605a99SJose E. Roman PetscCall(PetscLogFlops(rend2 - rstart)); 662f605a99SJose E. Roman } 672f605a99SJose E. Roman PetscCall(MatDenseRestoreArray(mat->A, &v)); 683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 692f605a99SJose E. Roman } 702f605a99SJose E. Roman 7166976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 72d71ae5a4SJacob Faibussowitsch { 73ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 74d0f46423SBarry Smith PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 75ba8c8a56SBarry Smith 76ba8c8a56SBarry Smith PetscFunctionBegin; 77aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 78ba8c8a56SBarry Smith lrow = row - rstart; 799566063dSJacob Faibussowitsch PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81ba8c8a56SBarry Smith } 82ba8c8a56SBarry Smith 8366976f2fSJacob Faibussowitsch static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 84d71ae5a4SJacob Faibussowitsch { 85637a0070SStefano Zampini Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 86637a0070SStefano Zampini PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 87ba8c8a56SBarry Smith 88ba8c8a56SBarry Smith PetscFunctionBegin; 89aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 90637a0070SStefano Zampini lrow = row - rstart; 919566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 93ba8c8a56SBarry Smith } 94ba8c8a56SBarry Smith 9566976f2fSJacob Faibussowitsch static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 96d71ae5a4SJacob Faibussowitsch { 970de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 98d0f46423SBarry Smith PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 9987828ca2SBarry Smith PetscScalar *array; 1000de54da6SSatish Balay MPI_Comm comm; 1014b6ae80fSStefano Zampini PetscBool flg; 10211bd1e4dSLisandro Dalcin Mat B; 1030de54da6SSatish Balay 1040de54da6SSatish Balay PetscFunctionBegin; 1059566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A, &flg)); 10628b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 1079566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 108616b8fbbSStefano Zampini if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 109a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA) 1109566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 11128b400f6SJacob Faibussowitsch PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 112a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP) 11347d993e7Ssuyashtn PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 11447d993e7Ssuyashtn PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 11547d993e7Ssuyashtn #endif 116f4f49eeaSPierre Jolivet PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 1179566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &B)); 1189566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, m, m, m, m)); 1199566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 1209566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 1219566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 1229566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 1239566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 12411bd1e4dSLisandro Dalcin *a = B; 1259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1262205254eSKarl Rupp } else *a = B; 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1280de54da6SSatish Balay } 1290de54da6SSatish Balay 13066976f2fSJacob Faibussowitsch static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 131d71ae5a4SJacob Faibussowitsch { 13239b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 133d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 134ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 1358965ea79SLois Curfman McInnes 1363a40ed3dSBarry Smith PetscFunctionBegin; 1378965ea79SLois Curfman McInnes for (i = 0; i < m; i++) { 1385ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 13908401ef6SPierre Jolivet PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 1408965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1418965ea79SLois Curfman McInnes row = idxm[i] - rstart; 14239b7565bSBarry Smith if (roworiented) { 143a80ceddfSStefano Zampini PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv)); 1443a40ed3dSBarry Smith } else { 1458965ea79SLois Curfman McInnes for (j = 0; j < n; j++) { 1465ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 14708401ef6SPierre Jolivet PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 148a80ceddfSStefano Zampini PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv)); 14939b7565bSBarry Smith } 1508965ea79SLois Curfman McInnes } 1512205254eSKarl Rupp } else if (!A->donotstash) { 1525080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 15339b7565bSBarry Smith if (roworiented) { 154a80ceddfSStefano Zampini PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE)); 155d36fbae8SSatish Balay } else { 156a80ceddfSStefano Zampini PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE)); 15739b7565bSBarry Smith } 158b49de8d1SLois Curfman McInnes } 159b49de8d1SLois Curfman McInnes } 1603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 161b49de8d1SLois Curfman McInnes } 162b49de8d1SLois Curfman McInnes 16366976f2fSJacob Faibussowitsch static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 164d71ae5a4SJacob Faibussowitsch { 165b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 166d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 167b49de8d1SLois Curfman McInnes 1683a40ed3dSBarry Smith PetscFunctionBegin; 169b49de8d1SLois Curfman McInnes for (i = 0; i < m; i++) { 17054c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 17154c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 172b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 173b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 174b49de8d1SLois Curfman McInnes for (j = 0; j < n; j++) { 17554c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 17654c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 1779566063dSJacob Faibussowitsch PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 178b49de8d1SLois Curfman McInnes } 179e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 1808965ea79SLois Curfman McInnes } 1813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1828965ea79SLois Curfman McInnes } 1838965ea79SLois Curfman McInnes 184d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 185d71ae5a4SJacob Faibussowitsch { 18649a6ff4bSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 18749a6ff4bSBarry Smith 18849a6ff4bSBarry Smith PetscFunctionBegin; 1899566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, lda)); 1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19149a6ff4bSBarry Smith } 19249a6ff4bSBarry Smith 193d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 194d71ae5a4SJacob Faibussowitsch { 195ad16ce7aSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 19647d993e7Ssuyashtn MatType mtype = MATSEQDENSE; 197ad16ce7aSStefano Zampini 198ad16ce7aSStefano Zampini PetscFunctionBegin; 19917359960SJose E. Roman if (!a->A) { 20028b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2019566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 2029566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 2039566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 2049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 205a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA) 20647d993e7Ssuyashtn PetscBool iscuda; 2079566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 20847d993e7Ssuyashtn if (iscuda) mtype = MATSEQDENSECUDA; 209a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP) 21047d993e7Ssuyashtn PetscBool iship; 21147d993e7Ssuyashtn PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 21247d993e7Ssuyashtn if (iship) mtype = MATSEQDENSEHIP; 21347d993e7Ssuyashtn #endif 21447d993e7Ssuyashtn PetscCall(MatSetType(a->A, mtype)); 21517359960SJose E. Roman } 2169566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(a->A, lda)); 2173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 218ad16ce7aSStefano Zampini } 219ad16ce7aSStefano Zampini 220d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 221d71ae5a4SJacob Faibussowitsch { 222ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *)A->data; 223ff14e315SSatish Balay 2243a40ed3dSBarry Smith PetscFunctionBegin; 22528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2269566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(a->A, array)); 2273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 228ff14e315SSatish Balay } 229ff14e315SSatish Balay 230f1e99121SPierre Jolivet static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 231d71ae5a4SJacob Faibussowitsch { 2328572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2338572280aSBarry Smith 2348572280aSBarry Smith PetscFunctionBegin; 23528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 236f1e99121SPierre Jolivet PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 2373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2388572280aSBarry Smith } 2398572280aSBarry Smith 240d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 241d71ae5a4SJacob Faibussowitsch { 2426947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2436947451fSStefano Zampini 2446947451fSStefano Zampini PetscFunctionBegin; 24528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2469566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(a->A, array)); 2473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2486947451fSStefano Zampini } 2496947451fSStefano Zampini 250d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 251d71ae5a4SJacob Faibussowitsch { 252d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 253d3042a70SBarry Smith 254d3042a70SBarry Smith PetscFunctionBegin; 25528b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 25628b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2579566063dSJacob Faibussowitsch PetscCall(MatDensePlaceArray(a->A, array)); 2583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259d3042a70SBarry Smith } 260d3042a70SBarry Smith 261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 262d71ae5a4SJacob Faibussowitsch { 263d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 264d3042a70SBarry Smith 265d3042a70SBarry Smith PetscFunctionBegin; 26628b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 26728b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2689566063dSJacob Faibussowitsch PetscCall(MatDenseResetArray(a->A)); 2693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 270d3042a70SBarry Smith } 271d3042a70SBarry Smith 272d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 273d71ae5a4SJacob Faibussowitsch { 274d5ea218eSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 275d5ea218eSStefano Zampini 276d5ea218eSStefano Zampini PetscFunctionBegin; 27728b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 27828b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2799566063dSJacob Faibussowitsch PetscCall(MatDenseReplaceArray(a->A, array)); 2803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 281d5ea218eSStefano Zampini } 282d5ea218eSStefano Zampini 283d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 284d71ae5a4SJacob Faibussowitsch { 285ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 286637a0070SStefano Zampini PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 2875d0c19d7SBarry Smith const PetscInt *irow, *icol; 288637a0070SStefano Zampini const PetscScalar *v; 289637a0070SStefano Zampini PetscScalar *bv; 290ca3fa75bSLois Curfman McInnes Mat newmat; 2914aa3045dSJed Brown IS iscol_local; 29242a884f0SBarry Smith MPI_Comm comm_is, comm_mat; 293ca3fa75bSLois Curfman McInnes 294ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 2959566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 2969566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 29708401ef6SPierre Jolivet PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 29842a884f0SBarry Smith 2999566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 3009566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &irow)); 3019566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &icol)); 3029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &nrows)); 3039566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &ncols)); 3049566063dSJacob Faibussowitsch PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 305ca3fa75bSLois Curfman McInnes 306ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 307da81f932SPierre Jolivet to confirm that it is OK. ... Currently supports only submatrix same partitioning as 3087eba5e9cSLois Curfman McInnes original matrix! */ 309ca3fa75bSLois Curfman McInnes 3109566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 3119566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 312ca3fa75bSLois Curfman McInnes 313ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 314ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 315e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 3167eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 317ca3fa75bSLois Curfman McInnes newmat = *B; 318ca3fa75bSLois Curfman McInnes } else { 319ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 3209566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 3219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 3229566063dSJacob Faibussowitsch PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 3239566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 324ca3fa75bSLois Curfman McInnes } 325ca3fa75bSLois Curfman McInnes 326ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 327ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense *)newmat->data; 3289566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(newmatd->A, &bv)); 3299566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mat->A, &v)); 3309566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(mat->A, &lda)); 3314aa3045dSJed Brown for (i = 0; i < Ncols; i++) { 332637a0070SStefano Zampini const PetscScalar *av = v + lda * icol[i]; 333ad540459SPierre Jolivet for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 334ca3fa75bSLois Curfman McInnes } 3359566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 3369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 337ca3fa75bSLois Curfman McInnes 338ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 3399566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 3409566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 341ca3fa75bSLois Curfman McInnes 342ca3fa75bSLois Curfman McInnes /* Free work space */ 3439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &irow)); 3449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &icol)); 3459566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 346ca3fa75bSLois Curfman McInnes *B = newmat; 3473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 348ca3fa75bSLois Curfman McInnes } 349ca3fa75bSLois Curfman McInnes 35066976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 351d71ae5a4SJacob Faibussowitsch { 35273a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 35373a71a0fSBarry Smith 3543a40ed3dSBarry Smith PetscFunctionBegin; 3559566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(a->A, array)); 3563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 357ff14e315SSatish Balay } 358ff14e315SSatish Balay 359f1e99121SPierre Jolivet static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 360d71ae5a4SJacob Faibussowitsch { 3618572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 3628572280aSBarry Smith 3638572280aSBarry Smith PetscFunctionBegin; 364f1e99121SPierre Jolivet PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 3653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3668572280aSBarry Smith } 3678572280aSBarry Smith 36866976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 369d71ae5a4SJacob Faibussowitsch { 3706947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 3716947451fSStefano Zampini 3726947451fSStefano Zampini PetscFunctionBegin; 3739566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 3743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3756947451fSStefano Zampini } 3766947451fSStefano Zampini 37766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 378d71ae5a4SJacob Faibussowitsch { 37939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 38013f74950SBarry Smith PetscInt nstash, reallocs; 3818965ea79SLois Curfman McInnes 3823a40ed3dSBarry Smith PetscFunctionBegin; 3833ba16761SJacob Faibussowitsch if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 3848965ea79SLois Curfman McInnes 3859566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 3869566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 3879566063dSJacob Faibussowitsch PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 3883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3898965ea79SLois Curfman McInnes } 3908965ea79SLois Curfman McInnes 39166976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 392d71ae5a4SJacob Faibussowitsch { 39339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 39413f74950SBarry Smith PetscInt i, *row, *col, flg, j, rstart, ncols; 39513f74950SBarry Smith PetscMPIInt n; 39687828ca2SBarry Smith PetscScalar *val; 3978965ea79SLois Curfman McInnes 3983a40ed3dSBarry Smith PetscFunctionBegin; 399910cf402Sprj- if (!mdn->donotstash && !mat->nooffprocentries) { 4008965ea79SLois Curfman McInnes /* wait on receives */ 4017ef1d9bdSSatish Balay while (1) { 4029566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 4037ef1d9bdSSatish Balay if (!flg) break; 4048965ea79SLois Curfman McInnes 4057ef1d9bdSSatish Balay for (i = 0; i < n;) { 4067ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 4072205254eSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 4082205254eSKarl Rupp if (row[j] != rstart) break; 4092205254eSKarl Rupp } 4107ef1d9bdSSatish Balay if (j < n) ncols = j - i; 4117ef1d9bdSSatish Balay else ncols = n - i; 4127ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 4139566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 4147ef1d9bdSSatish Balay i = j; 4158965ea79SLois Curfman McInnes } 4167ef1d9bdSSatish Balay } 4179566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 418910cf402Sprj- } 4198965ea79SLois Curfman McInnes 4209566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mdn->A, mode)); 4219566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mdn->A, mode)); 4223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4238965ea79SLois Curfman McInnes } 4248965ea79SLois Curfman McInnes 42566976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 426d71ae5a4SJacob Faibussowitsch { 42739ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *)A->data; 4283a40ed3dSBarry Smith 4293a40ed3dSBarry Smith PetscFunctionBegin; 4309566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 4313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4328965ea79SLois Curfman McInnes } 4338965ea79SLois Curfman McInnes 43466976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 435d71ae5a4SJacob Faibussowitsch { 43639ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *)A->data; 437637a0070SStefano Zampini PetscInt i, len, *lrows; 438637a0070SStefano Zampini 439637a0070SStefano Zampini PetscFunctionBegin; 440637a0070SStefano Zampini /* get locally owned rows */ 4419566063dSJacob Faibussowitsch PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 442dd8e379bSPierre Jolivet /* fix right-hand side if needed */ 443637a0070SStefano Zampini if (x && b) { 44497b48c8fSBarry Smith const PetscScalar *xx; 44597b48c8fSBarry Smith PetscScalar *bb; 4468965ea79SLois Curfman McInnes 4479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 4489566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(b, &bb)); 449637a0070SStefano Zampini for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 4509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 4519566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(b, &bb)); 45297b48c8fSBarry Smith } 4539566063dSJacob Faibussowitsch PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 454e2eb51b1SBarry Smith if (diag != 0.0) { 455637a0070SStefano Zampini Vec d; 456b9679d65SBarry Smith 4579566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &d)); 4589566063dSJacob Faibussowitsch PetscCall(VecSet(d, diag)); 4599566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 4609566063dSJacob Faibussowitsch PetscCall(VecDestroy(&d)); 461b9679d65SBarry Smith } 4629566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 4633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4648965ea79SLois Curfman McInnes } 4658965ea79SLois Curfman McInnes 4660be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 4670be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 4680be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 4690be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 4700be0d8bdSHansol Suh 47166976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 472d71ae5a4SJacob Faibussowitsch { 47339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 474637a0070SStefano Zampini const PetscScalar *ax; 475637a0070SStefano Zampini PetscScalar *ay; 476d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 477c456f294SBarry Smith 4783a40ed3dSBarry Smith PetscFunctionBegin; 4796f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 4809566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 4819566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 4829566063dSJacob Faibussowitsch PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 4839566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 4849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 4859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 4869566063dSJacob Faibussowitsch PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 4873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4888965ea79SLois Curfman McInnes } 4898965ea79SLois Curfman McInnes 4900be0d8bdSHansol Suh static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 4910be0d8bdSHansol Suh { 4920be0d8bdSHansol Suh Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 4930be0d8bdSHansol Suh const PetscScalar *ax; 4940be0d8bdSHansol Suh PetscScalar *ay; 4950be0d8bdSHansol Suh PetscMemType axmtype, aymtype; 4960be0d8bdSHansol Suh 4970be0d8bdSHansol Suh PetscFunctionBegin; 4980be0d8bdSHansol Suh if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 4990be0d8bdSHansol Suh PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 5000be0d8bdSHansol Suh PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 5010be0d8bdSHansol Suh PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 5020be0d8bdSHansol Suh PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 5030be0d8bdSHansol Suh PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 5040be0d8bdSHansol Suh PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 5050be0d8bdSHansol Suh PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 5060be0d8bdSHansol Suh PetscFunctionReturn(PETSC_SUCCESS); 5070be0d8bdSHansol Suh } 5080be0d8bdSHansol Suh 50966976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 510d71ae5a4SJacob Faibussowitsch { 51139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 512637a0070SStefano Zampini const PetscScalar *ax; 513637a0070SStefano Zampini PetscScalar *ay; 514d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 515c456f294SBarry Smith 5163a40ed3dSBarry Smith PetscFunctionBegin; 5176f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 5189566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 5199566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 5209566063dSJacob Faibussowitsch PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 5219566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 5229566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 5239566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 5249566063dSJacob Faibussowitsch PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 5253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5268965ea79SLois Curfman McInnes } 5278965ea79SLois Curfman McInnes 5280be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 5290be0d8bdSHansol Suh { 5300be0d8bdSHansol Suh Mat_MPIDense *a = (Mat_MPIDense *)A->data; 5310be0d8bdSHansol Suh const PetscScalar *ax; 5320be0d8bdSHansol Suh PetscScalar *ay; 5330be0d8bdSHansol Suh PetscMemType axmtype, aymtype; 5340be0d8bdSHansol Suh 5350be0d8bdSHansol Suh PetscFunctionBegin; 5360be0d8bdSHansol Suh if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 5370be0d8bdSHansol Suh PetscCall(VecSet(yy, 0.0)); 5380be0d8bdSHansol Suh PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 5390be0d8bdSHansol Suh PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 5400be0d8bdSHansol Suh PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 5410be0d8bdSHansol Suh PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 5420be0d8bdSHansol Suh PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 5430be0d8bdSHansol Suh PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 5440be0d8bdSHansol Suh PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 5450be0d8bdSHansol Suh PetscFunctionReturn(PETSC_SUCCESS); 5460be0d8bdSHansol Suh } 5470be0d8bdSHansol Suh 548459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 549d71ae5a4SJacob Faibussowitsch { 550096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 551637a0070SStefano Zampini const PetscScalar *ax; 552637a0070SStefano Zampini PetscScalar *ay; 553d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 5544d86920dSPierre Jolivet 5553a40ed3dSBarry Smith PetscFunctionBegin; 5566f08ad05SPierre Jolivet if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 5579566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 558459e8d23SBlanca Mellado Pinto if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 559459e8d23SBlanca Mellado Pinto else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 5609566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 5619566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 5629566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 5639566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 5649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 5659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 5663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 567096963f5SLois Curfman McInnes } 568096963f5SLois Curfman McInnes 5690be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 5700be0d8bdSHansol Suh { 5710be0d8bdSHansol Suh Mat_MPIDense *a = (Mat_MPIDense *)A->data; 5720be0d8bdSHansol Suh const PetscScalar *ax; 5730be0d8bdSHansol Suh PetscScalar *ay; 5740be0d8bdSHansol Suh PetscMemType axmtype, aymtype; 5750be0d8bdSHansol Suh 5760be0d8bdSHansol Suh PetscFunctionBegin; 5770be0d8bdSHansol Suh if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 5780be0d8bdSHansol Suh PetscCall(VecCopy(yy, zz)); 5790be0d8bdSHansol Suh PetscMPIInt rank; 5800be0d8bdSHansol Suh PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 5810be0d8bdSHansol Suh PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 5820be0d8bdSHansol Suh PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 5830be0d8bdSHansol Suh PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 5840be0d8bdSHansol Suh PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 5850be0d8bdSHansol Suh PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 5860be0d8bdSHansol Suh PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 5870be0d8bdSHansol Suh PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 5880be0d8bdSHansol Suh PetscFunctionReturn(PETSC_SUCCESS); 5890be0d8bdSHansol Suh } 5900be0d8bdSHansol Suh 591459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 592d71ae5a4SJacob Faibussowitsch { 593096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 594637a0070SStefano Zampini const PetscScalar *ax; 595637a0070SStefano Zampini PetscScalar *ay; 596d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 597096963f5SLois Curfman McInnes 5983a40ed3dSBarry Smith PetscFunctionBegin; 5996f08ad05SPierre Jolivet if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 6009566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 601459e8d23SBlanca Mellado Pinto if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 602459e8d23SBlanca Mellado Pinto else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 6039566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 6049566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 6059566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 6069566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 6079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 6089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 6093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 610096963f5SLois Curfman McInnes } 611096963f5SLois Curfman McInnes 612459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 613459e8d23SBlanca Mellado Pinto { 614459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 615459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 616459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 617459e8d23SBlanca Mellado Pinto } 618459e8d23SBlanca Mellado Pinto 619459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 620459e8d23SBlanca Mellado Pinto { 621459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 622459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 623459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 624459e8d23SBlanca Mellado Pinto } 625459e8d23SBlanca Mellado Pinto 626459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 627459e8d23SBlanca Mellado Pinto { 628459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 629459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 630459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 631459e8d23SBlanca Mellado Pinto } 632459e8d23SBlanca Mellado Pinto 633459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 634459e8d23SBlanca Mellado Pinto { 635459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 636459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 637459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 638459e8d23SBlanca Mellado Pinto } 639459e8d23SBlanca Mellado Pinto 640d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 641d71ae5a4SJacob Faibussowitsch { 64239ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 64379560b15SJacob Faibussowitsch PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 64479560b15SJacob Faibussowitsch PetscScalar *x; 645637a0070SStefano Zampini const PetscScalar *av; 646ed3cc1f0SBarry Smith 6473a40ed3dSBarry Smith PetscFunctionBegin; 6489566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 64979560b15SJacob Faibussowitsch PetscCall(VecGetSize(v, &ng)); 65079560b15SJacob Faibussowitsch PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 65179560b15SJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &nl)); 652d0f46423SBarry Smith len = PetscMin(a->A->rmap->n, a->A->cmap->n); 653d0f46423SBarry Smith radd = A->rmap->rstart * m; 6549566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 6559566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 656ad540459SPierre Jolivet for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 6579566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 6580be0d8bdSHansol Suh if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 6599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 6603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6618965ea79SLois Curfman McInnes } 6628965ea79SLois Curfman McInnes 66366976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIDense(Mat mat) 664d71ae5a4SJacob Faibussowitsch { 6653501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 666ed3cc1f0SBarry Smith 6673a40ed3dSBarry Smith PetscFunctionBegin; 6683ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 6699566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->stash)); 67028b400f6SJacob Faibussowitsch PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 67128b400f6SJacob Faibussowitsch PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 6729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mdn->A)); 6739566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mdn->lvec)); 6749566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&mdn->Mvctx)); 6759566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mdn->cvec)); 6769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mdn->cmat)); 67701b82886SBarry Smith 6789566063dSJacob Faibussowitsch PetscCall(PetscFree(mat->data)); 6799566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 6808baccfbdSHong Zhang 6819566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 6829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 6839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 6849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 6859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 6869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 6879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 6889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 6899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 6909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 6919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 6929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 6939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 6948baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 6959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 6968baccfbdSHong Zhang #endif 697d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 6989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 699d24d4204SJose E. Roman #endif 7009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 7019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 7029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 7036718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 7049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 7059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 7062e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 7072e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 7082e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 7092e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 7102e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 7112e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 7122e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 7132e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 7142e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 7152e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 7162e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 7172e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 7182e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 7192e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 7202e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 7213d9668e3SJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 7226718818eSStefano Zampini #endif 72347d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 72447d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 72547d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 72647d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 72747d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 72847d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 72947d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 73047d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 73147d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 73247d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 73347d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 73447d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 73547d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 73647d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 73747d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 73847d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 73947d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 74047d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 7413d9668e3SJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 74247d993e7Ssuyashtn #endif 7439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 7449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 7459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 7469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 7479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 7489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 7499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 7509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 7519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 7529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 7530be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 7540be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 7550be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 756f8103e6bSStefano Zampini 757f8103e6bSStefano Zampini PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 7583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7598965ea79SLois Curfman McInnes } 76039ddd567SLois Curfman McInnes 7619804daf3SBarry Smith #include <petscdraw.h> 762d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 763d71ae5a4SJacob Faibussowitsch { 76439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 765616b8fbbSStefano Zampini PetscMPIInt rank; 76619fd82e9SBarry Smith PetscViewerType vtype; 767ace3abfcSBarry Smith PetscBool iascii, isdraw; 768b0a32e0cSBarry Smith PetscViewer sviewer; 769f3ef73ceSBarry Smith PetscViewerFormat format; 7708965ea79SLois Curfman McInnes 7713a40ed3dSBarry Smith PetscFunctionBegin; 7729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7739566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 7749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 77532077d6dSBarry Smith if (iascii) { 7769566063dSJacob Faibussowitsch PetscCall(PetscViewerGetType(viewer, &vtype)); 7779566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 778456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 7794e220ebcSLois Curfman McInnes MatInfo info; 7809566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 7819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 7829371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 7839371c9d4SSatish Balay (PetscInt)info.memory)); 7849566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 7859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 7866f08ad05SPierre Jolivet if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 7873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 788fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 7893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7908965ea79SLois Curfman McInnes } 791f1af5d2fSBarry Smith } else if (isdraw) { 792b0a32e0cSBarry Smith PetscDraw draw; 793ace3abfcSBarry Smith PetscBool isnull; 794f1af5d2fSBarry Smith 7959566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 7969566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 7973ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 798f1af5d2fSBarry Smith } 79977ed5343SBarry Smith 8007da1fb6eSBarry Smith { 8018965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 8028965ea79SLois Curfman McInnes Mat A; 803d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 804ba8c8a56SBarry Smith PetscInt *cols; 805ba8c8a56SBarry Smith PetscScalar *vals; 8068965ea79SLois Curfman McInnes 8079566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 808dd400576SPatrick Sanan if (rank == 0) { 8099566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 8103a40ed3dSBarry Smith } else { 8119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 8128965ea79SLois Curfman McInnes } 8137adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 8149566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPIDENSE)); 8159566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 8168965ea79SLois Curfman McInnes 81739ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 81839ddd567SLois Curfman McInnes but it's quick for now */ 81951022da4SBarry Smith A->insertmode = INSERT_VALUES; 8202205254eSKarl Rupp 8212205254eSKarl Rupp row = mat->rmap->rstart; 8222205254eSKarl Rupp m = mdn->A->rmap->n; 8238965ea79SLois Curfman McInnes for (i = 0; i < m; i++) { 8249566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 8259566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 8269566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 82739ddd567SLois Curfman McInnes row++; 8288965ea79SLois Curfman McInnes } 8298965ea79SLois Curfman McInnes 8309566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 8319566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 8329566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 833dd400576SPatrick Sanan if (rank == 0) { 834f4f49eeaSPierre Jolivet PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 835f4f49eeaSPierre Jolivet PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 8368965ea79SLois Curfman McInnes } 8379566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 8389566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 8398965ea79SLois Curfman McInnes } 8403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8418965ea79SLois Curfman McInnes } 8428965ea79SLois Curfman McInnes 84366976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 844d71ae5a4SJacob Faibussowitsch { 845ace3abfcSBarry Smith PetscBool iascii, isbinary, isdraw, issocket; 8468965ea79SLois Curfman McInnes 847433994e6SBarry Smith PetscFunctionBegin; 8489566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 8499566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 8509566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 8519566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 8520f5bd95cSBarry Smith 85332077d6dSBarry Smith if (iascii || issocket || isdraw) { 8549566063dSJacob Faibussowitsch PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 8551baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 8563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8578965ea79SLois Curfman McInnes } 8588965ea79SLois Curfman McInnes 85966976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 860d71ae5a4SJacob Faibussowitsch { 8613501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 8623501a2bdSLois Curfman McInnes Mat mdn = mat->A; 8633966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 8648965ea79SLois Curfman McInnes 8653a40ed3dSBarry Smith PetscFunctionBegin; 8664e220ebcSLois Curfman McInnes info->block_size = 1.0; 8672205254eSKarl Rupp 8689566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 8692205254eSKarl Rupp 8709371c9d4SSatish Balay isend[0] = info->nz_used; 8719371c9d4SSatish Balay isend[1] = info->nz_allocated; 8729371c9d4SSatish Balay isend[2] = info->nz_unneeded; 8739371c9d4SSatish Balay isend[3] = info->memory; 8749371c9d4SSatish Balay isend[4] = info->mallocs; 8758965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 8764e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 8774e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 8784e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 8794e220ebcSLois Curfman McInnes info->memory = isend[3]; 8804e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 8818965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 8821c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 8832205254eSKarl Rupp 8844e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8854e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8864e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8874e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8884e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8898965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 8901c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 8912205254eSKarl Rupp 8924e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8934e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8944e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8954e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8964e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8978965ea79SLois Curfman McInnes } 8984e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8994e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9004e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9028965ea79SLois Curfman McInnes } 9038965ea79SLois Curfman McInnes 90466976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 905d71ae5a4SJacob Faibussowitsch { 90639ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 9078965ea79SLois Curfman McInnes 9083a40ed3dSBarry Smith PetscFunctionBegin; 90912c028f9SKris Buschelman switch (op) { 910512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 91112c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 91212c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 91343674050SBarry Smith MatCheckPreallocated(A, 1); 9149566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 91512c028f9SKris Buschelman break; 91612c028f9SKris Buschelman case MAT_ROW_ORIENTED: 91743674050SBarry Smith MatCheckPreallocated(A, 1); 9184e0d8c25SBarry Smith a->roworiented = flg; 9199566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 92012c028f9SKris Buschelman break; 9218c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 92213fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 92312c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 924d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 925d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 926d71ae5a4SJacob Faibussowitsch break; 927d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_OFF_PROC_ENTRIES: 928d71ae5a4SJacob Faibussowitsch a->donotstash = flg; 929d71ae5a4SJacob Faibussowitsch break; 93077e54ba9SKris Buschelman case MAT_SYMMETRIC: 93177e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 9329a4540c5SBarry Smith case MAT_HERMITIAN: 9339a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 934b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 935b94d7dedSBarry Smith case MAT_SPD: 936600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 9375d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 938b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 939b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 9409566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 94177e54ba9SKris Buschelman break; 942d71ae5a4SJacob Faibussowitsch default: 943d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 9443a40ed3dSBarry Smith } 9453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9468965ea79SLois Curfman McInnes } 9478965ea79SLois Curfman McInnes 94866976f2fSJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 949d71ae5a4SJacob Faibussowitsch { 9505b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 951637a0070SStefano Zampini const PetscScalar *l; 952637a0070SStefano Zampini PetscScalar x, *v, *vv, *r; 953637a0070SStefano Zampini PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 9545b2fa520SLois Curfman McInnes 9555b2fa520SLois Curfman McInnes PetscFunctionBegin; 9569566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(mdn->A, &vv)); 9579566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(mdn->A, &lda)); 9589566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &s2, &s3)); 9595b2fa520SLois Curfman McInnes if (ll) { 9609566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(ll, &s2a)); 96108401ef6SPierre Jolivet PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 9629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(ll, &l)); 9635b2fa520SLois Curfman McInnes for (i = 0; i < m; i++) { 9645b2fa520SLois Curfman McInnes x = l[i]; 965637a0070SStefano Zampini v = vv + i; 9669371c9d4SSatish Balay for (j = 0; j < n; j++) { 9679371c9d4SSatish Balay (*v) *= x; 9689371c9d4SSatish Balay v += lda; 9699371c9d4SSatish Balay } 9705b2fa520SLois Curfman McInnes } 9719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(ll, &l)); 9729566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * n * m)); 9735b2fa520SLois Curfman McInnes } 9745b2fa520SLois Curfman McInnes if (rr) { 975637a0070SStefano Zampini const PetscScalar *ar; 976637a0070SStefano Zampini 9779566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &s3a)); 97808401ef6SPierre Jolivet PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 9799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(rr, &ar)); 9806f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 9819566063dSJacob Faibussowitsch PetscCall(VecGetArray(mdn->lvec, &r)); 9829566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 9839566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 9849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(rr, &ar)); 9855b2fa520SLois Curfman McInnes for (i = 0; i < n; i++) { 9865b2fa520SLois Curfman McInnes x = r[i]; 987637a0070SStefano Zampini v = vv + i * lda; 9882205254eSKarl Rupp for (j = 0; j < m; j++) (*v++) *= x; 9895b2fa520SLois Curfman McInnes } 9909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mdn->lvec, &r)); 9919566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * n * m)); 9925b2fa520SLois Curfman McInnes } 9939566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 9943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9955b2fa520SLois Curfman McInnes } 9965b2fa520SLois Curfman McInnes 99766976f2fSJacob Faibussowitsch static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 998d71ae5a4SJacob Faibussowitsch { 9993501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 100013f74950SBarry Smith PetscInt i, j; 1001616b8fbbSStefano Zampini PetscMPIInt size; 1002329f5518SBarry Smith PetscReal sum = 0.0; 1003637a0070SStefano Zampini const PetscScalar *av, *v; 10043501a2bdSLois Curfman McInnes 10053a40ed3dSBarry Smith PetscFunctionBegin; 10069566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1007637a0070SStefano Zampini v = av; 10089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1009616b8fbbSStefano Zampini if (size == 1) { 10109566063dSJacob Faibussowitsch PetscCall(MatNorm(mdn->A, type, nrm)); 10113501a2bdSLois Curfman McInnes } else { 10123501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 1013d0f46423SBarry Smith for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 10149371c9d4SSatish Balay sum += PetscRealPart(PetscConj(*v) * (*v)); 10159371c9d4SSatish Balay v++; 10163501a2bdSLois Curfman McInnes } 10171c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 10188f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 10199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 10203a40ed3dSBarry Smith } else if (type == NORM_1) { 1021329f5518SBarry Smith PetscReal *tmp, *tmp2; 10229566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 1023064f8208SBarry Smith *nrm = 0.0; 1024637a0070SStefano Zampini v = av; 1025d0f46423SBarry Smith for (j = 0; j < mdn->A->cmap->n; j++) { 1026d0f46423SBarry Smith for (i = 0; i < mdn->A->rmap->n; i++) { 10279371c9d4SSatish Balay tmp[j] += PetscAbsScalar(*v); 10289371c9d4SSatish Balay v++; 10293501a2bdSLois Curfman McInnes } 10303501a2bdSLois Curfman McInnes } 10311c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1032d0f46423SBarry Smith for (j = 0; j < A->cmap->N; j++) { 1033064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 10343501a2bdSLois Curfman McInnes } 10359566063dSJacob Faibussowitsch PetscCall(PetscFree2(tmp, tmp2)); 10369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 10373a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1038329f5518SBarry Smith PetscReal ntemp; 10399566063dSJacob Faibussowitsch PetscCall(MatNorm(mdn->A, type, &ntemp)); 10401c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1041ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 10423501a2bdSLois Curfman McInnes } 10439566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 10443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10453501a2bdSLois Curfman McInnes } 10463501a2bdSLois Curfman McInnes 104766976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1048d71ae5a4SJacob Faibussowitsch { 10493501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 10503501a2bdSLois Curfman McInnes Mat B; 1051d0f46423SBarry Smith PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1052637a0070SStefano Zampini PetscInt j, i, lda; 105387828ca2SBarry Smith PetscScalar *v; 10543501a2bdSLois Curfman McInnes 10553a40ed3dSBarry Smith PetscFunctionBegin; 10567fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1057cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 10589566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 10599566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 10609566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 10619566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1062637a0070SStefano Zampini } else B = *matout; 10633501a2bdSLois Curfman McInnes 10649371c9d4SSatish Balay m = a->A->rmap->n; 10659371c9d4SSatish Balay n = a->A->cmap->n; 10669566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 10679566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 10689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rwork)); 10693501a2bdSLois Curfman McInnes for (i = 0; i < m; i++) rwork[i] = rstart + i; 10701acff37aSSatish Balay for (j = 0; j < n; j++) { 10719566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 10728e3a54c0SPierre Jolivet v = PetscSafePointerPlusOffset(v, lda); 10733501a2bdSLois Curfman McInnes } 10749566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 10759566063dSJacob Faibussowitsch PetscCall(PetscFree(rwork)); 10769566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 10779566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1078cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 10793501a2bdSLois Curfman McInnes *matout = B; 10803501a2bdSLois Curfman McInnes } else { 10819566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &B)); 10823501a2bdSLois Curfman McInnes } 10833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1084096963f5SLois Curfman McInnes } 1085096963f5SLois Curfman McInnes 10866849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 108752c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 10888965ea79SLois Curfman McInnes 108966976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_MPIDense(Mat A) 1090d71ae5a4SJacob Faibussowitsch { 1091273d9f13SBarry Smith PetscFunctionBegin; 10929566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 10939566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 109448a46eb9SPierre Jolivet if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 10953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1096273d9f13SBarry Smith } 1097273d9f13SBarry Smith 109866976f2fSJacob Faibussowitsch static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1099d71ae5a4SJacob Faibussowitsch { 1100488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1101488007eeSBarry Smith 1102488007eeSBarry Smith PetscFunctionBegin; 11039566063dSJacob Faibussowitsch PetscCall(MatAXPY(A->A, alpha, B->A, str)); 11043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1105488007eeSBarry Smith } 1106488007eeSBarry Smith 110766976f2fSJacob Faibussowitsch static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1108d71ae5a4SJacob Faibussowitsch { 1109ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1110ba337c44SJed Brown 1111ba337c44SJed Brown PetscFunctionBegin; 11129566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->A)); 11133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1114ba337c44SJed Brown } 1115ba337c44SJed Brown 111666976f2fSJacob Faibussowitsch static PetscErrorCode MatRealPart_MPIDense(Mat A) 1117d71ae5a4SJacob Faibussowitsch { 1118ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1119ba337c44SJed Brown 1120ba337c44SJed Brown PetscFunctionBegin; 11219566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 11223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1123ba337c44SJed Brown } 1124ba337c44SJed Brown 112566976f2fSJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1126d71ae5a4SJacob Faibussowitsch { 1127ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1128ba337c44SJed Brown 1129ba337c44SJed Brown PetscFunctionBegin; 11309566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 11313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1132ba337c44SJed Brown } 1133ba337c44SJed Brown 1134d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1135d71ae5a4SJacob Faibussowitsch { 1136637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 113749a6ff4bSBarry Smith 113849a6ff4bSBarry Smith PetscFunctionBegin; 113928b400f6SJacob Faibussowitsch PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 114028b400f6SJacob Faibussowitsch PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 11419566063dSJacob Faibussowitsch PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 11423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 114349a6ff4bSBarry Smith } 114449a6ff4bSBarry Smith 1145857cbf51SRichard Tran Mills PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 114652c5f739Sprj- 114766976f2fSJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1148d71ae5a4SJacob Faibussowitsch { 1149a873a8cdSSam Reynolds PetscInt i, m, n; 11500716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 11510716a85fSBarry Smith PetscReal *work; 11520716a85fSBarry Smith 11530716a85fSBarry Smith PetscFunctionBegin; 11549566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 11559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &work)); 1156857cbf51SRichard Tran Mills if (type == REDUCTION_MEAN_REALPART) { 11579566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1158857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 11599566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1160a873a8cdSSam Reynolds } else { 11619566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1162a873a8cdSSam Reynolds } 1163857cbf51SRichard Tran Mills if (type == NORM_2) { 11640716a85fSBarry Smith for (i = 0; i < n; i++) work[i] *= work[i]; 11650716a85fSBarry Smith } 1166857cbf51SRichard Tran Mills if (type == NORM_INFINITY) { 11671c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 11680716a85fSBarry Smith } else { 11691c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 11700716a85fSBarry Smith } 11719566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 1172857cbf51SRichard Tran Mills if (type == NORM_2) { 1173a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1174857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1175a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] /= m; 11760716a85fSBarry Smith } 11773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11780716a85fSBarry Smith } 11790716a85fSBarry Smith 1180d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1181d71ae5a4SJacob Faibussowitsch { 118273a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense *)x->data; 118373a71a0fSBarry Smith 118473a71a0fSBarry Smith PetscFunctionBegin; 11859566063dSJacob Faibussowitsch PetscCall(MatSetRandom(d->A, rctx)); 11863faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 11873faff063SStefano Zampini x->offloadmask = d->A->offloadmask; 11883faff063SStefano Zampini #endif 11893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 119073a71a0fSBarry Smith } 119173a71a0fSBarry Smith 1192d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1193d71ae5a4SJacob Faibussowitsch { 11943b49f96aSBarry Smith PetscFunctionBegin; 11953b49f96aSBarry Smith *missing = PETSC_FALSE; 11963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11973b49f96aSBarry Smith } 11983b49f96aSBarry Smith 11994222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1200cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 12016718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1202a1f9a5e6SStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 12036718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 12046718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 12055d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1206cc48ffa7SToby Isaac 120709dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 120809dc0095SBarry Smith MatGetRow_MPIDense, 120909dc0095SBarry Smith MatRestoreRow_MPIDense, 121009dc0095SBarry Smith MatMult_MPIDense, 121197304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 12127c922b88SBarry Smith MatMultTranspose_MPIDense, 12137c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 1214f4259b30SLisandro Dalcin NULL, 1215f4259b30SLisandro Dalcin NULL, 1216f4259b30SLisandro Dalcin NULL, 1217f4259b30SLisandro Dalcin /* 10*/ NULL, 1218f4259b30SLisandro Dalcin NULL, 1219f4259b30SLisandro Dalcin NULL, 1220f4259b30SLisandro Dalcin NULL, 122109dc0095SBarry Smith MatTranspose_MPIDense, 122297304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 12236e4ee0c6SHong Zhang MatEqual_MPIDense, 122409dc0095SBarry Smith MatGetDiagonal_MPIDense, 12255b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 122609dc0095SBarry Smith MatNorm_MPIDense, 122797304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 122809dc0095SBarry Smith MatAssemblyEnd_MPIDense, 122909dc0095SBarry Smith MatSetOption_MPIDense, 123009dc0095SBarry Smith MatZeroEntries_MPIDense, 1231d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1232f4259b30SLisandro Dalcin NULL, 1233f4259b30SLisandro Dalcin NULL, 1234f4259b30SLisandro Dalcin NULL, 1235f4259b30SLisandro Dalcin NULL, 12364994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1237f4259b30SLisandro Dalcin NULL, 1238f4259b30SLisandro Dalcin NULL, 1239c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 1240f4259b30SLisandro Dalcin NULL, 1241d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 1242f4259b30SLisandro Dalcin NULL, 1243f4259b30SLisandro Dalcin NULL, 1244f4259b30SLisandro Dalcin NULL, 1245f4259b30SLisandro Dalcin NULL, 1246d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 12477dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 1248f4259b30SLisandro Dalcin NULL, 124909dc0095SBarry Smith MatGetValues_MPIDense, 1250a76f77c3SStefano Zampini MatCopy_MPIDense, 1251f4259b30SLisandro Dalcin /* 44*/ NULL, 125209dc0095SBarry Smith MatScale_MPIDense, 12532f605a99SJose E. Roman MatShift_MPIDense, 1254f4259b30SLisandro Dalcin NULL, 1255f4259b30SLisandro Dalcin NULL, 125673a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 1257f4259b30SLisandro Dalcin NULL, 1258f4259b30SLisandro Dalcin NULL, 1259f4259b30SLisandro Dalcin NULL, 1260f4259b30SLisandro Dalcin NULL, 1261f4259b30SLisandro Dalcin /* 54*/ NULL, 1262f4259b30SLisandro Dalcin NULL, 1263f4259b30SLisandro Dalcin NULL, 1264f4259b30SLisandro Dalcin NULL, 1265f4259b30SLisandro Dalcin NULL, 12667dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1267b9b97703SBarry Smith MatDestroy_MPIDense, 1268b9b97703SBarry Smith MatView_MPIDense, 1269f4259b30SLisandro Dalcin NULL, 1270f4259b30SLisandro Dalcin NULL, 1271f4259b30SLisandro Dalcin /* 64*/ NULL, 1272f4259b30SLisandro Dalcin NULL, 1273f4259b30SLisandro Dalcin NULL, 1274f4259b30SLisandro Dalcin NULL, 1275f4259b30SLisandro Dalcin NULL, 1276f4259b30SLisandro Dalcin /* 69*/ NULL, 1277f4259b30SLisandro Dalcin NULL, 1278f4259b30SLisandro Dalcin NULL, 1279f4259b30SLisandro Dalcin NULL, 1280f4259b30SLisandro Dalcin NULL, 1281f4259b30SLisandro Dalcin /* 74*/ NULL, 1282f4259b30SLisandro Dalcin NULL, 1283f4259b30SLisandro Dalcin NULL, 1284f4259b30SLisandro Dalcin NULL, 1285f4259b30SLisandro Dalcin NULL, 1286f4259b30SLisandro Dalcin /* 79*/ NULL, 1287f4259b30SLisandro Dalcin NULL, 1288f4259b30SLisandro Dalcin NULL, 1289f4259b30SLisandro Dalcin NULL, 12905bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1291f4259b30SLisandro Dalcin NULL, 1292f4259b30SLisandro Dalcin NULL, 1293f4259b30SLisandro Dalcin NULL, 1294f4259b30SLisandro Dalcin NULL, 1295f4259b30SLisandro Dalcin NULL, 1296f4259b30SLisandro Dalcin /* 89*/ NULL, 1297f4259b30SLisandro Dalcin NULL, 1298f4259b30SLisandro Dalcin NULL, 1299f4259b30SLisandro Dalcin NULL, 1300f4259b30SLisandro Dalcin NULL, 1301f4259b30SLisandro Dalcin /* 94*/ NULL, 1302f4259b30SLisandro Dalcin NULL, 13036718818eSStefano Zampini MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1304cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1305f4259b30SLisandro Dalcin NULL, 13064222ddf1SHong Zhang /* 99*/ MatProductSetFromOptions_MPIDense, 1307f4259b30SLisandro Dalcin NULL, 1308f4259b30SLisandro Dalcin NULL, 1309ba337c44SJed Brown MatConjugate_MPIDense, 1310f4259b30SLisandro Dalcin NULL, 1311f4259b30SLisandro Dalcin /*104*/ NULL, 1312ba337c44SJed Brown MatRealPart_MPIDense, 1313ba337c44SJed Brown MatImaginaryPart_MPIDense, 1314f4259b30SLisandro Dalcin NULL, 1315f4259b30SLisandro Dalcin NULL, 1316f4259b30SLisandro Dalcin /*109*/ NULL, 1317f4259b30SLisandro Dalcin NULL, 1318f4259b30SLisandro Dalcin NULL, 131949a6ff4bSBarry Smith MatGetColumnVector_MPIDense, 13203b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 1321f4259b30SLisandro Dalcin /*114*/ NULL, 1322f4259b30SLisandro Dalcin NULL, 1323f4259b30SLisandro Dalcin NULL, 1324f4259b30SLisandro Dalcin NULL, 1325f4259b30SLisandro Dalcin NULL, 1326f4259b30SLisandro Dalcin /*119*/ NULL, 1327f4259b30SLisandro Dalcin NULL, 1328459e8d23SBlanca Mellado Pinto MatMultHermitianTranspose_MPIDense, 1329459e8d23SBlanca Mellado Pinto MatMultHermitianTransposeAdd_MPIDense, 1330f4259b30SLisandro Dalcin NULL, 1331f4259b30SLisandro Dalcin /*124*/ NULL, 1332a873a8cdSSam Reynolds MatGetColumnReductions_MPIDense, 1333f4259b30SLisandro Dalcin NULL, 1334f4259b30SLisandro Dalcin NULL, 1335f4259b30SLisandro Dalcin NULL, 1336f4259b30SLisandro Dalcin /*129*/ NULL, 1337f4259b30SLisandro Dalcin NULL, 13386718818eSStefano Zampini MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1339cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 1340f4259b30SLisandro Dalcin NULL, 1341f4259b30SLisandro Dalcin /*134*/ NULL, 1342f4259b30SLisandro Dalcin NULL, 1343f4259b30SLisandro Dalcin NULL, 1344f4259b30SLisandro Dalcin NULL, 1345f4259b30SLisandro Dalcin NULL, 1346f4259b30SLisandro Dalcin /*139*/ NULL, 1347f4259b30SLisandro Dalcin NULL, 1348f4259b30SLisandro Dalcin NULL, 1349f4259b30SLisandro Dalcin NULL, 1350f4259b30SLisandro Dalcin NULL, 13514222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIDense, 1352f4259b30SLisandro Dalcin /*145*/ NULL, 1353f4259b30SLisandro Dalcin NULL, 135499a7f59eSMark Adams NULL, 135599a7f59eSMark Adams NULL, 13567fb60732SBarry Smith NULL, 1357dec0b466SHong Zhang /*150*/ NULL, 1358eede4a3fSMark Adams NULL, 1359*4cc2b5b5SPierre Jolivet NULL, 1360dec0b466SHong Zhang NULL}; 13618965ea79SLois Curfman McInnes 136266976f2fSJacob Faibussowitsch static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1363d71ae5a4SJacob Faibussowitsch { 1364637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 136547d993e7Ssuyashtn MatType mtype = MATSEQDENSE; 1366a23d5eceSKris Buschelman 1367a23d5eceSKris Buschelman PetscFunctionBegin; 136828b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 13699566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 13709566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 1371637a0070SStefano Zampini if (!a->A) { 13729566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 13739566063dSJacob Faibussowitsch PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1374637a0070SStefano Zampini } 137547d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) 137647d993e7Ssuyashtn PetscBool iscuda; 13779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 137847d993e7Ssuyashtn if (iscuda) mtype = MATSEQDENSECUDA; 137947d993e7Ssuyashtn #endif 138047d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 138147d993e7Ssuyashtn PetscBool iship; 138247d993e7Ssuyashtn PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 138347d993e7Ssuyashtn if (iship) mtype = MATSEQDENSEHIP; 138447d993e7Ssuyashtn #endif 138547d993e7Ssuyashtn PetscCall(MatSetType(a->A, mtype)); 13869566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 138747d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 13883faff063SStefano Zampini mat->offloadmask = a->A->offloadmask; 13893faff063SStefano Zampini #endif 1390637a0070SStefano Zampini mat->preallocated = PETSC_TRUE; 13916f08ad05SPierre Jolivet mat->assembled = PETSC_TRUE; 13923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1393a23d5eceSKris Buschelman } 1394a23d5eceSKris Buschelman 1395d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1396d71ae5a4SJacob Faibussowitsch { 13974e29119aSPierre Jolivet Mat B, C; 13984e29119aSPierre Jolivet 13994e29119aSPierre Jolivet PetscFunctionBegin; 14009566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 14019566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 14029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 14034e29119aSPierre Jolivet if (reuse == MAT_REUSE_MATRIX) { 14044e29119aSPierre Jolivet C = *newmat; 14054e29119aSPierre Jolivet } else C = NULL; 14069566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 14079566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 14084e29119aSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 14099566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &C)); 14104e29119aSPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 14113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14124e29119aSPierre Jolivet } 14134e29119aSPierre Jolivet 141466976f2fSJacob Faibussowitsch static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1415d71ae5a4SJacob Faibussowitsch { 14164e29119aSPierre Jolivet Mat B, C; 14174e29119aSPierre Jolivet 14184e29119aSPierre Jolivet PetscFunctionBegin; 14199566063dSJacob Faibussowitsch PetscCall(MatDenseGetLocalMatrix(A, &C)); 14209566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 14214e29119aSPierre Jolivet if (reuse == MAT_REUSE_MATRIX) { 14224e29119aSPierre Jolivet C = *newmat; 14234e29119aSPierre Jolivet } else C = NULL; 14249566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 14259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 14264e29119aSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 14279566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &C)); 14284e29119aSPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 14293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14304e29119aSPierre Jolivet } 14314e29119aSPierre Jolivet 143265b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1433d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1434d71ae5a4SJacob Faibussowitsch { 1435f709e8d7SPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 14368ea901baSHong Zhang Mat mat_elemental; 143732d7a744SHong Zhang PetscScalar *v; 1438f709e8d7SPierre Jolivet PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 14398ea901baSHong Zhang 14408baccfbdSHong Zhang PetscFunctionBegin; 1441378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1442378336b6SHong Zhang mat_elemental = *newmat; 14439566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(*newmat)); 1444378336b6SHong Zhang } else { 14459566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 14469566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 14479566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 14489566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_elemental)); 14499566063dSJacob Faibussowitsch PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1450378336b6SHong Zhang } 1451378336b6SHong Zhang 14529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &rows, N, &cols)); 145332d7a744SHong Zhang for (i = 0; i < N; i++) cols[i] = i; 145432d7a744SHong Zhang for (i = 0; i < m; i++) rows[i] = rstart + i; 14558ea901baSHong Zhang 1456637a0070SStefano Zampini /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 14579566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(A, &v)); 1458f709e8d7SPierre Jolivet PetscCall(MatDenseGetLDA(a->A, &lda)); 1459f709e8d7SPierre Jolivet if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1460f709e8d7SPierre Jolivet else { 1461f709e8d7SPierre Jolivet for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1462f709e8d7SPierre Jolivet } 14639566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 14649566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 14659566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A, &v)); 14669566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, cols)); 14678ea901baSHong Zhang 1468511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 14699566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &mat_elemental)); 14708ea901baSHong Zhang } else { 14718ea901baSHong Zhang *newmat = mat_elemental; 14728ea901baSHong Zhang } 14733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14748baccfbdSHong Zhang } 147565b80a83SHong Zhang #endif 14768baccfbdSHong Zhang 1477d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1478d71ae5a4SJacob Faibussowitsch { 147986aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 148086aefd0dSHong Zhang 148186aefd0dSHong Zhang PetscFunctionBegin; 14829566063dSJacob Faibussowitsch PetscCall(MatDenseGetColumn(mat->A, col, vals)); 14833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 148486aefd0dSHong Zhang } 148586aefd0dSHong Zhang 1486d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1487d71ae5a4SJacob Faibussowitsch { 148886aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 148986aefd0dSHong Zhang 149086aefd0dSHong Zhang PetscFunctionBegin; 14919566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreColumn(mat->A, vals)); 14923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 149386aefd0dSHong Zhang } 149486aefd0dSHong Zhang 1495d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1496d71ae5a4SJacob Faibussowitsch { 149794e2cb23SJakub Kruzik Mat_MPIDense *mat; 149894e2cb23SJakub Kruzik PetscInt m, nloc, N; 149994e2cb23SJakub Kruzik 150094e2cb23SJakub Kruzik PetscFunctionBegin; 15019566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 15029566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 150394e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 150494e2cb23SJakub Kruzik PetscInt sum; 150594e2cb23SJakub Kruzik 150648a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 150794e2cb23SJakub Kruzik /* Check sum(n) = N */ 15081c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 150908401ef6SPierre Jolivet PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 151094e2cb23SJakub Kruzik 15119566063dSJacob Faibussowitsch PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 15129566063dSJacob Faibussowitsch PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 151394e2cb23SJakub Kruzik } 151494e2cb23SJakub Kruzik 151594e2cb23SJakub Kruzik /* numeric phase */ 151694e2cb23SJakub Kruzik mat = (Mat_MPIDense *)(*outmat)->data; 15179566063dSJacob Faibussowitsch PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 15183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 151994e2cb23SJakub Kruzik } 152094e2cb23SJakub Kruzik 1521d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1522d71ae5a4SJacob Faibussowitsch { 15236947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15246947451fSStefano Zampini PetscInt lda; 15256947451fSStefano Zampini 15266947451fSStefano Zampini PetscFunctionBegin; 152728b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 152828b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1529d16ceb75SStefano Zampini if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 15306947451fSStefano Zampini a->vecinuse = col + 1; 15319566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 15329566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 15338e3a54c0SPierre Jolivet PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 15346947451fSStefano Zampini *v = a->cvec; 15353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15366947451fSStefano Zampini } 15376947451fSStefano Zampini 1538d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1539d71ae5a4SJacob Faibussowitsch { 15406947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15416947451fSStefano Zampini 15426947451fSStefano Zampini PetscFunctionBegin; 154328b400f6SJacob Faibussowitsch PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 154428b400f6SJacob Faibussowitsch PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 15456947451fSStefano Zampini a->vecinuse = 0; 15469566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 15479566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1548742765d3SMatthew Knepley if (v) *v = NULL; 15493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15506947451fSStefano Zampini } 15516947451fSStefano Zampini 1552d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1553d71ae5a4SJacob Faibussowitsch { 15546947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15556947451fSStefano Zampini PetscInt lda; 15566947451fSStefano Zampini 15576947451fSStefano Zampini PetscFunctionBegin; 155828b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 155928b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1560d16ceb75SStefano Zampini if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 15616947451fSStefano Zampini a->vecinuse = col + 1; 15629566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 15639566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 15648e3a54c0SPierre Jolivet PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 15659566063dSJacob Faibussowitsch PetscCall(VecLockReadPush(a->cvec)); 15666947451fSStefano Zampini *v = a->cvec; 15673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15686947451fSStefano Zampini } 15696947451fSStefano Zampini 1570d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1571d71ae5a4SJacob Faibussowitsch { 15726947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15736947451fSStefano Zampini 15746947451fSStefano Zampini PetscFunctionBegin; 15755f80ce2aSJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 15765f80ce2aSJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 15776947451fSStefano Zampini a->vecinuse = 0; 15789566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 15799566063dSJacob Faibussowitsch PetscCall(VecLockReadPop(a->cvec)); 15809566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1581742765d3SMatthew Knepley if (v) *v = NULL; 15823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15836947451fSStefano Zampini } 15846947451fSStefano Zampini 1585d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1586d71ae5a4SJacob Faibussowitsch { 15876947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15886947451fSStefano Zampini PetscInt lda; 15896947451fSStefano Zampini 15906947451fSStefano Zampini PetscFunctionBegin; 15915f80ce2aSJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 15925f80ce2aSJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1593d16ceb75SStefano Zampini if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 15946947451fSStefano Zampini a->vecinuse = col + 1; 15959566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 15969566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 15978e3a54c0SPierre Jolivet PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 15986947451fSStefano Zampini *v = a->cvec; 15993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16006947451fSStefano Zampini } 16016947451fSStefano Zampini 1602d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1603d71ae5a4SJacob Faibussowitsch { 16046947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 16056947451fSStefano Zampini 16066947451fSStefano Zampini PetscFunctionBegin; 16075f80ce2aSJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 16085f80ce2aSJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 16096947451fSStefano Zampini a->vecinuse = 0; 16109566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 16119566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1612742765d3SMatthew Knepley if (v) *v = NULL; 16133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16146947451fSStefano Zampini } 16156947451fSStefano Zampini 161666976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1617d71ae5a4SJacob Faibussowitsch { 16185ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1619616b8fbbSStefano Zampini Mat_MPIDense *c; 1620616b8fbbSStefano Zampini MPI_Comm comm; 1621a2748737SPierre Jolivet PetscInt pbegin, pend; 16225ea7661aSPierre Jolivet 16235ea7661aSPierre Jolivet PetscFunctionBegin; 16249566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 16255f80ce2aSJacob Faibussowitsch PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 16265f80ce2aSJacob Faibussowitsch PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1627a2748737SPierre Jolivet pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1628a2748737SPierre Jolivet pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 16295ea7661aSPierre Jolivet if (!a->cmat) { 16309566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &a->cmat)); 16319566063dSJacob Faibussowitsch PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1632a2748737SPierre Jolivet if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1633a2748737SPierre Jolivet else { 1634a2748737SPierre Jolivet PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1635a2748737SPierre Jolivet PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1636a2748737SPierre Jolivet PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1637a2748737SPierre Jolivet } 16389566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 16399566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1640a2748737SPierre Jolivet } else { 1641a2748737SPierre Jolivet PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1642a2748737SPierre Jolivet if (same && a->cmat->rmap->N != A->rmap->N) { 1643a2748737SPierre Jolivet same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1644a2748737SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1645a2748737SPierre Jolivet } 1646a2748737SPierre Jolivet if (!same) { 1647a2748737SPierre Jolivet PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1648a2748737SPierre Jolivet PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1649a2748737SPierre Jolivet PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1650a2748737SPierre Jolivet PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1651a2748737SPierre Jolivet PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1652a2748737SPierre Jolivet } 1653a2748737SPierre Jolivet if (cend - cbegin != a->cmat->cmap->N) { 16549566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 16559566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 16569566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 16579566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 16585ea7661aSPierre Jolivet } 1659a2748737SPierre Jolivet } 1660616b8fbbSStefano Zampini c = (Mat_MPIDense *)a->cmat->data; 16615f80ce2aSJacob Faibussowitsch PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1662a2748737SPierre Jolivet PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 16633faff063SStefano Zampini 1664616b8fbbSStefano Zampini a->cmat->preallocated = PETSC_TRUE; 1665616b8fbbSStefano Zampini a->cmat->assembled = PETSC_TRUE; 16663faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 16673faff063SStefano Zampini a->cmat->offloadmask = c->A->offloadmask; 16683faff063SStefano Zampini #endif 16695ea7661aSPierre Jolivet a->matinuse = cbegin + 1; 16705ea7661aSPierre Jolivet *v = a->cmat; 16713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16725ea7661aSPierre Jolivet } 16735ea7661aSPierre Jolivet 167466976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1675d71ae5a4SJacob Faibussowitsch { 16765ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1677616b8fbbSStefano Zampini Mat_MPIDense *c; 16785ea7661aSPierre Jolivet 16795ea7661aSPierre Jolivet PetscFunctionBegin; 16805f80ce2aSJacob Faibussowitsch PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 16815f80ce2aSJacob Faibussowitsch PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 16825f80ce2aSJacob Faibussowitsch PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 16835ea7661aSPierre Jolivet a->matinuse = 0; 1684616b8fbbSStefano Zampini c = (Mat_MPIDense *)a->cmat->data; 16859566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1686742765d3SMatthew Knepley if (v) *v = NULL; 16873faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 16883faff063SStefano Zampini A->offloadmask = a->A->offloadmask; 16893faff063SStefano Zampini #endif 16903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16915ea7661aSPierre Jolivet } 16925ea7661aSPierre Jolivet 1693002bc6daSRichard Tran Mills /*MC 1694002bc6daSRichard Tran Mills MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1695002bc6daSRichard Tran Mills 16962ef1f0ffSBarry Smith Options Database Key: 169711a5261eSBarry Smith . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1698002bc6daSRichard Tran Mills 1699002bc6daSRichard Tran Mills Level: beginner 1700002bc6daSRichard Tran Mills 17011cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1702002bc6daSRichard Tran Mills M*/ 17034742e46bSJacob Faibussowitsch PetscErrorCode MatCreate_MPIDense(Mat mat) 1704d71ae5a4SJacob Faibussowitsch { 1705273d9f13SBarry Smith Mat_MPIDense *a; 1706273d9f13SBarry Smith 1707273d9f13SBarry Smith PetscFunctionBegin; 17084dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a)); 1709b0a32e0cSBarry Smith mat->data = (void *)a; 1710aea10558SJacob Faibussowitsch mat->ops[0] = MatOps_Values; 1711273d9f13SBarry Smith 1712273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1713273d9f13SBarry Smith 1714273d9f13SBarry Smith /* build cache for off array entries formed */ 1715273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 17162205254eSKarl Rupp 17179566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1718273d9f13SBarry Smith 1719273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1720f4259b30SLisandro Dalcin a->lvec = NULL; 1721f4259b30SLisandro Dalcin a->Mvctx = NULL; 1722273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1723273d9f13SBarry Smith 17249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 17259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 17269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 17279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 17289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 17299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 17309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 17319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 17329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 17339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 17349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 17359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 17369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 17379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 17389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 17399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 17409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 17419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 17429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 17439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 17449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 17458baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 17469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 17478baccfbdSHong Zhang #endif 1748d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 17499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1750d24d4204SJose E. Roman #endif 1751637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 17529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1753637a0070SStefano Zampini #endif 17549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 17559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 17569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 17576718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 17589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 17599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 17606718818eSStefano Zampini #endif 176147d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 176247d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 176347d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 176447d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 176547d993e7Ssuyashtn #endif 17669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 17679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 17680be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 17690be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 17700be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 17719566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 17723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1773273d9f13SBarry Smith } 1774273d9f13SBarry Smith 1775209238afSKris Buschelman /*MC 1776002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1777209238afSKris Buschelman 177811a5261eSBarry Smith This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 177911a5261eSBarry Smith and `MATMPIDENSE` otherwise. 1780209238afSKris Buschelman 17812ef1f0ffSBarry Smith Options Database Key: 178211a5261eSBarry Smith . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1783209238afSKris Buschelman 1784209238afSKris Buschelman Level: beginner 1785209238afSKris Buschelman 17861cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 17876947451fSStefano Zampini M*/ 17886947451fSStefano Zampini 17895d83a8b1SBarry Smith /*@ 1790273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1791273d9f13SBarry Smith 17928f490ea3SStefano Zampini Collective 1793273d9f13SBarry Smith 1794273d9f13SBarry Smith Input Parameters: 1795fe59aa6dSJacob Faibussowitsch + B - the matrix 17962ef1f0ffSBarry Smith - data - optional location of matrix data. Set to `NULL` for PETSc 1797273d9f13SBarry Smith to control all matrix memory allocation. 1798273d9f13SBarry Smith 17992ef1f0ffSBarry Smith Level: intermediate 18002ef1f0ffSBarry Smith 1801273d9f13SBarry Smith Notes: 18022ef1f0ffSBarry Smith The dense format is fully compatible with standard Fortran 1803273d9f13SBarry Smith storage by columns. 1804273d9f13SBarry Smith 1805273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1806273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 18072ef1f0ffSBarry Smith set `data` to `NULL`. 1808273d9f13SBarry Smith 18091cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1810273d9f13SBarry Smith @*/ 1811d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1812d71ae5a4SJacob Faibussowitsch { 1813273d9f13SBarry Smith PetscFunctionBegin; 1814d5ea218eSStefano Zampini PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1815cac4c232SBarry Smith PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 18163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1817273d9f13SBarry Smith } 1818273d9f13SBarry Smith 1819d3042a70SBarry Smith /*@ 182011a5261eSBarry Smith MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1821d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1822d3042a70SBarry Smith into a matrix 1823d3042a70SBarry Smith 1824d3042a70SBarry Smith Not Collective 1825d3042a70SBarry Smith 1826d3042a70SBarry Smith Input Parameters: 1827d3042a70SBarry Smith + mat - the matrix 1828d3042a70SBarry Smith - array - the array in column major order 1829d3042a70SBarry Smith 18302ef1f0ffSBarry Smith Level: developer 18312ef1f0ffSBarry Smith 183211a5261eSBarry Smith Note: 183311a5261eSBarry Smith You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1834d3042a70SBarry Smith freed when the matrix is destroyed. 1835d3042a70SBarry Smith 18361cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 183711a5261eSBarry Smith `MatDenseReplaceArray()` 1838d3042a70SBarry Smith @*/ 1839d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1840d71ae5a4SJacob Faibussowitsch { 1841d3042a70SBarry Smith PetscFunctionBegin; 1842d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1843cac4c232SBarry Smith PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 18449566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 184547d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1846637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1847637a0070SStefano Zampini #endif 18483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1849d3042a70SBarry Smith } 1850d3042a70SBarry Smith 1851d3042a70SBarry Smith /*@ 185211a5261eSBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1853d3042a70SBarry Smith 1854d3042a70SBarry Smith Not Collective 1855d3042a70SBarry Smith 18562fe279fdSBarry Smith Input Parameter: 1857d3042a70SBarry Smith . mat - the matrix 1858d3042a70SBarry Smith 18592ef1f0ffSBarry Smith Level: developer 18602ef1f0ffSBarry Smith 186111a5261eSBarry Smith Note: 186211a5261eSBarry Smith You can only call this after a call to `MatDensePlaceArray()` 1863d3042a70SBarry Smith 18641cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1865d3042a70SBarry Smith @*/ 1866d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseResetArray(Mat mat) 1867d71ae5a4SJacob Faibussowitsch { 1868d3042a70SBarry Smith PetscFunctionBegin; 1869d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1870cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 18719566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 18723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1873d3042a70SBarry Smith } 1874d3042a70SBarry Smith 1875d5ea218eSStefano Zampini /*@ 1876d5ea218eSStefano Zampini MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1877d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 1878d5ea218eSStefano Zampini into a matrix 1879d5ea218eSStefano Zampini 1880d5ea218eSStefano Zampini Not Collective 1881d5ea218eSStefano Zampini 1882d5ea218eSStefano Zampini Input Parameters: 1883d5ea218eSStefano Zampini + mat - the matrix 1884d5ea218eSStefano Zampini - array - the array in column major order 1885d5ea218eSStefano Zampini 18862ef1f0ffSBarry Smith Level: developer 18872ef1f0ffSBarry Smith 188811a5261eSBarry Smith Note: 188911a5261eSBarry Smith The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1890d5ea218eSStefano Zampini freed by the user. It will be freed when the matrix is destroyed. 1891d5ea218eSStefano Zampini 18921cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1893d5ea218eSStefano Zampini @*/ 1894d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1895d71ae5a4SJacob Faibussowitsch { 1896d5ea218eSStefano Zampini PetscFunctionBegin; 1897d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1898cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 18999566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 190047d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1901d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1902d5ea218eSStefano Zampini #endif 19033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1904d5ea218eSStefano Zampini } 1905d5ea218eSStefano Zampini 19065d83a8b1SBarry Smith /*@ 190711a5261eSBarry Smith MatCreateDense - Creates a matrix in `MATDENSE` format. 19088965ea79SLois Curfman McInnes 1909d083f849SBarry Smith Collective 1910db81eaa0SLois Curfman McInnes 19118965ea79SLois Curfman McInnes Input Parameters: 1912db81eaa0SLois Curfman McInnes + comm - MPI communicator 19132ef1f0ffSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 19142ef1f0ffSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 19152ef1f0ffSBarry Smith . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 19162ef1f0ffSBarry Smith . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 19172ef1f0ffSBarry Smith - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1918dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 19198965ea79SLois Curfman McInnes 19208965ea79SLois Curfman McInnes Output Parameter: 1921477f1c0bSLois Curfman McInnes . A - the matrix 19228965ea79SLois Curfman McInnes 19232ef1f0ffSBarry Smith Level: intermediate 19242ef1f0ffSBarry Smith 1925b259b22eSLois Curfman McInnes Notes: 19262ef1f0ffSBarry Smith The dense format is fully compatible with standard Fortran 192739ddd567SLois Curfman McInnes storage by columns. 192811a5261eSBarry Smith 192911a5261eSBarry Smith Although local portions of the matrix are stored in column-major 1930002bc6daSRichard Tran Mills order, the matrix is partitioned across MPI ranks by row. 19318965ea79SLois Curfman McInnes 193218f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 193318f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 19342ef1f0ffSBarry Smith set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 193518f449edSLois Curfman McInnes 19368965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 19378965ea79SLois Curfman McInnes (possibly both). 19388965ea79SLois Curfman McInnes 19391cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 19408965ea79SLois Curfman McInnes @*/ 1941d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1942d71ae5a4SJacob Faibussowitsch { 19433a40ed3dSBarry Smith PetscFunctionBegin; 19449566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 19459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 19466f08ad05SPierre Jolivet PetscCall(MatSetType(*A, MATDENSE)); 19479566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(*A, data)); 19486f08ad05SPierre Jolivet PetscCall(MatMPIDenseSetPreallocation(*A, data)); 19493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19508965ea79SLois Curfman McInnes } 19518965ea79SLois Curfman McInnes 1952d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1953d71ae5a4SJacob Faibussowitsch { 19548965ea79SLois Curfman McInnes Mat mat; 19553501a2bdSLois Curfman McInnes Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 19568965ea79SLois Curfman McInnes 19573a40ed3dSBarry Smith PetscFunctionBegin; 1958f4259b30SLisandro Dalcin *newmat = NULL; 19599566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 19609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 19619566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1962834f8fabSBarry Smith a = (Mat_MPIDense *)mat->data; 19635aa7edbeSHong Zhang 1964d5f3da31SBarry Smith mat->factortype = A->factortype; 1965c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1966273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 19678965ea79SLois Curfman McInnes 1968e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 19693782ba37SSatish Balay a->donotstash = oldmat->donotstash; 1970e04c1aa4SHong Zhang 19719566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 19729566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 19738965ea79SLois Curfman McInnes 19749566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 197501b82886SBarry Smith 19768965ea79SLois Curfman McInnes *newmat = mat; 19773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19788965ea79SLois Curfman McInnes } 19798965ea79SLois Curfman McInnes 198066976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1981d71ae5a4SJacob Faibussowitsch { 198287d5ce66SSatish Balay PetscBool isbinary; 198387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 198487d5ce66SSatish Balay PetscBool ishdf5; 198587d5ce66SSatish Balay #endif 1986eb91f321SVaclav Hapla 1987eb91f321SVaclav Hapla PetscFunctionBegin; 1988eb91f321SVaclav Hapla PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1989eb91f321SVaclav Hapla PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1990eb91f321SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 19919566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 19929566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 199387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 19949566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 199587d5ce66SSatish Balay #endif 1996eb91f321SVaclav Hapla if (isbinary) { 19979566063dSJacob Faibussowitsch PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1998eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 199987d5ce66SSatish Balay } else if (ishdf5) { 20009566063dSJacob Faibussowitsch PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2001eb91f321SVaclav Hapla #endif 200298921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 20033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2004eb91f321SVaclav Hapla } 2005eb91f321SVaclav Hapla 2006d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2007d71ae5a4SJacob Faibussowitsch { 20086e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 20096e4ee0c6SHong Zhang Mat a, b; 201090ace30eSBarry Smith 20116e4ee0c6SHong Zhang PetscFunctionBegin; 20126e4ee0c6SHong Zhang a = matA->A; 20136e4ee0c6SHong Zhang b = matB->A; 20142cf15c64SPierre Jolivet PetscCall(MatEqual(a, b, flag)); 20152cf15c64SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 20163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20176e4ee0c6SHong Zhang } 201890ace30eSBarry Smith 201966976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2020d71ae5a4SJacob Faibussowitsch { 20216718818eSStefano Zampini Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2022baa3c1c6SHong Zhang 2023baa3c1c6SHong Zhang PetscFunctionBegin; 20249566063dSJacob Faibussowitsch PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 20259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&atb->atb)); 20269566063dSJacob Faibussowitsch PetscCall(PetscFree(atb)); 20273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2028baa3c1c6SHong Zhang } 2029baa3c1c6SHong Zhang 203066976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2031d71ae5a4SJacob Faibussowitsch { 20326718818eSStefano Zampini Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2033cc48ffa7SToby Isaac 2034cc48ffa7SToby Isaac PetscFunctionBegin; 20359566063dSJacob Faibussowitsch PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 20369566063dSJacob Faibussowitsch PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 20379566063dSJacob Faibussowitsch PetscCall(PetscFree(abt)); 20383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2039cc48ffa7SToby Isaac } 2040cc48ffa7SToby Isaac 2041d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2042d71ae5a4SJacob Faibussowitsch { 2043baa3c1c6SHong Zhang Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 20446718818eSStefano Zampini Mat_TransMatMultDense *atb; 2045cb20be35SHong Zhang MPI_Comm comm; 20466718818eSStefano Zampini PetscMPIInt size, *recvcounts; 20476718818eSStefano Zampini PetscScalar *carray, *sendbuf; 2048637a0070SStefano Zampini const PetscScalar *atbarray; 2049a1f9a5e6SStefano Zampini PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2050e68c0b26SHong Zhang const PetscInt *ranges; 2051cb20be35SHong Zhang 2052cb20be35SHong Zhang PetscFunctionBegin; 20536718818eSStefano Zampini MatCheckProduct(C, 3); 20545f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 20556718818eSStefano Zampini atb = (Mat_TransMatMultDense *)C->product->data; 20566718818eSStefano Zampini recvcounts = atb->recvcounts; 20576718818eSStefano Zampini sendbuf = atb->sendbuf; 20586718818eSStefano Zampini 20599566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 20609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 2061e68c0b26SHong Zhang 2062c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 20639566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 2064cb20be35SHong Zhang 20659566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(C, &ranges)); 2066cb20be35SHong Zhang 2067660d5466SHong Zhang /* arrange atbarray into sendbuf */ 20689566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 20699566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2070637a0070SStefano Zampini for (proc = 0, k = 0; proc < size; proc++) { 2071baa3c1c6SHong Zhang for (j = 0; j < cN; j++) { 2072a1f9a5e6SStefano Zampini for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2073cb20be35SHong Zhang } 2074cb20be35SHong Zhang } 20759566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2076637a0070SStefano Zampini 2077c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 20789566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 20799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 20809566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 208167af85e8SPierre Jolivet PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 20829566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 20839566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 20843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2085cb20be35SHong Zhang } 2086cb20be35SHong Zhang 2087d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2088d71ae5a4SJacob Faibussowitsch { 2089cb20be35SHong Zhang MPI_Comm comm; 2090baa3c1c6SHong Zhang PetscMPIInt size; 2091660d5466SHong Zhang PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2092baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 209347d993e7Ssuyashtn PetscBool cisdense = PETSC_FALSE; 20940acaeb71SStefano Zampini PetscInt i; 20950acaeb71SStefano Zampini const PetscInt *ranges; 2096cb20be35SHong Zhang 2097cb20be35SHong Zhang PetscFunctionBegin; 20988f83a4d9SJacob Faibussowitsch MatCheckProduct(C, 4); 20995f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 21009566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2101cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 210298921bdaSJacob Faibussowitsch SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2103cb20be35SHong Zhang } 2104cb20be35SHong Zhang 21054222ddf1SHong Zhang /* create matrix product C */ 21069566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 210747d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) 21089566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 210947d993e7Ssuyashtn #endif 211047d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 211147d993e7Ssuyashtn PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 211247d993e7Ssuyashtn #endif 211348a46eb9SPierre Jolivet if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 21149566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 2115baa3c1c6SHong Zhang 21164222ddf1SHong Zhang /* create data structure for reuse C */ 21179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 21189566063dSJacob Faibussowitsch PetscCall(PetscNew(&atb)); 21194222ddf1SHong Zhang cM = C->rmap->N; 21209566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 21219566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(C, &ranges)); 21220acaeb71SStefano Zampini for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2123baa3c1c6SHong Zhang 21246718818eSStefano Zampini C->product->data = atb; 21256718818eSStefano Zampini C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 21263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2127cb20be35SHong Zhang } 2128cb20be35SHong Zhang 2129d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2130d71ae5a4SJacob Faibussowitsch { 2131cc48ffa7SToby Isaac MPI_Comm comm; 2132cc48ffa7SToby Isaac PetscMPIInt i, size; 2133cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 2134cc48ffa7SToby Isaac PetscMPIInt tag; 21354222ddf1SHong Zhang PetscInt alg; 2136cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 21374222ddf1SHong Zhang Mat_Product *product = C->product; 21384222ddf1SHong Zhang PetscBool flg; 2139cc48ffa7SToby Isaac 2140cc48ffa7SToby Isaac PetscFunctionBegin; 21416718818eSStefano Zampini MatCheckProduct(C, 4); 21425f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 21434222ddf1SHong Zhang /* check local size of A and B */ 21445f80ce2aSJacob Faibussowitsch PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2145cc48ffa7SToby Isaac 21469566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2147637a0070SStefano Zampini alg = flg ? 0 : 1; 2148cc48ffa7SToby Isaac 21494222ddf1SHong Zhang /* setup matrix product C */ 21509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 21519566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATMPIDENSE)); 21529566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 21539566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 21544222ddf1SHong Zhang 21554222ddf1SHong Zhang /* create data structure for reuse C */ 21569566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 21579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 21589566063dSJacob Faibussowitsch PetscCall(PetscNew(&abt)); 2159cc48ffa7SToby Isaac abt->tag = tag; 2160faa55883SToby Isaac abt->alg = alg; 2161faa55883SToby Isaac switch (alg) { 21624222ddf1SHong Zhang case 1: /* alg: "cyclic" */ 2163cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2164cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 2165f4f49eeaSPierre Jolivet PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2166faa55883SToby Isaac break; 21674222ddf1SHong Zhang default: /* alg: "allgatherv" */ 2168f4f49eeaSPierre Jolivet PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2169f4f49eeaSPierre Jolivet PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2170faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2171faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2172faa55883SToby Isaac break; 2173faa55883SToby Isaac } 2174cc48ffa7SToby Isaac 21756718818eSStefano Zampini C->product->data = abt; 21766718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 21774222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 21783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2179cc48ffa7SToby Isaac } 2180cc48ffa7SToby Isaac 2181d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2182d71ae5a4SJacob Faibussowitsch { 2183cc48ffa7SToby Isaac Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 21846718818eSStefano Zampini Mat_MatTransMultDense *abt; 2185cc48ffa7SToby Isaac MPI_Comm comm; 2186cc48ffa7SToby Isaac PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2187f4259b30SLisandro Dalcin PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2188cc48ffa7SToby Isaac PetscInt i, cK = A->cmap->N, k, j, bn; 2189cc48ffa7SToby Isaac PetscScalar _DOne = 1.0, _DZero = 0.0; 2190637a0070SStefano Zampini const PetscScalar *av, *bv; 219123fff9afSBarry Smith PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2192cc48ffa7SToby Isaac MPI_Request reqs[2]; 2193cc48ffa7SToby Isaac const PetscInt *ranges; 2194cc48ffa7SToby Isaac 2195cc48ffa7SToby Isaac PetscFunctionBegin; 21966718818eSStefano Zampini MatCheckProduct(C, 3); 21975f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 21986718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 21999566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 22009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 22019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 22029566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 22039566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(b->A, &bv)); 22049566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 22059566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &i)); 22069566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &alda)); 22079566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(b->A, &i)); 22089566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &blda)); 22099566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(c->A, &i)); 22109566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &clda)); 22119566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &ranges)); 2212cc48ffa7SToby Isaac bn = B->rmap->n; 2213637a0070SStefano Zampini if (blda == bn) { 2214637a0070SStefano Zampini sendbuf = (PetscScalar *)bv; 2215cc48ffa7SToby Isaac } else { 2216cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2217cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2218ad540459SPierre Jolivet for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2219cc48ffa7SToby Isaac } 2220cc48ffa7SToby Isaac } 2221cc48ffa7SToby Isaac if (size > 1) { 2222cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2223cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2224cc48ffa7SToby Isaac } else { 2225cc48ffa7SToby Isaac sendto = recvfrom = 0; 2226cc48ffa7SToby Isaac } 22279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cK, &ck)); 22289566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2229cc48ffa7SToby Isaac recvisfrom = rank; 2230cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2231cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2232cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2233cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2234cc48ffa7SToby Isaac 2235cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2236cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2237cc48ffa7SToby Isaac sendsiz = cK * bn; 2238cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2239cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 22409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 22419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2242cc48ffa7SToby Isaac } 2243cc48ffa7SToby Isaac 2244cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 22459566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2246792fecdfSBarry Smith if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2247cc48ffa7SToby Isaac 2248cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2249cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 22509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2251cc48ffa7SToby Isaac } 2252cc48ffa7SToby Isaac bn = nextbn; 2253cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2254cc48ffa7SToby Isaac sendbuf = recvbuf; 2255cc48ffa7SToby Isaac } 22569566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 22579566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 22589566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 225967af85e8SPierre Jolivet PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 22609566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 22619566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 22623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2263cc48ffa7SToby Isaac } 2264cc48ffa7SToby Isaac 2265d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2266d71ae5a4SJacob Faibussowitsch { 2267faa55883SToby Isaac Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 22686718818eSStefano Zampini Mat_MatTransMultDense *abt; 2269faa55883SToby Isaac MPI_Comm comm; 2270637a0070SStefano Zampini PetscMPIInt size; 2271637a0070SStefano Zampini PetscScalar *cv, *sendbuf, *recvbuf; 2272637a0070SStefano Zampini const PetscScalar *av, *bv; 2273637a0070SStefano Zampini PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2274faa55883SToby Isaac PetscScalar _DOne = 1.0, _DZero = 0.0; 2275637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, clda; 2276faa55883SToby Isaac 2277faa55883SToby Isaac PetscFunctionBegin; 22786718818eSStefano Zampini MatCheckProduct(C, 3); 22795f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 22806718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 22819566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 22829566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 22839566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 22849566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(b->A, &bv)); 22859566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 22869566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &i)); 22879566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &alda)); 22889566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(b->A, &blda)); 22899566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(c->A, &i)); 22909566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &clda)); 2291faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2292faa55883SToby Isaac bn = B->rmap->n; 2293faa55883SToby Isaac sendbuf = abt->buf[0]; 2294faa55883SToby Isaac recvbuf = abt->buf[1]; 2295faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2296ad540459SPierre Jolivet for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2297faa55883SToby Isaac } 22989566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 22999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 23009566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cK, &ck)); 23019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 23029566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2303792fecdfSBarry Smith if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 23049566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 23059566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 23069566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 230767af85e8SPierre Jolivet PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 23089566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 23099566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 23103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2311faa55883SToby Isaac } 2312faa55883SToby Isaac 2313d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2314d71ae5a4SJacob Faibussowitsch { 23156718818eSStefano Zampini Mat_MatTransMultDense *abt; 2316faa55883SToby Isaac 2317faa55883SToby Isaac PetscFunctionBegin; 23186718818eSStefano Zampini MatCheckProduct(C, 3); 23195f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 23206718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 2321faa55883SToby Isaac switch (abt->alg) { 2322d71ae5a4SJacob Faibussowitsch case 1: 2323d71ae5a4SJacob Faibussowitsch PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2324d71ae5a4SJacob Faibussowitsch break; 2325d71ae5a4SJacob Faibussowitsch default: 2326d71ae5a4SJacob Faibussowitsch PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2327d71ae5a4SJacob Faibussowitsch break; 2328faa55883SToby Isaac } 23293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2330faa55883SToby Isaac } 2331faa55883SToby Isaac 233266976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2333d71ae5a4SJacob Faibussowitsch { 23346718818eSStefano Zampini Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2335320f2790SHong Zhang 2336320f2790SHong Zhang PetscFunctionBegin; 23379566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Ce)); 23389566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Ae)); 23399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Be)); 23409566063dSJacob Faibussowitsch PetscCall(PetscFree(ab)); 23413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2342320f2790SHong Zhang } 2343320f2790SHong Zhang 23445d8c7819SPierre Jolivet static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2345d71ae5a4SJacob Faibussowitsch { 23466718818eSStefano Zampini Mat_MatMultDense *ab; 23475d8c7819SPierre Jolivet Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2348320f2790SHong Zhang 2349320f2790SHong Zhang PetscFunctionBegin; 23506718818eSStefano Zampini MatCheckProduct(C, 3); 23515f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 23526718818eSStefano Zampini ab = (Mat_MatMultDense *)C->product->data; 23535d8c7819SPierre Jolivet if (ab->Ae && ab->Ce) { 23545d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL) 23559566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 23569566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 23579566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 23589566063dSJacob Faibussowitsch PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 23595d8c7819SPierre Jolivet #else 23605d8c7819SPierre Jolivet SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 23615d8c7819SPierre Jolivet #endif 23625d8c7819SPierre Jolivet } else { 23635d8c7819SPierre Jolivet const PetscScalar *read; 23645d8c7819SPierre Jolivet PetscScalar *write; 23655d8c7819SPierre Jolivet PetscInt lda; 23665d8c7819SPierre Jolivet 23675d8c7819SPierre Jolivet PetscCall(MatDenseGetLDA(B, &lda)); 23685d8c7819SPierre Jolivet PetscCall(MatDenseGetArrayRead(B, &read)); 23695d8c7819SPierre Jolivet PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 23705d8c7819SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 23715d8c7819SPierre Jolivet for (PetscInt i = 0; i < C->cmap->N; ++i) { 23725d8c7819SPierre Jolivet PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 23735d8c7819SPierre Jolivet PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 23745d8c7819SPierre Jolivet } 23755d8c7819SPierre Jolivet PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 23765d8c7819SPierre Jolivet PetscCall(MatDenseRestoreArrayRead(B, &read)); 2377f4f49eeaSPierre Jolivet PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 23785d8c7819SPierre Jolivet } 23793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2380320f2790SHong Zhang } 2381320f2790SHong Zhang 2382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2383d71ae5a4SJacob Faibussowitsch { 23845d8c7819SPierre Jolivet Mat_Product *product = C->product; 23855d8c7819SPierre Jolivet PetscInt alg; 2386320f2790SHong Zhang Mat_MatMultDense *ab; 23875d8c7819SPierre Jolivet PetscBool flg; 2388320f2790SHong Zhang 2389320f2790SHong Zhang PetscFunctionBegin; 23906718818eSStefano Zampini MatCheckProduct(C, 4); 23915f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 23924222ddf1SHong Zhang /* check local size of A and B */ 23935d8c7819SPierre Jolivet PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 23945d8c7819SPierre Jolivet A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2395320f2790SHong Zhang 23965d8c7819SPierre Jolivet PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 23975d8c7819SPierre Jolivet alg = flg ? 0 : 1; 23984222ddf1SHong Zhang 23994222ddf1SHong Zhang /* setup C */ 24005d8c7819SPierre Jolivet PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 24015d8c7819SPierre Jolivet PetscCall(MatSetType(C, MATMPIDENSE)); 24029566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 2403320f2790SHong Zhang 2404320f2790SHong Zhang /* create data structure for reuse Cdense */ 24059566063dSJacob Faibussowitsch PetscCall(PetscNew(&ab)); 24065d8c7819SPierre Jolivet 24075d8c7819SPierre Jolivet switch (alg) { 24085d8c7819SPierre Jolivet case 1: /* alg: "elemental" */ 24095d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL) 24105d8c7819SPierre Jolivet /* create elemental matrices Ae and Be */ 24115d8c7819SPierre Jolivet PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 24125d8c7819SPierre Jolivet PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 24135d8c7819SPierre Jolivet PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 24145d8c7819SPierre Jolivet PetscCall(MatSetUp(ab->Ae)); 24155d8c7819SPierre Jolivet PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 24165d8c7819SPierre Jolivet 24175d8c7819SPierre Jolivet PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 24185d8c7819SPierre Jolivet PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 24195d8c7819SPierre Jolivet PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 24205d8c7819SPierre Jolivet PetscCall(MatSetUp(ab->Be)); 24215d8c7819SPierre Jolivet PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 24225d8c7819SPierre Jolivet 24235d8c7819SPierre Jolivet /* compute symbolic Ce = Ae*Be */ 24245d8c7819SPierre Jolivet PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 24255d8c7819SPierre Jolivet PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 24265d8c7819SPierre Jolivet #else 24275d8c7819SPierre Jolivet SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 24285d8c7819SPierre Jolivet #endif 24295d8c7819SPierre Jolivet break; 24305d8c7819SPierre Jolivet default: /* alg: "petsc" */ 24315d8c7819SPierre Jolivet ab->Ae = NULL; 24325d8c7819SPierre Jolivet PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 24335d8c7819SPierre Jolivet ab->Ce = NULL; 24345d8c7819SPierre Jolivet break; 24355d8c7819SPierre Jolivet } 24366718818eSStefano Zampini 24376718818eSStefano Zampini C->product->data = ab; 24386718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 24394222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 24403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2441320f2790SHong Zhang } 24422ef1f0ffSBarry Smith 2443d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2444d71ae5a4SJacob Faibussowitsch { 24455d8c7819SPierre Jolivet Mat_Product *product = C->product; 24465d8c7819SPierre Jolivet const char *algTypes[2] = {"petsc", "elemental"}; 24475d8c7819SPierre Jolivet PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; 24485d8c7819SPierre Jolivet PetscBool flg = PETSC_FALSE; 24495d8c7819SPierre Jolivet 24504d86920dSPierre Jolivet PetscFunctionBegin; 24515d8c7819SPierre Jolivet /* Set default algorithm */ 24525d8c7819SPierre Jolivet alg = 0; /* default is petsc */ 24535d8c7819SPierre Jolivet PetscCall(PetscStrcmp(product->alg, "default", &flg)); 24545d8c7819SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 24555d8c7819SPierre Jolivet 24565d8c7819SPierre Jolivet /* Get runtime option */ 24575d8c7819SPierre Jolivet PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 24585d8c7819SPierre Jolivet PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 24595d8c7819SPierre Jolivet PetscOptionsEnd(); 24605d8c7819SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 24615d8c7819SPierre Jolivet 24624222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 24634222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 24643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2465320f2790SHong Zhang } 246686aefd0dSHong Zhang 2467d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2468d71ae5a4SJacob Faibussowitsch { 24694222ddf1SHong Zhang Mat_Product *product = C->product; 24704222ddf1SHong Zhang Mat A = product->A, B = product->B; 24714222ddf1SHong Zhang 24724222ddf1SHong Zhang PetscFunctionBegin; 247307e90273SPierre Jolivet PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 247407e90273SPierre Jolivet A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 24756718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 24766718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_AtB; 24773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24784222ddf1SHong Zhang } 24794222ddf1SHong Zhang 2480d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2481d71ae5a4SJacob Faibussowitsch { 24824222ddf1SHong Zhang Mat_Product *product = C->product; 24834222ddf1SHong Zhang const char *algTypes[2] = {"allgatherv", "cyclic"}; 24844222ddf1SHong Zhang PetscInt alg, nalg = 2; 24854222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 24864222ddf1SHong Zhang 24874222ddf1SHong Zhang PetscFunctionBegin; 24884222ddf1SHong Zhang /* Set default algorithm */ 24894222ddf1SHong Zhang alg = 0; /* default is allgatherv */ 24909566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "default", &flg)); 249148a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 24924222ddf1SHong Zhang 24934222ddf1SHong Zhang /* Get runtime option */ 24944222ddf1SHong Zhang if (product->api_user) { 2495d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 24969566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2497d0609cedSBarry Smith PetscOptionsEnd(); 24984222ddf1SHong Zhang } else { 2499d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 25009566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2501d0609cedSBarry Smith PetscOptionsEnd(); 25024222ddf1SHong Zhang } 250348a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 25044222ddf1SHong Zhang 25054222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 25064222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 25073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25084222ddf1SHong Zhang } 25094222ddf1SHong Zhang 25105d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2511d71ae5a4SJacob Faibussowitsch { 25124222ddf1SHong Zhang Mat_Product *product = C->product; 25134222ddf1SHong Zhang 25144222ddf1SHong Zhang PetscFunctionBegin; 25154222ddf1SHong Zhang switch (product->type) { 2516d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 2517d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2518d71ae5a4SJacob Faibussowitsch break; 2519d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AtB: 2520d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2521d71ae5a4SJacob Faibussowitsch break; 2522d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 2523d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2524d71ae5a4SJacob Faibussowitsch break; 2525d71ae5a4SJacob Faibussowitsch default: 2526d71ae5a4SJacob Faibussowitsch break; 25274222ddf1SHong Zhang } 25283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25294222ddf1SHong Zhang } 2530