#include /*I "petscmat.h" I*/ const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" " TITLE = {Sca{LAPACK} Users' Guide},\n" " PUBLISHER = {SIAM},\n" " ADDRESS = {Philadelphia, PA},\n" " YEAR = 1997\n" "}\n"; static PetscBool ScaLAPACKCite = PETSC_FALSE; #define DEFAULT_BLOCKSIZE 64 /* The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid */ static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) { PetscFunctionBegin; PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscBool isascii; PetscViewerFormat format; Mat Adense; PetscFunctionBegin; PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); if (isascii) { PetscCall(PetscViewerGetFormat(viewer, &format)); if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); PetscFunctionReturn(PETSC_SUCCESS); } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { PetscFunctionReturn(PETSC_SUCCESS); } } /* convert to dense format and call MatView() */ PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); PetscCall(MatView(Adense, viewer)); PetscCall(MatDestroy(&Adense)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscLogDouble isend[2], irecv[2]; PetscFunctionBegin; info->block_size = 1.0; isend[0] = a->lld * a->locc; /* locally allocated */ isend[1] = a->locr * a->locc; /* used submatrix */ if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { info->nz_allocated = isend[0]; info->nz_used = isend[1]; } else if (flag == MAT_GLOBAL_MAX) { PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); info->nz_allocated = irecv[0]; info->nz_used = irecv[1]; } else if (flag == MAT_GLOBAL_SUM) { PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); info->nz_allocated = irecv[0]; info->nz_used = irecv[1]; } info->nz_unneeded = 0; info->assemblies = A->num_ass; info->mallocs = 0; info->memory = 0; /* REVIEW ME */ info->fill_ratio_given = 0; info->fill_ratio_needed = 0; info->factor_mallocs = 0; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscFunctionBegin; switch (op) { case MAT_NEW_NONZERO_LOCATIONS: case MAT_NEW_NONZERO_LOCATION_ERR: case MAT_NEW_NONZERO_ALLOCATION_ERR: case MAT_SYMMETRIC: case MAT_SORTED_FULL: case MAT_HERMITIAN: break; case MAT_ROW_ORIENTED: a->roworiented = flg; break; default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscInt i, j; PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; PetscBool roworiented = a->roworiented; PetscFunctionBegin; PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); for (i = 0; i < nr; i++) { if (rows[i] < 0) continue; PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); for (j = 0; j < nc; j++) { if (cols[j] < 0) continue; PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { if (roworiented) { switch (imode) { case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; break; default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; break; } } else { switch (imode) { case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; break; default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; break; } } } else { PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); A->assembled = PETSC_FALSE; PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); } } } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscScalar *x2d, *y2d, alpha = 1.0; const PetscInt *ranges; PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; PetscFunctionBegin; if (transpose) { /* create ScaLAPACK descriptors for vectors (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); PetscCheckScaLapackInfo("descinit", info); PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ ylld = 1; PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); PetscCheckScaLapackInfo("descinit", info); /* allocate 2d vectors */ lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); /* create ScaLAPACK descriptors for vectors (2d block distribution) */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); PetscCheckScaLapackInfo("descinit", info); PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute x as a column of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); /* redistribute y as a row of a 2d matrix */ if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); /* call PBLAS subroutine */ if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); /* redistribute y from a row of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); } else { /* non-transpose */ /* create ScaLAPACK descriptors for vectors (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ xlld = 1; PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); PetscCheckScaLapackInfo("descinit", info); PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld)); PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); PetscCheckScaLapackInfo("descinit", info); /* allocate 2d vectors */ lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld)); /* create ScaLAPACK descriptors for vectors (2d block distribution) */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); PetscCheckScaLapackInfo("descinit", info); PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute x as a row of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); /* redistribute y as a column of a 2d matrix */ if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); /* call PBLAS subroutine */ PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); /* redistribute y from a column of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); } PetscCall(PetscFree2(x2d, y2d)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) { const PetscScalar *xarray; PetscScalar *yarray; PetscFunctionBegin; PetscCall(VecGetArrayRead(x, &xarray)); PetscCall(VecGetArray(y, &yarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray)); PetscCall(VecRestoreArrayRead(x, &xarray)); PetscCall(VecRestoreArray(y, &yarray)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) { const PetscScalar *xarray; PetscScalar *yarray; PetscFunctionBegin; PetscCall(VecGetArrayRead(x, &xarray)); PetscCall(VecGetArray(y, &yarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray)); PetscCall(VecRestoreArrayRead(x, &xarray)); PetscCall(VecRestoreArray(y, &yarray)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y) { const PetscScalar *xarray; PetscScalar *yarray; PetscFunctionBegin; PetscCall(VecGetArrayRead(x, &xarray)); PetscCall(VecGetArrayWrite(y, &yarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray)); PetscCall(VecRestoreArrayRead(x, &xarray)); PetscCall(VecRestoreArrayWrite(y, &yarray)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) { const PetscScalar *xarray; PetscScalar *zarray; PetscFunctionBegin; if (y != z) PetscCall(VecCopy(y, z)); PetscCall(VecGetArrayRead(x, &xarray)); PetscCall(VecGetArray(z, &zarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray)); PetscCall(VecRestoreArrayRead(x, &xarray)); PetscCall(VecRestoreArray(z, &zarray)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) { const PetscScalar *xarray; PetscScalar *zarray; PetscFunctionBegin; if (y != z) PetscCall(VecCopy(y, z)); PetscCall(VecGetArrayRead(x, &xarray)); PetscCall(VecGetArray(z, &zarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray)); PetscCall(VecRestoreArrayRead(x, &xarray)); PetscCall(VecRestoreArray(z, &zarray)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) { const PetscScalar *xarray; PetscScalar *zarray; PetscFunctionBegin; if (y != z) PetscCall(VecCopy(y, z)); PetscCall(VecGetArrayRead(x, &xarray)); PetscCall(VecGetArray(z, &zarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray)); PetscCall(VecRestoreArrayRead(x, &xarray)); PetscCall(VecRestoreArray(z, &zarray)); PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; PetscScalar sone = 1.0, zero = 0.0; PetscBLASInt one = 1; PetscFunctionBegin; PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); C->assembled = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) { PetscFunctionBegin; PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); PetscCall(MatSetType(C, MATSCALAPACK)); PetscCall(MatSetUp(C)); C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; PetscScalar sone = 1.0, zero = 0.0; PetscBLASInt one = 1; PetscFunctionBegin; PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); C->assembled = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) { PetscFunctionBegin; PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); PetscCall(MatSetType(C, MATSCALAPACK)); PetscCall(MatSetUp(C)); C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; PetscScalar sone = 1.0, zero = 0.0; PetscBLASInt one = 1; PetscFunctionBegin; PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); C->assembled = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) { PetscFunctionBegin; PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); PetscCall(MatSetType(C, MATSCALAPACK)); PetscCall(MatSetUp(C)); C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK; PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) { Mat_Product *product = C->product; PetscFunctionBegin; switch (product->type) { case MATPRODUCT_AB: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; C->ops->productsymbolic = MatProductSymbolic_AB; break; case MATPRODUCT_AtB: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK; C->ops->productsymbolic = MatProductSymbolic_AtB; break; case MATPRODUCT_ABt: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; C->ops->productsymbolic = MatProductSymbolic_ABt; break; default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscScalar *darray, *d2d, v; const PetscInt *ranges; PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; PetscFunctionBegin; PetscCall(VecGetArray(D, &darray)); if (A->rmap->N <= A->cmap->N) { /* row version */ /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); PetscCall(PetscCalloc1(lszd, &d2d)); PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* collect diagonal */ for (j = 1; j <= a->M; j++) { PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); } /* redistribute d from a column of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); PetscCall(PetscFree(d2d)); } else { /* column version */ /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ dlld = 1; PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); PetscCall(PetscCalloc1(lszd, &d2d)); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* collect diagonal */ for (j = 1; j <= a->N; j++) { PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); } /* redistribute d from a row of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); PetscCall(PetscFree(d2d)); } PetscCall(VecRestoreArray(D, &darray)); PetscCall(VecAssemblyBegin(D)); PetscCall(VecAssemblyEnd(D)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; const PetscScalar *d; const PetscInt *ranges; PetscScalar *d2d; PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; PetscFunctionBegin; if (R) { PetscCall(VecGetArrayRead(R, &d)); /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ dlld = 1; PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); PetscCall(PetscCalloc1(lszd, &d2d)); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute d to a row of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); /* broadcast along process columns */ if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); /* local scaling */ for (j = 0; j < a->locc; j++) for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; PetscCall(PetscFree(d2d)); PetscCall(VecRestoreArrayRead(R, &d)); } if (L) { PetscCall(VecGetArrayRead(L, &d)); /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); PetscCall(PetscCalloc1(lszd, &d2d)); PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute d to a column of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); /* broadcast along process rows */ if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); /* local scaling */ for (i = 0; i < a->locr; i++) for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; PetscCall(PetscFree(d2d)); PetscCall(VecRestoreArrayRead(L, &d)); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) { Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; PetscBLASInt n, one = 1; PetscFunctionBegin; n = x->lld * x->locc; PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) { Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; PetscBLASInt i, n; PetscScalar v; PetscFunctionBegin; n = PetscMin(x->M, x->N); for (i = 1; i <= n; i++) { PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); v += alpha; PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; PetscBLASInt one = 1; PetscScalar beta = 1.0; PetscFunctionBegin; MatScaLAPACKCheckDistribution(Y, 1, X, 3); PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); PetscCall(PetscObjectStateIncrease((PetscObject)Y)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; PetscFunctionBegin; PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); PetscCall(PetscObjectStateIncrease((PetscObject)B)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) { Mat Bs; MPI_Comm comm; Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); PetscCall(MatCreate(comm, &Bs)); PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); PetscCall(MatSetType(Bs, MATSCALAPACK)); b = (Mat_ScaLAPACK *)Bs->data; b->M = a->M; b->N = a->N; b->mb = a->mb; b->nb = a->nb; b->rsrc = a->rsrc; b->csrc = a->csrc; PetscCall(MatSetUp(Bs)); *B = Bs; if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); Bs->assembled = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; Mat Bs = *B; PetscBLASInt one = 1; PetscScalar sone = 1.0, zero = 0.0; #if defined(PETSC_USE_COMPLEX) PetscInt i, j; #endif PetscFunctionBegin; if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); *B = Bs; b = (Mat_ScaLAPACK *)Bs->data; PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); #if defined(PETSC_USE_COMPLEX) /* undo conjugation */ for (i = 0; i < b->locr; i++) for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); #endif Bs->assembled = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscInt i, j; PetscFunctionBegin; for (i = 0; i < a->locr; i++) for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; Mat Bs = *B; PetscBLASInt one = 1; PetscScalar sone = 1.0, zero = 0.0; PetscFunctionBegin; PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); *B = Bs; b = (Mat_ScaLAPACK *)Bs->data; PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); Bs->assembled = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscScalar *x, *x2d; const PetscInt *ranges; PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; PetscFunctionBegin; PetscCall(VecCopy(B, X)); PetscCall(VecGetArray(X, &x)); /* create ScaLAPACK descriptor for a vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* allocate 2d vector */ lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); PetscCall(PetscMalloc1(lszx, &x2d)); PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); /* create ScaLAPACK descriptor for a vector (2d block distribution) */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute x as a column of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); /* call ScaLAPACK subroutine */ switch (A->factortype) { case MAT_FACTOR_LU: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); PetscCheckScaLapackInfo("getrs", info); break; case MAT_FACTOR_CHOLESKY: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); PetscCheckScaLapackInfo("potrs", info); break; default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); } /* redistribute x from a column of a 2d matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); PetscCall(PetscFree(x2d)); PetscCall(VecRestoreArray(X, &x)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) { PetscFunctionBegin; PetscCall(MatSolve_ScaLAPACK(A, B, X)); PetscCall(VecAXPY(X, 1, Y)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x; PetscBool flg1, flg2; PetscBLASInt one = 1, info; Mat C; MatType type; PetscFunctionBegin; PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3); if (flg2) { if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X)); C = X; } else { PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C)); } x = (Mat_ScaLAPACK *)C->data; switch (A->factortype) { case MAT_FACTOR_LU: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); PetscCheckScaLapackInfo("getrs", info); break; case MAT_FACTOR_CHOLESKY: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); PetscCheckScaLapackInfo("potrs", info); break; default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); } if (!flg2) { PetscCall(MatGetType(X, &type)); PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X)); PetscCall(MatDestroy(&C)); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscBLASInt one = 1, info; PetscFunctionBegin; if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); PetscCheckScaLapackInfo("getrf", info); A->factortype = MAT_FACTOR_LU; A->assembled = PETSC_TRUE; PetscCall(PetscFree(A->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) { PetscFunctionBegin; PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) { PetscFunctionBegin; /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscBLASInt one = 1, info; PetscFunctionBegin; PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); PetscCheckScaLapackInfo("potrf", info); A->factortype = MAT_FACTOR_CHOLESKY; A->assembled = PETSC_TRUE; PetscCall(PetscFree(A->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) { PetscFunctionBegin; PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) { PetscFunctionBegin; /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) { PetscFunctionBegin; *type = MATSOLVERSCALAPACK; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) { Mat B; Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscFunctionBegin; /* Create the factorization matrix */ PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); B->trivialsymbolic = PETSC_TRUE; B->factortype = ftype; PetscCall(PetscFree(B->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); *F = B; PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) { PetscFunctionBegin; PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscBLASInt one = 1, lwork = 0; const char *ntype; PetscScalar *work = NULL, dummy; PetscFunctionBegin; switch (type) { case NORM_1: ntype = "1"; lwork = PetscMax(a->locr, a->locc); break; case NORM_FROBENIUS: ntype = "F"; work = &dummy; break; case NORM_INFINITY: ntype = "I"; lwork = PetscMax(a->locr, a->locc); break; default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); } if (lwork) PetscCall(PetscMalloc1(lwork, &work)); *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); if (lwork) PetscCall(PetscFree(work)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscFunctionBegin; PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscInt i, n, nb, isrc, nproc, iproc, *idx; PetscFunctionBegin; if (rows) { n = a->locr; nb = a->mb; isrc = a->rsrc; nproc = a->grid->nprow; iproc = a->grid->myrow; PetscCall(PetscMalloc1(n, &idx)); for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); } if (cols) { n = a->locc; nb = a->nb; isrc = a->csrc; nproc = a->grid->npcol; iproc = a->grid->mycol; PetscCall(PetscMalloc1(n, &idx)); for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); } PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; Mat Bmpi; MPI_Comm comm; PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; const PetscInt *ranges, *branges, *cwork; const PetscScalar *vwork; PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; PetscScalar *barray; PetscBool differ = PETSC_FALSE; PetscMPIInt size; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ PetscCallMPI(MPI_Comm_size(comm, &size)); PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); for (i = 0; i < size; i++) if (ranges[i + 1] != branges[i + 1]) { differ = PETSC_TRUE; break; } } if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ PetscCall(MatCreate(comm, &Bmpi)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); PetscCall(MatSetSizes(Bmpi, m, n, M, N)); PetscCall(MatSetType(Bmpi, MATDENSE)); PetscCall(MatSetUp(Bmpi)); /* create ScaLAPACK descriptor for B (1d block distribution) */ PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ PetscCall(MatDenseGetLDA(Bmpi, &ldb)); PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute matrix */ PetscCall(MatDenseGetArray(Bmpi, &barray)); PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); PetscCall(MatDenseRestoreArray(Bmpi, &barray)); PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); /* transfer rows of auxiliary matrix to the final matrix B */ PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); for (i = rstart; i < rend; i++) { PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); } PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); PetscCall(MatDestroy(&Bmpi)); } else { /* normal cases */ if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; else { PetscCall(MatCreate(comm, &Bmpi)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); PetscCall(MatSetSizes(Bmpi, m, n, M, N)); PetscCall(MatSetType(Bmpi, MATDENSE)); PetscCall(MatSetUp(Bmpi)); } /* create ScaLAPACK descriptor for B (1d block distribution) */ PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ PetscCall(MatDenseGetLDA(Bmpi, &ldb)); PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute matrix */ PetscCall(MatDenseGetArray(Bmpi, &barray)); PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); PetscCall(MatDenseRestoreArray(Bmpi, &barray)); PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi)); else *B = Bmpi; } PetscFunctionReturn(PETSC_SUCCESS); } static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) { const PetscInt *ranges; PetscMPIInt size; PetscInt i, n; PetscFunctionBegin; *correct = PETSC_TRUE; PetscCallMPI(MPI_Comm_size(map->comm, &size)); if (size > 1) { PetscCall(PetscLayoutGetRanges(map, &ranges)); n = ranges[1] - ranges[0]; for (i = 1; i < size; i++) if (ranges[i + 1] - ranges[i] != n) break; *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); } PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) { Mat_ScaLAPACK *b; Mat Bmpi; MPI_Comm comm; PetscInt M = A->rmap->N, N = A->cmap->N, m, n; const PetscInt *ranges, *rows, *cols; PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; const PetscScalar *aarray; IS ir, ic; PetscInt lda; PetscBool flg; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; else { PetscCall(MatCreate(comm, &Bmpi)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); PetscCall(MatSetSizes(Bmpi, m, n, M, N)); PetscCall(MatSetType(Bmpi, MATSCALAPACK)); PetscCall(MatSetUp(Bmpi)); } b = (Mat_ScaLAPACK *)Bmpi->data; PetscCall(MatDenseGetLDA(A, &lda)); PetscCall(MatDenseGetArrayRead(A, &aarray)); PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ /* create ScaLAPACK descriptor for A (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); PetscCheckScaLapackInfo("descinit", info); /* redistribute matrix */ PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); Bmpi->nooffprocentries = PETSC_TRUE; } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); b->roworiented = PETSC_FALSE; PetscCall(MatGetOwnershipIS(A, &ir, &ic)); PetscCall(ISGetIndices(ir, &rows)); PetscCall(ISGetIndices(ic, &cols)); PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); PetscCall(ISRestoreIndices(ir, &rows)); PetscCall(ISRestoreIndices(ic, &cols)); PetscCall(ISDestroy(&ic)); PetscCall(ISDestroy(&ir)); } PetscCall(MatDenseRestoreArrayRead(A, &aarray)); PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi)); else *B = Bmpi; PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { Mat mat_scal; PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; const PetscInt *cols; const PetscScalar *vals; PetscFunctionBegin; if (reuse == MAT_REUSE_MATRIX) { mat_scal = *newmat; PetscCall(MatZeroEntries(mat_scal)); } else { PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); PetscCall(MatSetSizes(mat_scal, m, n, M, N)); PetscCall(MatSetType(mat_scal, MATSCALAPACK)); PetscCall(MatSetUp(mat_scal)); } for (row = rstart; row < rend; row++) { PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); } PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); else *newmat = mat_scal; PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { Mat mat_scal; PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; const PetscInt *cols; const PetscScalar *vals; PetscScalar v; PetscFunctionBegin; if (reuse == MAT_REUSE_MATRIX) { mat_scal = *newmat; PetscCall(MatZeroEntries(mat_scal)); } else { PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); PetscCall(MatSetSizes(mat_scal, m, n, M, N)); PetscCall(MatSetType(mat_scal, MATSCALAPACK)); PetscCall(MatSetUp(mat_scal)); } PetscCall(MatGetRowUpperTriangular(A)); for (row = rstart; row < rend; row++) { PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); for (j = 0; j < ncols; j++) { /* lower triangular part */ if (cols[j] == row) continue; v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); } PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); } PetscCall(MatRestoreRowUpperTriangular(A)); PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); else *newmat = mat_scal; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscInt sz = 0; PetscFunctionBegin; PetscCall(PetscLayoutSetUp(A->rmap)); PetscCall(PetscLayoutSetUp(A->cmap)); if (!a->lld) a->lld = a->locr; PetscCall(PetscFree(a->loc)); PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); PetscCall(PetscCalloc1(sz, &a->loc)); A->preallocated = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; Mat_ScaLAPACK_Grid *grid; PetscMPIInt iflg; MPI_Comm icomm; PetscFunctionBegin; PetscCall(MatStashDestroy_Private(&A->stash)); PetscCall(PetscFree(a->loc)); PetscCall(PetscFree(a->pivots)); PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg)); if (--grid->grid_refct == 0) { Cblacs_gridexit(grid->ictxt); Cblacs_gridexit(grid->ictxrow); Cblacs_gridexit(grid->ictxcol); PetscCall(PetscFree(grid)); PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); } PetscCall(PetscCommDestroy(&icomm)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); PetscCall(PetscFree(A->data)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscBLASInt info = 0; PetscBool flg; PetscFunctionBegin; PetscCall(PetscLayoutSetUp(A->rmap)); PetscCall(PetscLayoutSetUp(A->cmap)); /* check that the layout is as enforced by MatCreateScaLAPACK() */ PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); /* compute local sizes */ PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); a->lld = PetscMax(1, a->locr); /* allocate local array */ PetscCall(MatScaLAPACKSetPreallocation(A)); /* set up ScaLAPACK descriptor */ PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); PetscCheckScaLapackInfo("descinit", info); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) { PetscInt nstash, reallocs; PetscFunctionBegin; if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscMPIInt n; PetscInt i, flg, *row, *col; PetscScalar *val; PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; PetscFunctionBegin; if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); while (1) { PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); if (!flg) break; for (i = 0; i < n; i++) { PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); switch (A->insertmode) { case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; break; case ADD_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; break; default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); } } } PetscCall(MatStashScatterEnd_Private(&A->stash)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) { Mat Adense, As; MPI_Comm comm; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); PetscCall(MatCreate(comm, &Adense)); PetscCall(MatSetType(Adense, MATDENSE)); PetscCall(MatLoad(Adense, viewer)); PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); PetscCall(MatDestroy(&Adense)); PetscCall(MatHeaderReplace(newMat, &As)); PetscFunctionReturn(PETSC_SUCCESS); } static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, NULL, NULL, MatMult_ScaLAPACK, /* 4*/ MatMultAdd_ScaLAPACK, MatMultTranspose_ScaLAPACK, MatMultTransposeAdd_ScaLAPACK, MatSolve_ScaLAPACK, MatSolveAdd_ScaLAPACK, NULL, /*10*/ NULL, MatLUFactor_ScaLAPACK, MatCholeskyFactor_ScaLAPACK, NULL, MatTranspose_ScaLAPACK, /*15*/ MatGetInfo_ScaLAPACK, NULL, MatGetDiagonal_ScaLAPACK, MatDiagonalScale_ScaLAPACK, MatNorm_ScaLAPACK, /*20*/ MatAssemblyBegin_ScaLAPACK, MatAssemblyEnd_ScaLAPACK, MatSetOption_ScaLAPACK, MatZeroEntries_ScaLAPACK, /*24*/ NULL, MatLUFactorSymbolic_ScaLAPACK, MatLUFactorNumeric_ScaLAPACK, MatCholeskyFactorSymbolic_ScaLAPACK, MatCholeskyFactorNumeric_ScaLAPACK, /*29*/ MatSetUp_ScaLAPACK, NULL, NULL, NULL, NULL, /*34*/ MatDuplicate_ScaLAPACK, NULL, NULL, NULL, NULL, /*39*/ MatAXPY_ScaLAPACK, NULL, NULL, NULL, MatCopy_ScaLAPACK, /*44*/ NULL, MatScale_ScaLAPACK, MatShift_ScaLAPACK, NULL, NULL, /*49*/ NULL, NULL, NULL, NULL, NULL, /*54*/ NULL, NULL, NULL, NULL, NULL, /*59*/ NULL, MatDestroy_ScaLAPACK, MatView_ScaLAPACK, NULL, NULL, /*64*/ NULL, NULL, NULL, NULL, NULL, /*69*/ NULL, MatConvert_ScaLAPACK_Dense, NULL, NULL, NULL, /*74*/ NULL, NULL, NULL, NULL, MatLoad_ScaLAPACK, /*79*/ NULL, NULL, NULL, NULL, NULL, /*84*/ NULL, MatMatMultNumeric_ScaLAPACK, NULL, NULL, MatMatTransposeMultNumeric_ScaLAPACK, /*89*/ NULL, MatProductSetFromOptions_ScaLAPACK, NULL, NULL, MatConjugate_ScaLAPACK, /*94*/ NULL, NULL, NULL, NULL, NULL, /*99*/ NULL, MatMatSolve_ScaLAPACK, NULL, NULL, NULL, /*104*/ NULL, NULL, NULL, NULL, NULL, /*109*/ NULL, MatHermitianTranspose_ScaLAPACK, MatMultHermitianTranspose_ScaLAPACK, MatMultHermitianTransposeAdd_ScaLAPACK, NULL, /*114*/ NULL, NULL, NULL, NULL, NULL, /*119*/ NULL, NULL, MatTransposeMatMultNumeric_ScaLAPACK, NULL, /*124*/ NULL, NULL, NULL, NULL, NULL, /*129*/ NULL, NULL, NULL, NULL, NULL, /*134*/ NULL, NULL, NULL, NULL, NULL, NULL, /*140*/ NULL, NULL, NULL, NULL, NULL}; static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) { PetscInt *owner, *startv, *starti, bs2; PetscInt size = stash->size, nsends; PetscInt *sindices, **rindices, j, l; PetscScalar **rvalues, *svalues; MPI_Comm comm = stash->comm; MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; PetscMPIInt tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii; PetscInt *sp_idx, *sp_idy; PetscScalar *sp_val; PetscMatStashSpace space, space_next; PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; PetscFunctionBegin; { /* make sure all processors are either in INSERTMODE or ADDMODE */ InsertMode addv; PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); mat->insertmode = addv; /* in case this processor had no cache */ } bs2 = stash->bs * stash->bs; /* first count number of contributors to each processor */ PetscCall(PetscCalloc1(size, &nlengths)); PetscCall(PetscMalloc1(stash->n + 1, &owner)); ii = j = 0; space = stash->space_head; while (space) { space_next = space->next; for (l = 0; l < space->local_used; l++) { PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); nlengths[j]++; owner[ii] = j; ii++; } space = space_next; } /* Now check what procs get messages - and compute nsends. */ PetscCall(PetscCalloc1(size, &sizes)); nsends = 0; for (PetscMPIInt i = 0; i < size; i++) { if (nlengths[i]) { sizes[i] = 1; nsends++; } } { PetscMPIInt *onodes, *olengths; /* Determine the number of messages to expect, their lengths, from from-ids */ PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); PetscCall(PetscMPIIntCast(nsends, &insends)); PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths)); /* since clubbing row,col - lengths are multiplied by 2 */ for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2; PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); /* values are size 'bs2' lengths (and remove earlier factor 2 */ for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2); PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); PetscCall(PetscFree(onodes)); PetscCall(PetscFree(olengths)); } /* do sends: 1) starts[i] gives the starting index in svalues for stuff going to the ith processor */ PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); PetscCall(PetscMalloc1(2 * nsends, &send_waits)); PetscCall(PetscMalloc2(size, &startv, size, &starti)); /* use 2 sends the first with all_a, the next with all_i and all_j */ startv[0] = 0; starti[0] = 0; for (PetscMPIInt i = 1; i < size; i++) { startv[i] = startv[i - 1] + nlengths[i - 1]; starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; } ii = 0; space = stash->space_head; while (space) { space_next = space->next; sp_idx = space->idx; sp_idy = space->idy; sp_val = space->val; for (l = 0; l < space->local_used; l++) { j = owner[ii]; if (bs2 == 1) { svalues[startv[j]] = sp_val[l]; } else { PetscInt k; PetscScalar *buf1, *buf2; buf1 = svalues + bs2 * startv[j]; buf2 = space->val + bs2 * l; for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; } sindices[starti[j]] = sp_idx[l]; sindices[starti[j] + nlengths[j]] = sp_idy[l]; startv[j]++; starti[j]++; ii++; } space = space_next; } startv[0] = 0; for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; for (PetscMPIInt i = 0, count = 0; i < size; i++) { if (sizes[i]) { PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); } } #if defined(PETSC_USE_INFO) PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); for (PetscMPIInt i = 0; i < size; i++) { if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); } #endif PetscCall(PetscFree(nlengths)); PetscCall(PetscFree(owner)); PetscCall(PetscFree2(startv, starti)); PetscCall(PetscFree(sizes)); /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); for (PetscMPIInt i = 0; i < nreceives; i++) { recv_waits[2 * i] = recv_waits1[i]; recv_waits[2 * i + 1] = recv_waits2[i]; } stash->recv_waits = recv_waits; PetscCall(PetscFree(recv_waits1)); PetscCall(PetscFree(recv_waits2)); stash->svalues = svalues; stash->sindices = sindices; stash->rvalues = rvalues; stash->rindices = rindices; stash->send_waits = send_waits; stash->nsends = (PetscMPIInt)nsends; stash->nrecvs = nreceives; stash->reproduce_count = 0; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscFunctionBegin; PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of the `MATSCALAPACK` matrix Logically Collective Input Parameters: + A - a `MATSCALAPACK` matrix . mb - the row block size - nb - the column block size Level: intermediate Note: This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` @*/ PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscValidLogicalCollectiveInt(A, mb, 2); PetscValidLogicalCollectiveInt(A, nb, 3); PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; PetscFunctionBegin; if (mb) *mb = a->mb; if (nb) *nb = a->nb; PetscFunctionReturn(PETSC_SUCCESS); } /*@ MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of the `MATSCALAPACK` matrix Not Collective Input Parameter: . A - a `MATSCALAPACK` matrix Output Parameters: + mb - the row block size - nb - the column block size Level: intermediate Note: This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` @*/ PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) { PetscFunctionBegin; PetscValidHeaderSpecific(A, MAT_CLASSID, 1); PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); PetscFunctionReturn(PETSC_SUCCESS); } PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); /*MC MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK Options Database Keys: + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) Level: intermediate Note: Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on the given rank. .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` M*/ PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a; PetscBool flg; PetscMPIInt iflg; Mat_ScaLAPACK_Grid *grid; MPI_Comm icomm; PetscBLASInt nprow, npcol, myrow, mycol; PetscInt optv1, k = 2, array[2] = {0, 0}; PetscMPIInt size; PetscFunctionBegin; A->ops[0] = MatOps_Values; A->insertmode = NOT_SET_VALUES; PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; A->stash.ScatterEnd = MatStashScatterEnd_Ref; A->stash.ScatterDestroy = NULL; PetscCall(PetscNew(&a)); A->data = (void *)a; /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL)); PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); } PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg)); if (!iflg) { PetscCall(PetscNew(&grid)); PetscCallMPI(MPI_Comm_size(icomm, &size)); PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow)); PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg)); if (flg) { PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); PetscCall(PetscBLASIntCast(optv1, &grid->nprow)); } PetscOptionsEnd(); if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ grid->npcol = size / grid->nprow; PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); grid->ictxt = Csys2blacs_handle(icomm); Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); grid->grid_refct = 1; grid->nprow = nprow; grid->npcol = npcol; grid->myrow = myrow; grid->mycol = mycol; /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ grid->ictxrow = Csys2blacs_handle(icomm); Cblacs_gridinit(&grid->ictxrow, "R", 1, size); grid->ictxcol = Csys2blacs_handle(icomm); Cblacs_gridinit(&grid->ictxcol, "R", size, 1); PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); } else grid->grid_refct++; PetscCall(PetscCommDestroy(&icomm)); a->grid = grid; a->mb = DEFAULT_BLOCKSIZE; a->nb = DEFAULT_BLOCKSIZE; PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); if (flg) { a->mb = (PetscMPIInt)array[0]; a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb; } PetscOptionsEnd(); a->roworiented = PETSC_TRUE; PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format (2D block cyclic distribution) for a `MATSCALAPACK` matrix Collective Input Parameters: + comm - MPI communicator . mb - row block size (or `PETSC_DECIDE` to have it set) . nb - column block size (or `PETSC_DECIDE` to have it set) . M - number of global rows . N - number of global columns . rsrc - coordinate of process that owns the first row of the distributed matrix - csrc - coordinate of process that owns the first column of the distributed matrix Output Parameter: . A - the matrix Options Database Key: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) Level: intermediate Notes: If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, MatXXXXSetPreallocation() paradigm instead of this routine directly. [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] Storage is completely managed by ScaLAPACK, so this requires PETSc to be configured with ScaLAPACK. In particular, PETSc's local sizes lose significance and are thus ignored. The block sizes refer to the values used for the distributed matrix, not the same meaning as in `MATBAIJ`. .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` @*/ PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) { Mat_ScaLAPACK *a; PetscInt m, n; PetscFunctionBegin; PetscCall(MatCreate(comm, A)); PetscCall(MatSetType(*A, MATSCALAPACK)); PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); /* rows and columns are NOT distributed according to PetscSplitOwnership */ m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); PetscCall(MatSetSizes(*A, m, n, M, N)); a = (Mat_ScaLAPACK *)(*A)->data; PetscCall(PetscBLASIntCast(M, &a->M)); PetscCall(PetscBLASIntCast(N, &a->N)); PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); PetscCall(PetscBLASIntCast(csrc, &a->csrc)); PetscCall(MatSetUp(*A)); PetscFunctionReturn(PETSC_SUCCESS); }