#include /*I "petscmat.h" I*/ const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" " TITLE = {Sca{LAPACK} Users' Guide},\n" " PUBLISHER = {SIAM},\n" " ADDRESS = {Philadelphia, PA},\n" " YEAR = 1997\n" "}\n"; static PetscBool ScaLAPACKCite = PETSC_FALSE; #define DEFAULT_BLOCKSIZE 64 /* The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid */ static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) { PetscFunctionBegin; PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n")); PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); PetscFunctionReturn(0); } static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscBool iascii; PetscViewerFormat format; Mat Adense; PetscFunctionBegin; PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); if (iascii) { PetscCall(PetscViewerGetFormat(viewer,&format)); if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb)); PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol)); PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc)); PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc)); PetscFunctionReturn(0); } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { PetscFunctionReturn(0); } } /* convert to dense format and call MatView() */ PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense)); PetscCall(MatView(Adense,viewer)); PetscCall(MatDestroy(&Adense)); PetscFunctionReturn(0); } static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscLogDouble isend[2],irecv[2]; PetscFunctionBegin; info->block_size = 1.0; isend[0] = a->lld*a->locc; /* locally allocated */ isend[1] = a->locr*a->locc; /* used submatrix */ if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { info->nz_allocated = isend[0]; info->nz_used = isend[1]; } else if (flag == MAT_GLOBAL_MAX) { PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A))); info->nz_allocated = irecv[0]; info->nz_used = irecv[1]; } else if (flag == MAT_GLOBAL_SUM) { PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A))); info->nz_allocated = irecv[0]; info->nz_used = irecv[1]; } info->nz_unneeded = 0; info->assemblies = A->num_ass; info->mallocs = 0; info->memory = ((PetscObject)A)->mem; info->fill_ratio_given = 0; info->fill_ratio_needed = 0; info->factor_mallocs = 0; PetscFunctionReturn(0); } PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) { PetscFunctionBegin; switch (op) { case MAT_NEW_NONZERO_LOCATIONS: case MAT_NEW_NONZERO_LOCATION_ERR: case MAT_NEW_NONZERO_ALLOCATION_ERR: case MAT_SYMMETRIC: case MAT_SORTED_FULL: case MAT_HERMITIAN: break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); } PetscFunctionReturn(0); } static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscInt i,j; PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; PetscFunctionBegin; for (i=0;idesc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { switch (imode) { case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); } } else { PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); A->assembled = PETSC_FALSE; PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES))); } } } PetscFunctionReturn(0); } static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscScalar *x2d,*y2d,alpha=1.0; const PetscInt *ranges; PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; PetscFunctionBegin; if (transpose) { /* create ScaLAPACK descriptors for vectors (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ xlld = PetscMax(1,A->rmap->n); PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); PetscCheckScaLapackInfo("descinit",info); PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* y block size */ ylld = 1; PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); PetscCheckScaLapackInfo("descinit",info); /* allocate 2d vectors */ lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); xlld = PetscMax(1,lszx); /* create ScaLAPACK descriptors for vectors (2d block distribution) */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); PetscCheckScaLapackInfo("descinit",info); PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute x as a column of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); /* redistribute y as a row of a 2d matrix */ if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); /* call PBLAS subroutine */ PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); /* redistribute y from a row of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); } else { /* non-transpose */ /* create ScaLAPACK descriptors for vectors (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* x block size */ xlld = 1; PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); PetscCheckScaLapackInfo("descinit",info); PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* y block size */ ylld = PetscMax(1,A->rmap->n); PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); PetscCheckScaLapackInfo("descinit",info); /* allocate 2d vectors */ lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); ylld = PetscMax(1,lszy); /* create ScaLAPACK descriptors for vectors (2d block distribution) */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); PetscCheckScaLapackInfo("descinit",info); PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute x as a row of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); /* redistribute y as a column of a 2d matrix */ if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); /* call PBLAS subroutine */ PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); /* redistribute y from a column of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); } PetscCall(PetscFree2(x2d,y2d)); PetscFunctionReturn(0); } static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) { const PetscScalar *xarray; PetscScalar *yarray; PetscFunctionBegin; PetscCall(VecGetArrayRead(x,&xarray)); PetscCall(VecGetArray(y,&yarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray)); PetscCall(VecRestoreArrayRead(x,&xarray)); PetscCall(VecRestoreArray(y,&yarray)); PetscFunctionReturn(0); } static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) { const PetscScalar *xarray; PetscScalar *yarray; PetscFunctionBegin; PetscCall(VecGetArrayRead(x,&xarray)); PetscCall(VecGetArray(y,&yarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray)); PetscCall(VecRestoreArrayRead(x,&xarray)); PetscCall(VecRestoreArray(y,&yarray)); PetscFunctionReturn(0); } static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) { const PetscScalar *xarray; PetscScalar *zarray; PetscFunctionBegin; if (y != z) PetscCall(VecCopy(y,z)); PetscCall(VecGetArrayRead(x,&xarray)); PetscCall(VecGetArray(z,&zarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray)); PetscCall(VecRestoreArrayRead(x,&xarray)); PetscCall(VecRestoreArray(z,&zarray)); PetscFunctionReturn(0); } static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) { const PetscScalar *xarray; PetscScalar *zarray; PetscFunctionBegin; if (y != z) PetscCall(VecCopy(y,z)); PetscCall(VecGetArrayRead(x,&xarray)); PetscCall(VecGetArray(z,&zarray)); PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray)); PetscCall(VecRestoreArrayRead(x,&xarray)); PetscCall(VecRestoreArray(z,&zarray)); PetscFunctionReturn(0); } PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; PetscScalar sone=1.0,zero=0.0; PetscBLASInt one=1; PetscFunctionBegin; PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); C->assembled = PETSC_TRUE; PetscFunctionReturn(0); } PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) { PetscFunctionBegin; PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); PetscCall(MatSetType(C,MATSCALAPACK)); PetscCall(MatSetUp(C)); C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; PetscFunctionReturn(0); } static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; PetscScalar sone=1.0,zero=0.0; PetscBLASInt one=1; PetscFunctionBegin; PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); C->assembled = PETSC_TRUE; PetscFunctionReturn(0); } static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) { PetscFunctionBegin; PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE)); PetscCall(MatSetType(C,MATSCALAPACK)); PetscCall(MatSetUp(C)); PetscFunctionReturn(0); } /* --------------------------------------- */ static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) { PetscFunctionBegin; C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; C->ops->productsymbolic = MatProductSymbolic_AB; PetscFunctionReturn(0); } static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) { PetscFunctionBegin; C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; C->ops->productsymbolic = MatProductSymbolic_ABt; PetscFunctionReturn(0); } PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) { Mat_Product *product = C->product; PetscFunctionBegin; switch (product->type) { case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); break; case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); break; default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); } PetscFunctionReturn(0); } /* --------------------------------------- */ static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscScalar *darray,*d2d,v; const PetscInt *ranges; PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; PetscFunctionBegin; PetscCall(VecGetArray(D,&darray)); if (A->rmap->N<=A->cmap->N) { /* row version */ /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ dlld = PetscMax(1,A->rmap->n); PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); PetscCall(PetscCalloc1(lszd,&d2d)); dlld = PetscMax(1,lszd); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* collect diagonal */ for (j=1;j<=a->M;j++) { PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); } /* redistribute d from a column of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); PetscCall(PetscFree(d2d)); } else { /* column version */ /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ dlld = 1; PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); PetscCall(PetscCalloc1(lszd,&d2d)); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* collect diagonal */ for (j=1;j<=a->N;j++) { PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); } /* redistribute d from a row of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); PetscCall(PetscFree(d2d)); } PetscCall(VecRestoreArray(D,&darray)); PetscCall(VecAssemblyBegin(D)); PetscCall(VecAssemblyEnd(D)); PetscFunctionReturn(0); } static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; const PetscScalar *d; const PetscInt *ranges; PetscScalar *d2d; PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; PetscFunctionBegin; if (R) { PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d)); /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ dlld = 1; PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); PetscCall(PetscCalloc1(lszd,&d2d)); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute d to a row of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); /* broadcast along process columns */ if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); /* local scaling */ for (j=0;jlocc;j++) for (i=0;ilocr;i++) a->loc[i+j*a->lld] *= d2d[j]; PetscCall(PetscFree(d2d)); PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d)); } if (L) { PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d)); /* create ScaLAPACK descriptor for vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ dlld = PetscMax(1,A->rmap->n); PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* allocate 2d vector */ lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); PetscCall(PetscCalloc1(lszd,&d2d)); dlld = PetscMax(1,lszd); /* create ScaLAPACK descriptor for vector (2d block distribution) */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute d to a column of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); /* broadcast along process rows */ if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); /* local scaling */ for (i=0;ilocr;i++) for (j=0;jlocc;j++) a->loc[i+j*a->lld] *= d2d[i]; PetscCall(PetscFree(d2d)); PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d)); } PetscFunctionReturn(0); } static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) { PetscFunctionBegin; *missing = PETSC_FALSE; PetscFunctionReturn(0); } static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) { Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; PetscBLASInt n,one=1; PetscFunctionBegin; n = x->lld*x->locc; PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); PetscFunctionReturn(0); } static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) { Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; PetscBLASInt i,n; PetscScalar v; PetscFunctionBegin; n = PetscMin(x->M,x->N); for (i=1;i<=n;i++) { PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); v += alpha; PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); } PetscFunctionReturn(0); } static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) { Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; PetscBLASInt one=1; PetscScalar beta=1.0; PetscFunctionBegin; MatScaLAPACKCheckDistribution(Y,1,X,3); PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); PetscCall(PetscObjectStateIncrease((PetscObject)Y)); PetscFunctionReturn(0); } static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; PetscFunctionBegin; PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); PetscCall(PetscObjectStateIncrease((PetscObject)B)); PetscFunctionReturn(0); } static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) { Mat Bs; MPI_Comm comm; Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); PetscCall(MatCreate(comm,&Bs)); PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); PetscCall(MatSetType(Bs,MATSCALAPACK)); b = (Mat_ScaLAPACK*)Bs->data; b->M = a->M; b->N = a->N; b->mb = a->mb; b->nb = a->nb; b->rsrc = a->rsrc; b->csrc = a->csrc; PetscCall(MatSetUp(Bs)); *B = Bs; if (op == MAT_COPY_VALUES) { PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc)); } Bs->assembled = PETSC_TRUE; PetscFunctionReturn(0); } static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; Mat Bs = *B; PetscBLASInt one=1; PetscScalar sone=1.0,zero=0.0; #if defined(PETSC_USE_COMPLEX) PetscInt i,j; #endif PetscFunctionBegin; if (reuse == MAT_INITIAL_MATRIX) { PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); *B = Bs; } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); b = (Mat_ScaLAPACK*)Bs->data; PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); #if defined(PETSC_USE_COMPLEX) /* undo conjugation */ for (i=0;ilocr;i++) for (j=0;jlocc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); #endif Bs->assembled = PETSC_TRUE; PetscFunctionReturn(0); } static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscInt i,j; PetscFunctionBegin; for (i=0;ilocr;i++) for (j=0;jlocc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); PetscFunctionReturn(0); } static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; Mat Bs = *B; PetscBLASInt one=1; PetscScalar sone=1.0,zero=0.0; PetscFunctionBegin; if (reuse == MAT_INITIAL_MATRIX) { PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); *B = Bs; } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); b = (Mat_ScaLAPACK*)Bs->data; PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); Bs->assembled = PETSC_TRUE; PetscFunctionReturn(0); } static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscScalar *x,*x2d; const PetscInt *ranges; PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; PetscFunctionBegin; PetscCall(VecCopy(B,X)); PetscCall(VecGetArray(X,&x)); /* create ScaLAPACK descriptor for a vector (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ xlld = PetscMax(1,A->rmap->n); PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* allocate 2d vector */ lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); PetscCall(PetscMalloc1(lszx,&x2d)); xlld = PetscMax(1,lszx); /* create ScaLAPACK descriptor for a vector (2d block distribution) */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute x as a column of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); /* call ScaLAPACK subroutine */ switch (A->factortype) { case MAT_FACTOR_LU: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); PetscCheckScaLapackInfo("getrs",info); break; case MAT_FACTOR_CHOLESKY: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); PetscCheckScaLapackInfo("potrs",info); break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); } /* redistribute x from a column of a 2d matrix */ PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); PetscCall(PetscFree(x2d)); PetscCall(VecRestoreArray(X,&x)); PetscFunctionReturn(0); } static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) { PetscFunctionBegin; PetscCall(MatSolve_ScaLAPACK(A,B,X)); PetscCall(VecAXPY(X,1,Y)); PetscFunctionReturn(0); } static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; PetscBool flg1,flg2; PetscBLASInt one=1,info; PetscFunctionBegin; PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1)); PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2)); PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); MatScaLAPACKCheckDistribution(B,1,X,2); b = (Mat_ScaLAPACK*)B->data; x = (Mat_ScaLAPACK*)X->data; PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc)); switch (A->factortype) { case MAT_FACTOR_LU: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); PetscCheckScaLapackInfo("getrs",info); break; case MAT_FACTOR_CHOLESKY: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); PetscCheckScaLapackInfo("potrs",info); break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); } PetscFunctionReturn(0); } static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscBLASInt one=1,info; PetscFunctionBegin; if (!a->pivots) { PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots)); PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt))); } PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); PetscCheckScaLapackInfo("getrf",info); A->factortype = MAT_FACTOR_LU; A->assembled = PETSC_TRUE; PetscCall(PetscFree(A->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); PetscFunctionReturn(0); } static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) { PetscFunctionBegin; PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info)); PetscFunctionReturn(0); } static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) { PetscFunctionBegin; /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ PetscFunctionReturn(0); } static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscBLASInt one=1,info; PetscFunctionBegin; PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); PetscCheckScaLapackInfo("potrf",info); A->factortype = MAT_FACTOR_CHOLESKY; A->assembled = PETSC_TRUE; PetscCall(PetscFree(A->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); PetscFunctionReturn(0); } static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) { PetscFunctionBegin; PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info)); PetscFunctionReturn(0); } static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) { PetscFunctionBegin; /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ PetscFunctionReturn(0); } PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) { PetscFunctionBegin; *type = MATSOLVERSCALAPACK; PetscFunctionReturn(0); } static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) { Mat B; Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscFunctionBegin; /* Create the factorization matrix */ PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B)); B->trivialsymbolic = PETSC_TRUE; B->factortype = ftype; PetscCall(PetscFree(B->solvertype)); PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype)); PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack)); *F = B; PetscFunctionReturn(0); } PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) { PetscFunctionBegin; PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack)); PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack)); PetscFunctionReturn(0); } static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscBLASInt one=1,lwork=0; const char *ntype; PetscScalar *work=NULL,dummy; PetscFunctionBegin; switch (type) { case NORM_1: ntype = "1"; lwork = PetscMax(a->locr,a->locc); break; case NORM_FROBENIUS: ntype = "F"; work = &dummy; break; case NORM_INFINITY: ntype = "I"; lwork = PetscMax(a->locr,a->locc); break; default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); } if (lwork) PetscCall(PetscMalloc1(lwork,&work)); *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); if (lwork) PetscCall(PetscFree(work)); PetscFunctionReturn(0); } static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscFunctionBegin; PetscCall(PetscArrayzero(a->loc,a->lld*a->locc)); PetscFunctionReturn(0); } static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscInt i,n,nb,isrc,nproc,iproc,*idx; PetscFunctionBegin; if (rows) { n = a->locr; nb = a->mb; isrc = a->rsrc; nproc = a->grid->nprow; iproc = a->grid->myrow; PetscCall(PetscMalloc1(n,&idx)); for (i=0;ilocc; nb = a->nb; isrc = a->csrc; nproc = a->grid->npcol; iproc = a->grid->mycol; PetscCall(PetscMalloc1(n,&idx)); for (i=0;idata; Mat Bmpi; MPI_Comm comm; PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; const PetscInt *ranges,*branges,*cwork; const PetscScalar *vwork; PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; PetscScalar *barray; PetscBool differ=PETSC_FALSE; PetscMPIInt size; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ PetscCallMPI(MPI_Comm_size(comm,&size)); PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges)); for (i=0;irmap->n,1); /* local leading dimension */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute matrix */ PetscCall(MatDenseGetArray(Bmpi,&barray)); PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); PetscCall(MatDenseRestoreArray(Bmpi,&barray)); PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); /* transfer rows of auxiliary matrix to the final matrix B */ PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend)); for (i=rstart;irmap->n,1); /* local leading dimension */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute matrix */ PetscCall(MatDenseGetArray(Bmpi,&barray)); PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); PetscCall(MatDenseRestoreArray(Bmpi,&barray)); PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) { PetscCall(MatHeaderReplace(A,&Bmpi)); } else *B = Bmpi; } PetscFunctionReturn(0); } PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) { Mat_ScaLAPACK *b; Mat Bmpi; MPI_Comm comm; PetscInt M=A->rmap->N,N=A->cmap->N,m,n; const PetscInt *ranges; PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; PetscScalar *aarray; PetscInt lda; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; else { PetscCall(MatCreate(comm,&Bmpi)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); PetscCall(MatSetSizes(Bmpi,m,n,M,N)); PetscCall(MatSetType(Bmpi,MATSCALAPACK)); PetscCall(MatSetUp(Bmpi)); } b = (Mat_ScaLAPACK*)Bmpi->data; /* create ScaLAPACK descriptor for A (1d block distribution) */ PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); PetscCall(PetscBLASIntCast(ranges[1],&amb)); /* row block size */ PetscCall(MatDenseGetLDA(A,&lda)); lld = PetscMax(lda,1); /* local leading dimension */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); PetscCheckScaLapackInfo("descinit",info); /* redistribute matrix */ PetscCall(MatDenseGetArray(A,&aarray)); PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); PetscCall(MatDenseRestoreArray(A,&aarray)); PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) { PetscCall(MatHeaderReplace(A,&Bmpi)); } else *B = Bmpi; PetscFunctionReturn(0); } PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) { Mat mat_scal; PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; const PetscInt *cols; const PetscScalar *vals; PetscFunctionBegin; if (reuse == MAT_REUSE_MATRIX) { mat_scal = *newmat; PetscCall(MatZeroEntries(mat_scal)); } else { PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); PetscCall(MatSetSizes(mat_scal,m,n,M,N)); PetscCall(MatSetType(mat_scal,MATSCALAPACK)); PetscCall(MatSetUp(mat_scal)); } for (row=rstart;rowrmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; const PetscInt *cols; const PetscScalar *vals; PetscScalar v; PetscFunctionBegin; if (reuse == MAT_REUSE_MATRIX) { mat_scal = *newmat; PetscCall(MatZeroEntries(mat_scal)); } else { PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); PetscCall(MatSetSizes(mat_scal,m,n,M,N)); PetscCall(MatSetType(mat_scal,MATSCALAPACK)); PetscCall(MatSetUp(mat_scal)); } PetscCall(MatGetRowUpperTriangular(A)); for (row=rstart;rowhermitian ? PetscConj(vals[j]) : vals[j]; PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES)); } PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals)); } PetscCall(MatRestoreRowUpperTriangular(A)); PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal)); else *newmat = mat_scal; PetscFunctionReturn(0); } static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscInt sz=0; PetscFunctionBegin; PetscCall(PetscLayoutSetUp(A->rmap)); PetscCall(PetscLayoutSetUp(A->cmap)); if (!a->lld) a->lld = a->locr; PetscCall(PetscFree(a->loc)); PetscCall(PetscIntMultError(a->lld,a->locc,&sz)); PetscCall(PetscCalloc1(sz,&a->loc)); PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar))); A->preallocated = PETSC_TRUE; PetscFunctionReturn(0); } static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; Mat_ScaLAPACK_Grid *grid; PetscBool flg; MPI_Comm icomm; PetscFunctionBegin; PetscCall(MatStashDestroy_Private(&A->stash)); PetscCall(PetscFree(a->loc)); PetscCall(PetscFree(a->pivots)); PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); if (--grid->grid_refct == 0) { Cblacs_gridexit(grid->ictxt); Cblacs_gridexit(grid->ictxrow); Cblacs_gridexit(grid->ictxcol); PetscCall(PetscFree(grid)); PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval)); } PetscCall(PetscCommDestroy(&icomm)); PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL)); PetscCall(PetscFree(A->data)); PetscFunctionReturn(0); } static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) { const PetscInt *ranges; PetscMPIInt size; PetscInt i,n; PetscFunctionBegin; PetscCallMPI(MPI_Comm_size(map->comm,&size)); if (size>2) { PetscCall(PetscLayoutGetRanges(map,&ranges)); n = ranges[1]-ranges[0]; for (i=1;i= size-1 || ranges[i+1]-ranges[i] == 0 || ranges[i+2]-ranges[i+1] == 0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); } PetscFunctionReturn(0); } PetscErrorCode MatSetUp_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscBLASInt info=0; PetscFunctionBegin; PetscCall(PetscLayoutSetUp(A->rmap)); PetscCall(PetscLayoutSetUp(A->cmap)); /* check that the layout is as enforced by MatCreateScaLAPACK */ PetscCall(MatScaLAPACKCheckLayout(A->rmap)); PetscCall(MatScaLAPACKCheckLayout(A->cmap)); /* compute local sizes */ PetscCall(PetscBLASIntCast(A->rmap->N,&a->M)); PetscCall(PetscBLASIntCast(A->cmap->N,&a->N)); a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); a->lld = PetscMax(1,a->locr); /* allocate local array */ PetscCall(MatScaLAPACKSetPreallocation(A)); /* set up ScaLAPACK descriptor */ PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); PetscCheckScaLapackInfo("descinit",info); PetscFunctionReturn(0); } PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) { PetscInt nstash,reallocs; PetscFunctionBegin; if (A->nooffprocentries) PetscFunctionReturn(0); PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL)); PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs)); PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); PetscFunctionReturn(0); } PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscMPIInt n; PetscInt i,flg,*row,*col; PetscScalar *val; PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; PetscFunctionBegin; if (A->nooffprocentries) PetscFunctionReturn(0); while (1) { PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg)); if (!flg) break; for (i=0;idesc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); switch (A->insertmode) { case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); } } } PetscCall(MatStashScatterEnd_Private(&A->stash)); PetscFunctionReturn(0); } PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) { Mat Adense,As; MPI_Comm comm; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm)); PetscCall(MatCreate(comm,&Adense)); PetscCall(MatSetType(Adense,MATDENSE)); PetscCall(MatLoad(Adense,viewer)); PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As)); PetscCall(MatDestroy(&Adense)); PetscCall(MatHeaderReplace(newMat,&As)); PetscFunctionReturn(0); } /* -------------------------------------------------------------------*/ static struct _MatOps MatOps_Values = { MatSetValues_ScaLAPACK, 0, 0, MatMult_ScaLAPACK, /* 4*/ MatMultAdd_ScaLAPACK, MatMultTranspose_ScaLAPACK, MatMultTransposeAdd_ScaLAPACK, MatSolve_ScaLAPACK, MatSolveAdd_ScaLAPACK, 0, /*10*/ 0, MatLUFactor_ScaLAPACK, MatCholeskyFactor_ScaLAPACK, 0, MatTranspose_ScaLAPACK, /*15*/ MatGetInfo_ScaLAPACK, 0, MatGetDiagonal_ScaLAPACK, MatDiagonalScale_ScaLAPACK, MatNorm_ScaLAPACK, /*20*/ MatAssemblyBegin_ScaLAPACK, MatAssemblyEnd_ScaLAPACK, MatSetOption_ScaLAPACK, MatZeroEntries_ScaLAPACK, /*24*/ 0, MatLUFactorSymbolic_ScaLAPACK, MatLUFactorNumeric_ScaLAPACK, MatCholeskyFactorSymbolic_ScaLAPACK, MatCholeskyFactorNumeric_ScaLAPACK, /*29*/ MatSetUp_ScaLAPACK, 0, 0, 0, 0, /*34*/ MatDuplicate_ScaLAPACK, 0, 0, 0, 0, /*39*/ MatAXPY_ScaLAPACK, 0, 0, 0, MatCopy_ScaLAPACK, /*44*/ 0, MatScale_ScaLAPACK, MatShift_ScaLAPACK, 0, 0, /*49*/ 0, 0, 0, 0, 0, /*54*/ 0, 0, 0, 0, 0, /*59*/ 0, MatDestroy_ScaLAPACK, MatView_ScaLAPACK, 0, 0, /*64*/ 0, 0, 0, 0, 0, /*69*/ 0, 0, MatConvert_ScaLAPACK_Dense, 0, 0, /*74*/ 0, 0, 0, 0, 0, /*79*/ 0, 0, 0, 0, MatLoad_ScaLAPACK, /*84*/ 0, 0, 0, 0, 0, /*89*/ 0, 0, MatMatMultNumeric_ScaLAPACK, 0, 0, /*94*/ 0, 0, 0, MatMatTransposeMultNumeric_ScaLAPACK, 0, /*99*/ MatProductSetFromOptions_ScaLAPACK, 0, 0, MatConjugate_ScaLAPACK, 0, /*104*/0, 0, 0, 0, 0, /*109*/MatMatSolve_ScaLAPACK, 0, 0, 0, MatMissingDiagonal_ScaLAPACK, /*114*/0, 0, 0, 0, 0, /*119*/0, MatHermitianTranspose_ScaLAPACK, 0, 0, 0, /*124*/0, 0, 0, 0, 0, /*129*/0, 0, 0, 0, 0, /*134*/0, 0, 0, 0, 0, 0, /*140*/0, 0, 0, 0, 0, /*145*/0, 0, 0 }; static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) { PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; PetscInt size=stash->size,nsends; PetscInt count,*sindices,**rindices,i,j,l; PetscScalar **rvalues,*svalues; MPI_Comm comm = stash->comm; MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; PetscMPIInt *sizes,*nlengths,nreceives; PetscInt *sp_idx,*sp_idy; PetscScalar *sp_val; PetscMatStashSpace space,space_next; PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; PetscFunctionBegin; { /* make sure all processors are either in INSERTMODE or ADDMODE */ InsertMode addv; PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); mat->insertmode = addv; /* in case this processor had no cache */ } bs2 = stash->bs*stash->bs; /* first count number of contributors to each processor */ PetscCall(PetscCalloc1(size,&nlengths)); PetscCall(PetscMalloc1(stash->n+1,&owner)); i = j = 0; space = stash->space_head; while (space) { space_next = space->next; for (l=0; llocal_used; l++) { PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx)); PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx)); PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); nlengths[j]++; owner[i] = j; i++; } space = space_next; } /* Now check what procs get messages - and compute nsends. */ PetscCall(PetscCalloc1(size,&sizes)); for (i=0, nsends=0; in,&svalues,2*(stash->n+1),&sindices)); PetscCall(PetscMalloc1(2*nsends,&send_waits)); PetscCall(PetscMalloc2(size,&startv,size,&starti)); /* use 2 sends the first with all_a, the next with all_i and all_j */ startv[0] = 0; starti[0] = 0; for (i=1; ispace_head; while (space) { space_next = space->next; sp_idx = space->idx; sp_idy = space->idy; sp_val = space->val; for (l=0; llocal_used; l++) { j = owner[i]; if (bs2 == 1) { svalues[startv[j]] = sp_val[l]; } else { PetscInt k; PetscScalar *buf1,*buf2; buf1 = svalues+bs2*startv[j]; buf2 = space->val + bs2*l; for (k=0; krecv_waits = recv_waits; PetscCall(PetscFree(recv_waits1)); PetscCall(PetscFree(recv_waits2)); stash->svalues = svalues; stash->sindices = sindices; stash->rvalues = rvalues; stash->rindices = rindices; stash->send_waits = send_waits; stash->nsends = nsends; stash->nrecvs = nreceives; stash->reproduce_count = 0; PetscFunctionReturn(0); } static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscFunctionBegin; PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb); PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb); PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); PetscFunctionReturn(0); } /*@ MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of the ScaLAPACK matrix Logically Collective on A Input Parameters: + A - a MATSCALAPACK matrix . mb - the row block size - nb - the column block size Level: intermediate .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` @*/ PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) { PetscFunctionBegin; PetscValidHeaderSpecific(A,MAT_CLASSID,1); PetscValidLogicalCollectiveInt(A,mb,2); PetscValidLogicalCollectiveInt(A,nb,3); PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb)); PetscFunctionReturn(0); } static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) { Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; PetscFunctionBegin; if (mb) *mb = a->mb; if (nb) *nb = a->nb; PetscFunctionReturn(0); } /*@ MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of the ScaLAPACK matrix Not collective Input Parameter: . A - a MATSCALAPACK matrix Output Parameters: + mb - the row block size - nb - the column block size Level: intermediate .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` @*/ PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) { PetscFunctionBegin; PetscValidHeaderSpecific(A,MAT_CLASSID,1); PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb)); PetscFunctionReturn(0); } PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); /*MC MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package Use ./configure --download-scalapack to install PETSc to use ScaLAPACK Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver Options Database Keys: + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions() . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) Level: beginner .seealso: `MATDENSE`, `MATELEMENTAL` M*/ PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) { Mat_ScaLAPACK *a; PetscBool flg,flg1; Mat_ScaLAPACK_Grid *grid; MPI_Comm icomm; PetscBLASInt nprow,npcol,myrow,mycol; PetscInt optv1,k=2,array[2]={0,0}; PetscMPIInt size; PetscFunctionBegin; PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps))); A->insertmode = NOT_SET_VALUES; PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash)); A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; A->stash.ScatterEnd = MatStashScatterEnd_Ref; A->stash.ScatterDestroy = NULL; PetscCall(PetscNewLog(A,&a)); A->data = (void*)a; /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0)); PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite)); } PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); if (!flg) { PetscCall(PetscNewLog(A,&grid)); PetscCallMPI(MPI_Comm_size(icomm,&size)); grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat"); PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1)); if (flg1) { PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size); grid->nprow = optv1; } PetscOptionsEnd(); if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ grid->npcol = size/grid->nprow; PetscCall(PetscBLASIntCast(grid->nprow,&nprow)); PetscCall(PetscBLASIntCast(grid->npcol,&npcol)); grid->ictxt = Csys2blacs_handle(icomm); Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); grid->grid_refct = 1; grid->nprow = nprow; grid->npcol = npcol; grid->myrow = myrow; grid->mycol = mycol; /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ grid->ictxrow = Csys2blacs_handle(icomm); Cblacs_gridinit(&grid->ictxrow,"R",1,size); grid->ictxcol = Csys2blacs_handle(icomm); Cblacs_gridinit(&grid->ictxcol,"R",size,1); PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid)); } else grid->grid_refct++; PetscCall(PetscCommDestroy(&icomm)); a->grid = grid; a->mb = DEFAULT_BLOCKSIZE; a->nb = DEFAULT_BLOCKSIZE; PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat"); PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg)); if (flg) { a->mb = array[0]; a->nb = (k>1)? array[1]: a->mb; } PetscOptionsEnd(); PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK)); PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK)); PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK)); PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK)); PetscFunctionReturn(0); } /*@C MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format (2D block cyclic distribution). Collective Input Parameters: + comm - MPI communicator . mb - row block size (or PETSC_DECIDE to have it set) . nb - column block size (or PETSC_DECIDE to have it set) . M - number of global rows . N - number of global columns . rsrc - coordinate of process that owns the first row of the distributed matrix - csrc - coordinate of process that owns the first column of the distributed matrix Output Parameter: . A - the matrix Options Database Keys: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), MatXXXXSetPreallocation() paradigm instead of this routine directly. [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] Notes: If PETSC_DECIDE is used for the block sizes, then an appropriate value is chosen. Storage Information: Storate is completely managed by ScaLAPACK, so this requires PETSc to be configured with ScaLAPACK. In particular, PETSc's local sizes lose significance and are thus ignored. The block sizes refer to the values used for the distributed matrix, not the same meaning as in BAIJ. Level: intermediate .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()` @*/ PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) { Mat_ScaLAPACK *a; PetscInt m,n; PetscFunctionBegin; PetscCall(MatCreate(comm,A)); PetscCall(MatSetType(*A,MATSCALAPACK)); PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); /* rows and columns are NOT distributed according to PetscSplitOwnership */ m = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); n = PETSC_DECIDE; PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); PetscCall(MatSetSizes(*A,m,n,M,N)); a = (Mat_ScaLAPACK*)(*A)->data; PetscCall(PetscBLASIntCast(M,&a->M)); PetscCall(PetscBLASIntCast(N,&a->N)); PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); PetscCall(PetscBLASIntCast(rsrc,&a->rsrc)); PetscCall(PetscBLASIntCast(csrc,&a->csrc)); PetscCall(MatSetUp(*A)); PetscFunctionReturn(0); }