1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2d24d4204SJose E. Roman
327e75052SPierre Jolivet const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
427e75052SPierre Jolivet " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
527e75052SPierre Jolivet " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
627e75052SPierre Jolivet " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
727e75052SPierre Jolivet " TITLE = {Sca{LAPACK} Users' Guide},\n"
827e75052SPierre Jolivet " PUBLISHER = {SIAM},\n"
927e75052SPierre Jolivet " ADDRESS = {Philadelphia, PA},\n"
1027e75052SPierre Jolivet " YEAR = 1997\n"
1127e75052SPierre Jolivet "}\n";
1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite = PETSC_FALSE;
1327e75052SPierre Jolivet
14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
15d24d4204SJose E. Roman
16d24d4204SJose E. Roman /*
17d24d4204SJose E. Roman The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18d24d4204SJose E. Roman is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19d24d4204SJose E. Roman */
20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21d24d4204SJose E. Roman
Petsc_ScaLAPACK_keyval_free(void)22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23d71ae5a4SJacob Faibussowitsch {
24f7ec113fSDamian Marek PetscFunctionBegin;
259566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
28f7ec113fSDamian Marek }
29f7ec113fSDamian Marek
MatView_ScaLAPACK(Mat A,PetscViewer viewer)30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31d71ae5a4SJacob Faibussowitsch {
32d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
339f196a02SMartin Diehl PetscBool isascii;
34d24d4204SJose E. Roman PetscViewerFormat format;
35d24d4204SJose E. Roman Mat Adense;
36d24d4204SJose E. Roman
37d24d4204SJose E. Roman PetscFunctionBegin;
389f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
399f196a02SMartin Diehl if (isascii) {
409566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format));
41d24d4204SJose E. Roman if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
439566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
449566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
47d24d4204SJose E. Roman } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
49d24d4204SJose E. Roman }
50d24d4204SJose E. Roman }
51d24d4204SJose E. Roman /* convert to dense format and call MatView() */
529566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
539566063dSJacob Faibussowitsch PetscCall(MatView(Adense, viewer));
549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense));
553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
56d24d4204SJose E. Roman }
57d24d4204SJose E. Roman
MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo * info)58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59d71ae5a4SJacob Faibussowitsch {
60d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61d24d4204SJose E. Roman PetscLogDouble isend[2], irecv[2];
62d24d4204SJose E. Roman
63d24d4204SJose E. Roman PetscFunctionBegin;
64d24d4204SJose E. Roman info->block_size = 1.0;
65d24d4204SJose E. Roman
66d24d4204SJose E. Roman isend[0] = a->lld * a->locc; /* locally allocated */
67d24d4204SJose E. Roman isend[1] = a->locr * a->locc; /* used submatrix */
68d24d4204SJose E. Roman if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69d24d4204SJose E. Roman info->nz_allocated = isend[0];
70d24d4204SJose E. Roman info->nz_used = isend[1];
71d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_MAX) {
72462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73d24d4204SJose E. Roman info->nz_allocated = irecv[0];
74d24d4204SJose E. Roman info->nz_used = irecv[1];
75d24d4204SJose E. Roman } else if (flag == MAT_GLOBAL_SUM) {
76462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77d24d4204SJose E. Roman info->nz_allocated = irecv[0];
78d24d4204SJose E. Roman info->nz_used = irecv[1];
79d24d4204SJose E. Roman }
80d24d4204SJose E. Roman
81d24d4204SJose E. Roman info->nz_unneeded = 0;
82d24d4204SJose E. Roman info->assemblies = A->num_ass;
83d24d4204SJose E. Roman info->mallocs = 0;
844dfa11a4SJacob Faibussowitsch info->memory = 0; /* REVIEW ME */
85d24d4204SJose E. Roman info->fill_ratio_given = 0;
86d24d4204SJose E. Roman info->fill_ratio_needed = 0;
87d24d4204SJose E. Roman info->factor_mallocs = 0;
883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
89d24d4204SJose E. Roman }
90d24d4204SJose E. Roman
MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92d71ae5a4SJacob Faibussowitsch {
93b12397e7SPierre Jolivet Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94b12397e7SPierre Jolivet
95d24d4204SJose E. Roman PetscFunctionBegin;
96d24d4204SJose E. Roman switch (op) {
97d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATIONS:
98d24d4204SJose E. Roman case MAT_NEW_NONZERO_LOCATION_ERR:
99d24d4204SJose E. Roman case MAT_NEW_NONZERO_ALLOCATION_ERR:
100d24d4204SJose E. Roman case MAT_SYMMETRIC:
101d24d4204SJose E. Roman case MAT_SORTED_FULL:
102d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN:
103d71ae5a4SJacob Faibussowitsch break;
104d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED:
105d71ae5a4SJacob Faibussowitsch a->roworiented = flg;
106d71ae5a4SJacob Faibussowitsch break;
107d71ae5a4SJacob Faibussowitsch default:
108d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109d24d4204SJose E. Roman }
1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
111d24d4204SJose E. Roman }
112d24d4204SJose E. Roman
MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt * rows,PetscInt nc,const PetscInt * cols,const PetscScalar * vals,InsertMode imode)113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114d71ae5a4SJacob Faibussowitsch {
115d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116d24d4204SJose E. Roman PetscInt i, j;
117d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
118b12397e7SPierre Jolivet PetscBool roworiented = a->roworiented;
119d24d4204SJose E. Roman
120d24d4204SJose E. Roman PetscFunctionBegin;
121b12397e7SPierre Jolivet PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122d24d4204SJose E. Roman for (i = 0; i < nr; i++) {
123d24d4204SJose E. Roman if (rows[i] < 0) continue;
1249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125d24d4204SJose E. Roman for (j = 0; j < nc; j++) {
126d24d4204SJose E. Roman if (cols[j] < 0) continue;
1279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129d24d4204SJose E. Roman if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130b12397e7SPierre Jolivet if (roworiented) {
131d24d4204SJose E. Roman switch (imode) {
132d71ae5a4SJacob Faibussowitsch case INSERT_VALUES:
133d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134d71ae5a4SJacob Faibussowitsch break;
135d71ae5a4SJacob Faibussowitsch default:
136d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137d71ae5a4SJacob Faibussowitsch break;
138b12397e7SPierre Jolivet }
139b12397e7SPierre Jolivet } else {
140b12397e7SPierre Jolivet switch (imode) {
141d71ae5a4SJacob Faibussowitsch case INSERT_VALUES:
142d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143d71ae5a4SJacob Faibussowitsch break;
144d71ae5a4SJacob Faibussowitsch default:
145d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146d71ae5a4SJacob Faibussowitsch break;
147b12397e7SPierre Jolivet }
148d24d4204SJose E. Roman }
149d24d4204SJose E. Roman } else {
15028b400f6SJacob Faibussowitsch PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151d24d4204SJose E. Roman A->assembled = PETSC_FALSE;
152b12397e7SPierre Jolivet PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153d24d4204SJose E. Roman }
154d24d4204SJose E. Roman }
155d24d4204SJose E. Roman }
1563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
157d24d4204SJose E. Roman }
158d24d4204SJose E. Roman
MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscBool hermitian,PetscScalar beta,const PetscScalar * x,PetscScalar * y)1595e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160d71ae5a4SJacob Faibussowitsch {
161d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
162d24d4204SJose E. Roman PetscScalar *x2d, *y2d, alpha = 1.0;
163d24d4204SJose E. Roman const PetscInt *ranges;
164d24d4204SJose E. Roman PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165d24d4204SJose E. Roman
166d24d4204SJose E. Roman PetscFunctionBegin;
167d24d4204SJose E. Roman if (transpose) {
168d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1699566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
1716497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
1749566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
1759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176d24d4204SJose E. Roman ylld = 1;
177792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
179d24d4204SJose E. Roman
180d24d4204SJose E. Roman /* allocate 2d vectors */
181d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
1846497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
185d24d4204SJose E. Roman
186d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
189792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
191d24d4204SJose E. Roman
192d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */
193c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194d24d4204SJose E. Roman
195d24d4204SJose E. Roman /* redistribute y as a row of a 2d matrix */
196792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197d24d4204SJose E. Roman
198d24d4204SJose E. Roman /* call PBLAS subroutine */
1995e4d33a3SBlanca Mellado Pinto if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
2005e4d33a3SBlanca Mellado Pinto else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201d24d4204SJose E. Roman
202d24d4204SJose E. Roman /* redistribute y from a row of a 2d matrix */
203792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204d24d4204SJose E. Roman
205d24d4204SJose E. Roman } else { /* non-transpose */
206d24d4204SJose E. Roman
207d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (1d block distribution) */
2089566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
2099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210d24d4204SJose E. Roman xlld = 1;
211792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
2139566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
2149566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
2156497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
218d24d4204SJose E. Roman
219d24d4204SJose E. Roman /* allocate 2d vectors */
220d24d4204SJose E. Roman lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
2229566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
2236497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
224d24d4204SJose E. Roman
225d24d4204SJose E. Roman /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
228792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
230d24d4204SJose E. Roman
231d24d4204SJose E. Roman /* redistribute x as a row of a 2d matrix */
232c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233d24d4204SJose E. Roman
234d24d4204SJose E. Roman /* redistribute y as a column of a 2d matrix */
235792fecdfSBarry Smith if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236d24d4204SJose E. Roman
237d24d4204SJose E. Roman /* call PBLAS subroutine */
238792fecdfSBarry Smith PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239d24d4204SJose E. Roman
240d24d4204SJose E. Roman /* redistribute y from a column of a 2d matrix */
241792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242d24d4204SJose E. Roman }
2439566063dSJacob Faibussowitsch PetscCall(PetscFree2(x2d, y2d));
2443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
245d24d4204SJose E. Roman }
246d24d4204SJose E. Roman
MatMult_ScaLAPACK(Mat A,Vec x,Vec y)247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248d71ae5a4SJacob Faibussowitsch {
249d24d4204SJose E. Roman const PetscScalar *xarray;
250d24d4204SJose E. Roman PetscScalar *yarray;
251d24d4204SJose E. Roman
252d24d4204SJose E. Roman PetscFunctionBegin;
2539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray));
2549566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray));
2555e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
2569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray));
2579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray));
2583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
259d24d4204SJose E. Roman }
260d24d4204SJose E. Roman
MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262d71ae5a4SJacob Faibussowitsch {
263d24d4204SJose E. Roman const PetscScalar *xarray;
264d24d4204SJose E. Roman PetscScalar *yarray;
265d24d4204SJose E. Roman
266d24d4204SJose E. Roman PetscFunctionBegin;
2679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray));
2689566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yarray));
2695e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray));
2719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yarray));
2723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
273d24d4204SJose E. Roman }
274d24d4204SJose E. Roman
MatMultHermitianTranspose_ScaLAPACK(Mat A,Vec x,Vec y)2755e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
2765e4d33a3SBlanca Mellado Pinto {
2775e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray;
2785e4d33a3SBlanca Mellado Pinto PetscScalar *yarray;
2795e4d33a3SBlanca Mellado Pinto
2805e4d33a3SBlanca Mellado Pinto PetscFunctionBegin;
2815e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray));
2825e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayWrite(y, &yarray));
2835e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
2845e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray));
2855e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayWrite(y, &yarray));
2865e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS);
2875e4d33a3SBlanca Mellado Pinto }
2885e4d33a3SBlanca Mellado Pinto
MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290d71ae5a4SJacob Faibussowitsch {
291d24d4204SJose E. Roman const PetscScalar *xarray;
292d24d4204SJose E. Roman PetscScalar *zarray;
293d24d4204SJose E. Roman
294d24d4204SJose E. Roman PetscFunctionBegin;
2959566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z));
2969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray));
2979566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray));
2985e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
2999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray));
3009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray));
3013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
302d24d4204SJose E. Roman }
303d24d4204SJose E. Roman
MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)304d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305d71ae5a4SJacob Faibussowitsch {
306d24d4204SJose E. Roman const PetscScalar *xarray;
307d24d4204SJose E. Roman PetscScalar *zarray;
308d24d4204SJose E. Roman
309d24d4204SJose E. Roman PetscFunctionBegin;
3109566063dSJacob Faibussowitsch if (y != z) PetscCall(VecCopy(y, z));
3119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xarray));
3129566063dSJacob Faibussowitsch PetscCall(VecGetArray(z, &zarray));
3135e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
3145e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArrayRead(x, &xarray));
3155e4d33a3SBlanca Mellado Pinto PetscCall(VecRestoreArray(z, &zarray));
3165e4d33a3SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS);
3175e4d33a3SBlanca Mellado Pinto }
3185e4d33a3SBlanca Mellado Pinto
MatMultHermitianTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)3195e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
3205e4d33a3SBlanca Mellado Pinto {
3215e4d33a3SBlanca Mellado Pinto const PetscScalar *xarray;
3225e4d33a3SBlanca Mellado Pinto PetscScalar *zarray;
3235e4d33a3SBlanca Mellado Pinto
3245e4d33a3SBlanca Mellado Pinto PetscFunctionBegin;
3255e4d33a3SBlanca Mellado Pinto if (y != z) PetscCall(VecCopy(y, z));
3265e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArrayRead(x, &xarray));
3275e4d33a3SBlanca Mellado Pinto PetscCall(VecGetArray(z, &zarray));
3285e4d33a3SBlanca Mellado Pinto PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
3299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xarray));
3309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(z, &zarray));
3313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
332d24d4204SJose E. Roman }
333d24d4204SJose E. Roman
MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)334d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335d71ae5a4SJacob Faibussowitsch {
336d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
337d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
338d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
339d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0;
340d24d4204SJose E. Roman PetscBLASInt one = 1;
341d24d4204SJose E. Roman
342d24d4204SJose E. Roman PetscFunctionBegin;
343792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344d24d4204SJose E. Roman C->assembled = PETSC_TRUE;
3453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
346d24d4204SJose E. Roman }
347d24d4204SJose E. Roman
MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)348d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349d71ae5a4SJacob Faibussowitsch {
350d24d4204SJose E. Roman PetscFunctionBegin;
3519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3529566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK));
3539566063dSJacob Faibussowitsch PetscCall(MatSetUp(C));
354d24d4204SJose E. Roman C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
3553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
356d24d4204SJose E. Roman }
357d24d4204SJose E. Roman
MatTransposeMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)358e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359e0d15688SBlanca Mellado Pinto {
360e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
361e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
362e0d15688SBlanca Mellado Pinto Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
363e0d15688SBlanca Mellado Pinto PetscScalar sone = 1.0, zero = 0.0;
364e0d15688SBlanca Mellado Pinto PetscBLASInt one = 1;
365e0d15688SBlanca Mellado Pinto
366e0d15688SBlanca Mellado Pinto PetscFunctionBegin;
367e0d15688SBlanca Mellado Pinto PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368e0d15688SBlanca Mellado Pinto C->assembled = PETSC_TRUE;
369e0d15688SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS);
370e0d15688SBlanca Mellado Pinto }
371e0d15688SBlanca Mellado Pinto
MatTransposeMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)372e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373e0d15688SBlanca Mellado Pinto {
374e0d15688SBlanca Mellado Pinto PetscFunctionBegin;
375e0d15688SBlanca Mellado Pinto PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376e0d15688SBlanca Mellado Pinto PetscCall(MatSetType(C, MATSCALAPACK));
377e0d15688SBlanca Mellado Pinto PetscCall(MatSetUp(C));
378e0d15688SBlanca Mellado Pinto C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379e0d15688SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS);
380e0d15688SBlanca Mellado Pinto }
381e0d15688SBlanca Mellado Pinto
MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383d71ae5a4SJacob Faibussowitsch {
384d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
385d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
386d24d4204SJose E. Roman Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
387d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0;
388d24d4204SJose E. Roman PetscBLASInt one = 1;
389d24d4204SJose E. Roman
390d24d4204SJose E. Roman PetscFunctionBegin;
391792fecdfSBarry Smith PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392d24d4204SJose E. Roman C->assembled = PETSC_TRUE;
3933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
394d24d4204SJose E. Roman }
395d24d4204SJose E. Roman
MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)396d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397d71ae5a4SJacob Faibussowitsch {
398d24d4204SJose E. Roman PetscFunctionBegin;
3999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
4009566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATSCALAPACK));
4019566063dSJacob Faibussowitsch PetscCall(MatSetUp(C));
402e0d15688SBlanca Mellado Pinto C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
4033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
404d24d4204SJose E. Roman }
405d24d4204SJose E. Roman
MatProductSetFromOptions_ScaLAPACK(Mat C)406d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407d71ae5a4SJacob Faibussowitsch {
408d24d4204SJose E. Roman Mat_Product *product = C->product;
409d24d4204SJose E. Roman
410d24d4204SJose E. Roman PetscFunctionBegin;
411d24d4204SJose E. Roman switch (product->type) {
412d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB:
413e0d15688SBlanca Mellado Pinto C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_AB;
415e0d15688SBlanca Mellado Pinto break;
416e0d15688SBlanca Mellado Pinto case MATPRODUCT_AtB:
417e0d15688SBlanca Mellado Pinto C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_AtB;
419d71ae5a4SJacob Faibussowitsch break;
420d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt:
421e0d15688SBlanca Mellado Pinto C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422e0d15688SBlanca Mellado Pinto C->ops->productsymbolic = MatProductSymbolic_ABt;
423d71ae5a4SJacob Faibussowitsch break;
424d71ae5a4SJacob Faibussowitsch default:
425d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426d24d4204SJose E. Roman }
4273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
428d24d4204SJose E. Roman }
429d24d4204SJose E. Roman
MatGetDiagonal_ScaLAPACK(Mat A,Vec D)430d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431d71ae5a4SJacob Faibussowitsch {
432d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
433d24d4204SJose E. Roman PetscScalar *darray, *d2d, v;
434d24d4204SJose E. Roman const PetscInt *ranges;
435d24d4204SJose E. Roman PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436d24d4204SJose E. Roman
437d24d4204SJose E. Roman PetscFunctionBegin;
4389566063dSJacob Faibussowitsch PetscCall(VecGetArray(D, &darray));
439d24d4204SJose E. Roman
440d24d4204SJose E. Roman if (A->rmap->N <= A->cmap->N) { /* row version */
441d24d4204SJose E. Roman
442d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */
4439566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
4449566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
4456497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
448d24d4204SJose E. Roman
449d24d4204SJose E. Roman /* allocate 2d vector */
450d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
4519566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d));
4526497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
453d24d4204SJose E. Roman
454d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */
455792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
457d24d4204SJose E. Roman
458d24d4204SJose E. Roman /* collect diagonal */
459d24d4204SJose E. Roman for (j = 1; j <= a->M; j++) {
460792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462d24d4204SJose E. Roman }
463d24d4204SJose E. Roman
464d24d4204SJose E. Roman /* redistribute d from a column of a 2d matrix */
465792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
4669566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d));
467d24d4204SJose E. Roman
468d24d4204SJose E. Roman } else { /* column version */
469d24d4204SJose E. Roman
470d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */
4719566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
4729566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473d24d4204SJose E. Roman dlld = 1;
474792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
476d24d4204SJose E. Roman
477d24d4204SJose E. Roman /* allocate 2d vector */
478d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
4799566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d));
480d24d4204SJose E. Roman
481d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */
482792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
484d24d4204SJose E. Roman
485d24d4204SJose E. Roman /* collect diagonal */
486d24d4204SJose E. Roman for (j = 1; j <= a->N; j++) {
487792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489d24d4204SJose E. Roman }
490d24d4204SJose E. Roman
491d24d4204SJose E. Roman /* redistribute d from a row of a 2d matrix */
492792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
4939566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d));
494d24d4204SJose E. Roman }
495d24d4204SJose E. Roman
4969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(D, &darray));
4979566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(D));
4989566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(D));
4993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
500d24d4204SJose E. Roman }
501d24d4204SJose E. Roman
MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)502d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503d71ae5a4SJacob Faibussowitsch {
504d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
505d24d4204SJose E. Roman const PetscScalar *d;
506d24d4204SJose E. Roman const PetscInt *ranges;
507d24d4204SJose E. Roman PetscScalar *d2d;
508d24d4204SJose E. Roman PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509d24d4204SJose E. Roman
510d24d4204SJose E. Roman PetscFunctionBegin;
511d24d4204SJose E. Roman if (R) {
512eb8278ccSPierre Jolivet PetscCall(VecGetArrayRead(R, &d));
513d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */
5149566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
5159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516d24d4204SJose E. Roman dlld = 1;
517792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
519d24d4204SJose E. Roman
520d24d4204SJose E. Roman /* allocate 2d vector */
521d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
5229566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d));
523d24d4204SJose E. Roman
524d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */
525792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
527d24d4204SJose E. Roman
528d24d4204SJose E. Roman /* redistribute d to a row of a 2d matrix */
529c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530d24d4204SJose E. Roman
531d24d4204SJose E. Roman /* broadcast along process columns */
532d24d4204SJose E. Roman if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534d24d4204SJose E. Roman
535d24d4204SJose E. Roman /* local scaling */
5369371c9d4SSatish Balay for (j = 0; j < a->locc; j++)
5379371c9d4SSatish Balay for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538d24d4204SJose E. Roman
5399566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d));
540eb8278ccSPierre Jolivet PetscCall(VecRestoreArrayRead(R, &d));
541d24d4204SJose E. Roman }
542d24d4204SJose E. Roman if (L) {
543eb8278ccSPierre Jolivet PetscCall(VecGetArrayRead(L, &d));
544d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (1d block distribution) */
5459566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
5469566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
5476497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
550d24d4204SJose E. Roman
551d24d4204SJose E. Roman /* allocate 2d vector */
552d24d4204SJose E. Roman lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
5539566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(lszd, &d2d));
5546497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
555d24d4204SJose E. Roman
556d24d4204SJose E. Roman /* create ScaLAPACK descriptor for vector (2d block distribution) */
557792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
559d24d4204SJose E. Roman
560d24d4204SJose E. Roman /* redistribute d to a column of a 2d matrix */
561c87776d3SJose E. Roman PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562d24d4204SJose E. Roman
563d24d4204SJose E. Roman /* broadcast along process rows */
564d24d4204SJose E. Roman if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565d24d4204SJose E. Roman else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566d24d4204SJose E. Roman
567d24d4204SJose E. Roman /* local scaling */
5689371c9d4SSatish Balay for (i = 0; i < a->locr; i++)
5699371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570d24d4204SJose E. Roman
5719566063dSJacob Faibussowitsch PetscCall(PetscFree(d2d));
572eb8278ccSPierre Jolivet PetscCall(VecRestoreArrayRead(L, &d));
573d24d4204SJose E. Roman }
5743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
575d24d4204SJose E. Roman }
576d24d4204SJose E. Roman
MatScale_ScaLAPACK(Mat X,PetscScalar a)577d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
578d71ae5a4SJacob Faibussowitsch {
579d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
580d24d4204SJose E. Roman PetscBLASInt n, one = 1;
581d24d4204SJose E. Roman
582d24d4204SJose E. Roman PetscFunctionBegin;
583d24d4204SJose E. Roman n = x->lld * x->locc;
584792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
5853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
586d24d4204SJose E. Roman }
587d24d4204SJose E. Roman
MatShift_ScaLAPACK(Mat X,PetscScalar alpha)588d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
589d71ae5a4SJacob Faibussowitsch {
590d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
591d24d4204SJose E. Roman PetscBLASInt i, n;
592d24d4204SJose E. Roman PetscScalar v;
593d24d4204SJose E. Roman
594d24d4204SJose E. Roman PetscFunctionBegin;
595d24d4204SJose E. Roman n = PetscMin(x->M, x->N);
596d24d4204SJose E. Roman for (i = 1; i <= n; i++) {
597792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
598d24d4204SJose E. Roman v += alpha;
599792fecdfSBarry Smith PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
600d24d4204SJose E. Roman }
6013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
602d24d4204SJose E. Roman }
603d24d4204SJose E. Roman
MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)604d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
605d71ae5a4SJacob Faibussowitsch {
606d24d4204SJose E. Roman Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
607d24d4204SJose E. Roman Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data;
608d24d4204SJose E. Roman PetscBLASInt one = 1;
609d24d4204SJose E. Roman PetscScalar beta = 1.0;
610d24d4204SJose E. Roman
611d24d4204SJose E. Roman PetscFunctionBegin;
612d24d4204SJose E. Roman MatScaLAPACKCheckDistribution(Y, 1, X, 3);
613792fecdfSBarry Smith PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
6149566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y));
6153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
616d24d4204SJose E. Roman }
617d24d4204SJose E. Roman
MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)618d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
619d71ae5a4SJacob Faibussowitsch {
620d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
621d24d4204SJose E. Roman Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
622d24d4204SJose E. Roman
623d24d4204SJose E. Roman PetscFunctionBegin;
6249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
6259566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B));
6263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
627d24d4204SJose E. Roman }
628d24d4204SJose E. Roman
MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat * B)629d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
630d71ae5a4SJacob Faibussowitsch {
631d24d4204SJose E. Roman Mat Bs;
632d24d4204SJose E. Roman MPI_Comm comm;
633d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
634d24d4204SJose E. Roman
635d24d4204SJose E. Roman PetscFunctionBegin;
6369566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
6379566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bs));
6389566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
6399566063dSJacob Faibussowitsch PetscCall(MatSetType(Bs, MATSCALAPACK));
640d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data;
641d24d4204SJose E. Roman b->M = a->M;
642d24d4204SJose E. Roman b->N = a->N;
643d24d4204SJose E. Roman b->mb = a->mb;
644d24d4204SJose E. Roman b->nb = a->nb;
645d24d4204SJose E. Roman b->rsrc = a->rsrc;
646d24d4204SJose E. Roman b->csrc = a->csrc;
6479566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bs));
648d24d4204SJose E. Roman *B = Bs;
64948a46eb9SPierre Jolivet if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
650d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE;
6513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
652d24d4204SJose E. Roman }
653d24d4204SJose E. Roman
MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat * B)654d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
655d71ae5a4SJacob Faibussowitsch {
656d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
657d24d4204SJose E. Roman Mat Bs = *B;
658d24d4204SJose E. Roman PetscBLASInt one = 1;
659d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0;
660d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
661d24d4204SJose E. Roman PetscInt i, j;
662d24d4204SJose E. Roman #endif
663d24d4204SJose E. Roman
664d24d4204SJose E. Roman PetscFunctionBegin;
6657fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
6660fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
6679566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
668d24d4204SJose E. Roman *B = Bs;
669d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data;
670792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
671d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
672d24d4204SJose E. Roman /* undo conjugation */
6739371c9d4SSatish Balay for (i = 0; i < b->locr; i++)
6749371c9d4SSatish Balay for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
675d24d4204SJose E. Roman #endif
676d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE;
6773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
678d24d4204SJose E. Roman }
679d24d4204SJose E. Roman
MatConjugate_ScaLAPACK(Mat A)680d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
681d71ae5a4SJacob Faibussowitsch {
682d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
683d24d4204SJose E. Roman PetscInt i, j;
684d24d4204SJose E. Roman
685d24d4204SJose E. Roman PetscFunctionBegin;
6869371c9d4SSatish Balay for (i = 0; i < a->locr; i++)
6879371c9d4SSatish Balay for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
6883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
689d24d4204SJose E. Roman }
690d24d4204SJose E. Roman
MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat * B)691d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
692d71ae5a4SJacob Faibussowitsch {
693d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
694d24d4204SJose E. Roman Mat Bs = *B;
695d24d4204SJose E. Roman PetscBLASInt one = 1;
696d24d4204SJose E. Roman PetscScalar sone = 1.0, zero = 0.0;
697d24d4204SJose E. Roman
698d24d4204SJose E. Roman PetscFunctionBegin;
6990fdf79fbSJacob Faibussowitsch PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
7009566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
701d24d4204SJose E. Roman *B = Bs;
702d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bs->data;
703792fecdfSBarry Smith PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
704d24d4204SJose E. Roman Bs->assembled = PETSC_TRUE;
7053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
706d24d4204SJose E. Roman }
707d24d4204SJose E. Roman
MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)708d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
709d71ae5a4SJacob Faibussowitsch {
710d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
711d24d4204SJose E. Roman PetscScalar *x, *x2d;
712d24d4204SJose E. Roman const PetscInt *ranges;
713d24d4204SJose E. Roman PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
714d24d4204SJose E. Roman
715d24d4204SJose E. Roman PetscFunctionBegin;
7169566063dSJacob Faibussowitsch PetscCall(VecCopy(B, X));
7179566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x));
718d24d4204SJose E. Roman
719d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (1d block distribution) */
7209566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
7219566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
7226497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
723792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
724d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
725d24d4204SJose E. Roman
726d24d4204SJose E. Roman /* allocate 2d vector */
727d24d4204SJose E. Roman lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
7289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lszx, &x2d));
7296497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
730d24d4204SJose E. Roman
731d24d4204SJose E. Roman /* create ScaLAPACK descriptor for a vector (2d block distribution) */
732792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
733d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
734d24d4204SJose E. Roman
735d24d4204SJose E. Roman /* redistribute x as a column of a 2d matrix */
736792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
737d24d4204SJose E. Roman
738d24d4204SJose E. Roman /* call ScaLAPACK subroutine */
739d24d4204SJose E. Roman switch (A->factortype) {
740d24d4204SJose E. Roman case MAT_FACTOR_LU:
741792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
742d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info);
743d24d4204SJose E. Roman break;
744d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY:
745792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
746d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info);
747d24d4204SJose E. Roman break;
748d71ae5a4SJacob Faibussowitsch default:
749d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
750d24d4204SJose E. Roman }
751d24d4204SJose E. Roman
752d24d4204SJose E. Roman /* redistribute x from a column of a 2d matrix */
753792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
754d24d4204SJose E. Roman
7559566063dSJacob Faibussowitsch PetscCall(PetscFree(x2d));
7569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x));
7573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
758d24d4204SJose E. Roman }
759d24d4204SJose E. Roman
MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)760d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
761d71ae5a4SJacob Faibussowitsch {
762d24d4204SJose E. Roman PetscFunctionBegin;
7639566063dSJacob Faibussowitsch PetscCall(MatSolve_ScaLAPACK(A, B, X));
7649566063dSJacob Faibussowitsch PetscCall(VecAXPY(X, 1, Y));
7653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
766d24d4204SJose E. Roman }
767d24d4204SJose E. Roman
MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)768d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
769d71ae5a4SJacob Faibussowitsch {
7705d5af635SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
771d24d4204SJose E. Roman PetscBool flg1, flg2;
772d24d4204SJose E. Roman PetscBLASInt one = 1, info;
7735d5af635SJose E. Roman Mat C;
7745d5af635SJose E. Roman MatType type;
775d24d4204SJose E. Roman
776d24d4204SJose E. Roman PetscFunctionBegin;
7779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
7789566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
7795d5af635SJose E. Roman if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
7805d5af635SJose E. Roman if (flg2) {
7815d5af635SJose E. Roman if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
7825d5af635SJose E. Roman else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
7835d5af635SJose E. Roman C = X;
7845d5af635SJose E. Roman } else {
7855d5af635SJose E. Roman PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
7865d5af635SJose E. Roman }
7875d5af635SJose E. Roman x = (Mat_ScaLAPACK *)C->data;
788d24d4204SJose E. Roman
789d24d4204SJose E. Roman switch (A->factortype) {
790d24d4204SJose E. Roman case MAT_FACTOR_LU:
791792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
792d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrs", info);
793d24d4204SJose E. Roman break;
794d24d4204SJose E. Roman case MAT_FACTOR_CHOLESKY:
795792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
796d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrs", info);
797d24d4204SJose E. Roman break;
798d71ae5a4SJacob Faibussowitsch default:
799d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
800d24d4204SJose E. Roman }
8015d5af635SJose E. Roman if (!flg2) {
8025d5af635SJose E. Roman PetscCall(MatGetType(X, &type));
8035d5af635SJose E. Roman PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
8045d5af635SJose E. Roman PetscCall(MatDestroy(&C));
8055d5af635SJose E. Roman }
8063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
807d24d4204SJose E. Roman }
808d24d4204SJose E. Roman
MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo * factorinfo)809d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
810d71ae5a4SJacob Faibussowitsch {
811d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
812d24d4204SJose E. Roman PetscBLASInt one = 1, info;
813d24d4204SJose E. Roman
814d24d4204SJose E. Roman PetscFunctionBegin;
815aa624791SPierre Jolivet if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
816792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
817d24d4204SJose E. Roman PetscCheckScaLapackInfo("getrf", info);
818d24d4204SJose E. Roman A->factortype = MAT_FACTOR_LU;
819d24d4204SJose E. Roman A->assembled = PETSC_TRUE;
820d24d4204SJose E. Roman
8219566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype));
8229566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
8233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
824d24d4204SJose E. Roman }
825d24d4204SJose E. Roman
MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)826d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
827d71ae5a4SJacob Faibussowitsch {
828d24d4204SJose E. Roman PetscFunctionBegin;
8299566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
8309566063dSJacob Faibussowitsch PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
8313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
832d24d4204SJose E. Roman }
833d24d4204SJose E. Roman
MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)834d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
835d71ae5a4SJacob Faibussowitsch {
836d24d4204SJose E. Roman PetscFunctionBegin;
837d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
8383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
839d24d4204SJose E. Roman }
840d24d4204SJose E. Roman
MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo * factorinfo)841d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
842d71ae5a4SJacob Faibussowitsch {
843d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
844d24d4204SJose E. Roman PetscBLASInt one = 1, info;
845d24d4204SJose E. Roman
846d24d4204SJose E. Roman PetscFunctionBegin;
847792fecdfSBarry Smith PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
848d24d4204SJose E. Roman PetscCheckScaLapackInfo("potrf", info);
849d24d4204SJose E. Roman A->factortype = MAT_FACTOR_CHOLESKY;
850d24d4204SJose E. Roman A->assembled = PETSC_TRUE;
851d24d4204SJose E. Roman
8529566063dSJacob Faibussowitsch PetscCall(PetscFree(A->solvertype));
8539566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
8543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
855d24d4204SJose E. Roman }
856d24d4204SJose E. Roman
MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)857d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
858d71ae5a4SJacob Faibussowitsch {
859d24d4204SJose E. Roman PetscFunctionBegin;
8609566063dSJacob Faibussowitsch PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
8619566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
8623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
863d24d4204SJose E. Roman }
864d24d4204SJose E. Roman
MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo * info)865d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
866d71ae5a4SJacob Faibussowitsch {
867d24d4204SJose E. Roman PetscFunctionBegin;
868d24d4204SJose E. Roman /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
8693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
870d24d4204SJose E. Roman }
871d24d4204SJose E. Roman
MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType * type)87266976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
873d71ae5a4SJacob Faibussowitsch {
874d24d4204SJose E. Roman PetscFunctionBegin;
875d24d4204SJose E. Roman *type = MATSOLVERSCALAPACK;
8763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
877d24d4204SJose E. Roman }
878d24d4204SJose E. Roman
MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat * F)879d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
880d71ae5a4SJacob Faibussowitsch {
881d24d4204SJose E. Roman Mat B;
88259172f18SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
883d24d4204SJose E. Roman
884d24d4204SJose E. Roman PetscFunctionBegin;
885d24d4204SJose E. Roman /* Create the factorization matrix */
8869566063dSJacob Faibussowitsch PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
88766e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE;
888d24d4204SJose E. Roman B->factortype = ftype;
8899566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype));
8909566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
891d24d4204SJose E. Roman
8929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
893d24d4204SJose E. Roman *F = B;
8943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
895d24d4204SJose E. Roman }
896d24d4204SJose E. Roman
MatSolverTypeRegister_ScaLAPACK(void)897d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
898d71ae5a4SJacob Faibussowitsch {
899d24d4204SJose E. Roman PetscFunctionBegin;
9009566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
9019566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
9023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
903d24d4204SJose E. Roman }
904d24d4204SJose E. Roman
MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal * nrm)905d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
906d71ae5a4SJacob Faibussowitsch {
907d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
908d24d4204SJose E. Roman PetscBLASInt one = 1, lwork = 0;
909d24d4204SJose E. Roman const char *ntype;
910d68f4f38SPierre Jolivet PetscScalar *work = NULL, dummy;
911d24d4204SJose E. Roman
912d24d4204SJose E. Roman PetscFunctionBegin;
913d24d4204SJose E. Roman switch (type) {
914d24d4204SJose E. Roman case NORM_1:
915d24d4204SJose E. Roman ntype = "1";
916d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc);
917d24d4204SJose E. Roman break;
918d24d4204SJose E. Roman case NORM_FROBENIUS:
919d24d4204SJose E. Roman ntype = "F";
920d24d4204SJose E. Roman work = &dummy;
921d24d4204SJose E. Roman break;
922d24d4204SJose E. Roman case NORM_INFINITY:
923d24d4204SJose E. Roman ntype = "I";
924d24d4204SJose E. Roman lwork = PetscMax(a->locr, a->locc);
925d24d4204SJose E. Roman break;
926d71ae5a4SJacob Faibussowitsch default:
927d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
928d24d4204SJose E. Roman }
9299566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscMalloc1(lwork, &work));
930d24d4204SJose E. Roman *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
9319566063dSJacob Faibussowitsch if (lwork) PetscCall(PetscFree(work));
9323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
933d24d4204SJose E. Roman }
934d24d4204SJose E. Roman
MatZeroEntries_ScaLAPACK(Mat A)935d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
936d71ae5a4SJacob Faibussowitsch {
937d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
938d24d4204SJose E. Roman
939d24d4204SJose E. Roman PetscFunctionBegin;
9409566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
9413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
942d24d4204SJose E. Roman }
943d24d4204SJose E. Roman
MatGetOwnershipIS_ScaLAPACK(Mat A,IS * rows,IS * cols)944d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
945d71ae5a4SJacob Faibussowitsch {
946d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
947d24d4204SJose E. Roman PetscInt i, n, nb, isrc, nproc, iproc, *idx;
948d24d4204SJose E. Roman
949d24d4204SJose E. Roman PetscFunctionBegin;
950d24d4204SJose E. Roman if (rows) {
951d24d4204SJose E. Roman n = a->locr;
952d24d4204SJose E. Roman nb = a->mb;
953d24d4204SJose E. Roman isrc = a->rsrc;
954d24d4204SJose E. Roman nproc = a->grid->nprow;
955d24d4204SJose E. Roman iproc = a->grid->myrow;
9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx));
957d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9589566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
959d24d4204SJose E. Roman }
960d24d4204SJose E. Roman if (cols) {
961d24d4204SJose E. Roman n = a->locc;
962d24d4204SJose E. Roman nb = a->nb;
963d24d4204SJose E. Roman isrc = a->csrc;
964d24d4204SJose E. Roman nproc = a->grid->npcol;
965d24d4204SJose E. Roman iproc = a->grid->mycol;
9669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx));
967d24d4204SJose E. Roman for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9689566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
969d24d4204SJose E. Roman }
9703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
971d24d4204SJose E. Roman }
972d24d4204SJose E. Roman
MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat * B)973d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
974d71ae5a4SJacob Faibussowitsch {
975d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
976d24d4204SJose E. Roman Mat Bmpi;
977d24d4204SJose E. Roman MPI_Comm comm;
978a00b6df3SJose E. Roman PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
9794b1a79daSJose E. Roman const PetscInt *ranges, *branges, *cwork;
9804b1a79daSJose E. Roman const PetscScalar *vwork;
981d24d4204SJose E. Roman PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info;
982d24d4204SJose E. Roman PetscScalar *barray;
9834b1a79daSJose E. Roman PetscBool differ = PETSC_FALSE;
9844b1a79daSJose E. Roman PetscMPIInt size;
985d24d4204SJose E. Roman
986d24d4204SJose E. Roman PetscFunctionBegin;
9879566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9889566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
9894b1a79daSJose E. Roman
9904b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
9919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
9929566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
9939371c9d4SSatish Balay for (i = 0; i < size; i++)
9949371c9d4SSatish Balay if (ranges[i + 1] != branges[i + 1]) {
9959371c9d4SSatish Balay differ = PETSC_TRUE;
9969371c9d4SSatish Balay break;
9979371c9d4SSatish Balay }
9984b1a79daSJose E. Roman }
9994b1a79daSJose E. Roman
10004b1a79daSJose E. Roman if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
10019566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi));
10024b1a79daSJose E. Roman m = PETSC_DECIDE;
10039566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
10044b1a79daSJose E. Roman n = PETSC_DECIDE;
10059566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10069566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10079566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE));
10089566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi));
10094b1a79daSJose E. Roman
10104b1a79daSJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */
10119566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1012a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb));
10136497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1014792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
10154b1a79daSJose E. Roman PetscCheckScaLapackInfo("descinit", info);
10164b1a79daSJose E. Roman
10174b1a79daSJose E. Roman /* redistribute matrix */
10189566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray));
1019792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
10209566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray));
10219566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10229566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
10234b1a79daSJose E. Roman
10244b1a79daSJose E. Roman /* transfer rows of auxiliary matrix to the final matrix B */
10259566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
10264b1a79daSJose E. Roman for (i = rstart; i < rend; i++) {
10279566063dSJacob Faibussowitsch PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
10289566063dSJacob Faibussowitsch PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
10299566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
10304b1a79daSJose E. Roman }
10319566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
10329566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
10339566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bmpi));
10344b1a79daSJose E. Roman
10354b1a79daSJose E. Roman } else { /* normal cases */
1036d24d4204SJose E. Roman
1037d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1038d24d4204SJose E. Roman else {
10399566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi));
104092c846b4SJose E. Roman m = PETSC_DECIDE;
10419566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
104292c846b4SJose E. Roman n = PETSC_DECIDE;
10439566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10449566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10459566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATDENSE));
10469566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi));
1047d24d4204SJose E. Roman }
1048d24d4204SJose E. Roman
1049d24d4204SJose E. Roman /* create ScaLAPACK descriptor for B (1d block distribution) */
10509566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1051a00b6df3SJose E. Roman PetscCall(MatDenseGetLDA(Bmpi, &ldb));
10526497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1053792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1054d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
1055d24d4204SJose E. Roman
1056d24d4204SJose E. Roman /* redistribute matrix */
10579566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bmpi, &barray));
1058792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
10599566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1060d24d4204SJose E. Roman
10619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1063ac530a7eSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1064ac530a7eSPierre Jolivet else *B = Bmpi;
10654b1a79daSJose E. Roman }
10663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1067d24d4204SJose E. Roman }
1068d24d4204SJose E. Roman
MatScaLAPACKCheckLayout(PetscLayout map,PetscBool * correct)1069d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1070d71ae5a4SJacob Faibussowitsch {
1071b12397e7SPierre Jolivet const PetscInt *ranges;
1072b12397e7SPierre Jolivet PetscMPIInt size;
1073b12397e7SPierre Jolivet PetscInt i, n;
1074b12397e7SPierre Jolivet
1075b12397e7SPierre Jolivet PetscFunctionBegin;
1076b12397e7SPierre Jolivet *correct = PETSC_TRUE;
1077b12397e7SPierre Jolivet PetscCallMPI(MPI_Comm_size(map->comm, &size));
1078b12397e7SPierre Jolivet if (size > 1) {
1079b12397e7SPierre Jolivet PetscCall(PetscLayoutGetRanges(map, &ranges));
1080b12397e7SPierre Jolivet n = ranges[1] - ranges[0];
10819371c9d4SSatish Balay for (i = 1; i < size; i++)
10829371c9d4SSatish Balay if (ranges[i + 1] - ranges[i] != n) break;
1083b12397e7SPierre Jolivet *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1084b12397e7SPierre Jolivet }
10853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1086b12397e7SPierre Jolivet }
1087b12397e7SPierre Jolivet
MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * B)1088d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1089d71ae5a4SJacob Faibussowitsch {
1090d24d4204SJose E. Roman Mat_ScaLAPACK *b;
1091d24d4204SJose E. Roman Mat Bmpi;
1092d24d4204SJose E. Roman MPI_Comm comm;
109392c846b4SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n;
1094b12397e7SPierre Jolivet const PetscInt *ranges, *rows, *cols;
1095d24d4204SJose E. Roman PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info;
1096c87776d3SJose E. Roman const PetscScalar *aarray;
1097b12397e7SPierre Jolivet IS ir, ic;
10984e8b52a1SJose E. Roman PetscInt lda;
1099b12397e7SPierre Jolivet PetscBool flg;
1100d24d4204SJose E. Roman
1101d24d4204SJose E. Roman PetscFunctionBegin;
11029566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1103d24d4204SJose E. Roman
1104d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1105d24d4204SJose E. Roman else {
11069566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Bmpi));
110792c846b4SJose E. Roman m = PETSC_DECIDE;
11089566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
110992c846b4SJose E. Roman n = PETSC_DECIDE;
11109566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
11119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bmpi, m, n, M, N));
11129566063dSJacob Faibussowitsch PetscCall(MatSetType(Bmpi, MATSCALAPACK));
11139566063dSJacob Faibussowitsch PetscCall(MatSetUp(Bmpi));
1114d24d4204SJose E. Roman }
1115d24d4204SJose E. Roman b = (Mat_ScaLAPACK *)Bmpi->data;
1116d24d4204SJose E. Roman
1117b12397e7SPierre Jolivet PetscCall(MatDenseGetLDA(A, &lda));
1118c87776d3SJose E. Roman PetscCall(MatDenseGetArrayRead(A, &aarray));
1119b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1120b12397e7SPierre Jolivet if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1121b12397e7SPierre Jolivet if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1122d24d4204SJose E. Roman /* create ScaLAPACK descriptor for A (1d block distribution) */
11239566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
11249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
11256497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1126792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1127d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
1128d24d4204SJose E. Roman
1129d24d4204SJose E. Roman /* redistribute matrix */
1130792fecdfSBarry Smith PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1131b12397e7SPierre Jolivet Bmpi->nooffprocentries = PETSC_TRUE;
1132b12397e7SPierre Jolivet } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1133b12397e7SPierre Jolivet PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1134b12397e7SPierre Jolivet b->roworiented = PETSC_FALSE;
1135b12397e7SPierre Jolivet PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1136b12397e7SPierre Jolivet PetscCall(ISGetIndices(ir, &rows));
1137b12397e7SPierre Jolivet PetscCall(ISGetIndices(ic, &cols));
1138b12397e7SPierre Jolivet PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1139b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ir, &rows));
1140b12397e7SPierre Jolivet PetscCall(ISRestoreIndices(ic, &cols));
1141b12397e7SPierre Jolivet PetscCall(ISDestroy(&ic));
1142b12397e7SPierre Jolivet PetscCall(ISDestroy(&ir));
1143b12397e7SPierre Jolivet }
1144c87776d3SJose E. Roman PetscCall(MatDenseRestoreArrayRead(A, &aarray));
11459566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
11469566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1147ac530a7eSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1148ac530a7eSPierre Jolivet else *B = Bmpi;
11493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1150d24d4204SJose E. Roman }
1151d24d4204SJose E. Roman
MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1152d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1153d71ae5a4SJacob Faibussowitsch {
1154d24d4204SJose E. Roman Mat mat_scal;
1155d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1156d24d4204SJose E. Roman const PetscInt *cols;
1157d24d4204SJose E. Roman const PetscScalar *vals;
1158d24d4204SJose E. Roman
1159d24d4204SJose E. Roman PetscFunctionBegin;
1160d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) {
1161d24d4204SJose E. Roman mat_scal = *newmat;
11629566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal));
1163d24d4204SJose E. Roman } else {
11649566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1165d24d4204SJose E. Roman m = PETSC_DECIDE;
11669566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1167d24d4204SJose E. Roman n = PETSC_DECIDE;
11689566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
11699566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N));
11709566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK));
11719566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal));
1172d24d4204SJose E. Roman }
1173d24d4204SJose E. Roman for (row = rstart; row < rend; row++) {
11749566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
11759566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
11769566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1177d24d4204SJose E. Roman }
11789566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
11799566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1180d24d4204SJose E. Roman
11819566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1182d24d4204SJose E. Roman else *newmat = mat_scal;
11833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1184d24d4204SJose E. Roman }
1185d24d4204SJose E. Roman
MatConvert_SBAIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1186d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1187d71ae5a4SJacob Faibussowitsch {
1188d24d4204SJose E. Roman Mat mat_scal;
1189d24d4204SJose E. Roman PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1190d24d4204SJose E. Roman const PetscInt *cols;
1191d24d4204SJose E. Roman const PetscScalar *vals;
1192d24d4204SJose E. Roman PetscScalar v;
1193d24d4204SJose E. Roman
1194d24d4204SJose E. Roman PetscFunctionBegin;
1195d24d4204SJose E. Roman if (reuse == MAT_REUSE_MATRIX) {
1196d24d4204SJose E. Roman mat_scal = *newmat;
11979566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(mat_scal));
1198d24d4204SJose E. Roman } else {
11999566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1200d24d4204SJose E. Roman m = PETSC_DECIDE;
12019566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1202d24d4204SJose E. Roman n = PETSC_DECIDE;
12039566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
12049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_scal, m, n, M, N));
12059566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_scal, MATSCALAPACK));
12069566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_scal));
1207d24d4204SJose E. Roman }
12089566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A));
1209d24d4204SJose E. Roman for (row = rstart; row < rend; row++) {
12109566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
12119566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1212d24d4204SJose E. Roman for (j = 0; j < ncols; j++) { /* lower triangular part */
1213d24d4204SJose E. Roman if (cols[j] == row) continue;
1214b94d7dedSBarry Smith v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
12159566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1216d24d4204SJose E. Roman }
12179566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1218d24d4204SJose E. Roman }
12199566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A));
12209566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
12219566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1222d24d4204SJose E. Roman
12239566063dSJacob Faibussowitsch if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1224d24d4204SJose E. Roman else *newmat = mat_scal;
12253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1226d24d4204SJose E. Roman }
1227d24d4204SJose E. Roman
MatScaLAPACKSetPreallocation(Mat A)1228d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1229d71ae5a4SJacob Faibussowitsch {
1230d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1231d24d4204SJose E. Roman PetscInt sz = 0;
1232d24d4204SJose E. Roman
1233d24d4204SJose E. Roman PetscFunctionBegin;
12349566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap));
12359566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap));
1236d24d4204SJose E. Roman if (!a->lld) a->lld = a->locr;
1237d24d4204SJose E. Roman
12389566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc));
12399566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
12409566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(sz, &a->loc));
1241d24d4204SJose E. Roman
1242d24d4204SJose E. Roman A->preallocated = PETSC_TRUE;
12433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1244d24d4204SJose E. Roman }
1245d24d4204SJose E. Roman
MatDestroy_ScaLAPACK(Mat A)1246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1247d71ae5a4SJacob Faibussowitsch {
1248d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1249d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid;
1250b8b5be36SMartin Diehl PetscMPIInt iflg;
1251d24d4204SJose E. Roman MPI_Comm icomm;
1252d24d4204SJose E. Roman
1253d24d4204SJose E. Roman PetscFunctionBegin;
12549566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&A->stash));
12559566063dSJacob Faibussowitsch PetscCall(PetscFree(a->loc));
12569566063dSJacob Faibussowitsch PetscCall(PetscFree(a->pivots));
12579566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1258b8b5be36SMartin Diehl PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1259d24d4204SJose E. Roman if (--grid->grid_refct == 0) {
1260d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxt);
1261d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxrow);
1262d24d4204SJose E. Roman Cblacs_gridexit(grid->ictxcol);
12639566063dSJacob Faibussowitsch PetscCall(PetscFree(grid));
12649566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1265d24d4204SJose E. Roman }
12669566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm));
12679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
12689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
12699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
12709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
12719566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data));
12723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1273d24d4204SJose E. Roman }
1274d24d4204SJose E. Roman
MatSetUp_ScaLAPACK(Mat A)127566976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1276d71ae5a4SJacob Faibussowitsch {
1277d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1278d24d4204SJose E. Roman PetscBLASInt info = 0;
1279b12397e7SPierre Jolivet PetscBool flg;
1280d24d4204SJose E. Roman
1281d24d4204SJose E. Roman PetscFunctionBegin;
12829566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap));
12839566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap));
1284d24d4204SJose E. Roman
1285b12397e7SPierre Jolivet /* check that the layout is as enforced by MatCreateScaLAPACK() */
1286b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1287b12397e7SPierre Jolivet PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1288b12397e7SPierre Jolivet PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1289b12397e7SPierre Jolivet PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1290d24d4204SJose E. Roman
1291d24d4204SJose E. Roman /* compute local sizes */
12929566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
12939566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1294d24d4204SJose E. Roman a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1295d24d4204SJose E. Roman a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1296d24d4204SJose E. Roman a->lld = PetscMax(1, a->locr);
1297d24d4204SJose E. Roman
1298d24d4204SJose E. Roman /* allocate local array */
12999566063dSJacob Faibussowitsch PetscCall(MatScaLAPACKSetPreallocation(A));
1300d24d4204SJose E. Roman
1301d24d4204SJose E. Roman /* set up ScaLAPACK descriptor */
1302792fecdfSBarry Smith PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1303d24d4204SJose E. Roman PetscCheckScaLapackInfo("descinit", info);
13043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1305d24d4204SJose E. Roman }
1306d24d4204SJose E. Roman
MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)130766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1308d71ae5a4SJacob Faibussowitsch {
1309d24d4204SJose E. Roman PetscInt nstash, reallocs;
1310d24d4204SJose E. Roman
1311d24d4204SJose E. Roman PetscFunctionBegin;
13123ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
13139566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
13149566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
13159566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
13163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1317d24d4204SJose E. Roman }
1318d24d4204SJose E. Roman
MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)131966976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1320d71ae5a4SJacob Faibussowitsch {
1321d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1322d24d4204SJose E. Roman PetscMPIInt n;
1323d24d4204SJose E. Roman PetscInt i, flg, *row, *col;
1324d24d4204SJose E. Roman PetscScalar *val;
1325d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1326d24d4204SJose E. Roman
1327d24d4204SJose E. Roman PetscFunctionBegin;
13283ba16761SJacob Faibussowitsch if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1329d24d4204SJose E. Roman while (1) {
13309566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1331d24d4204SJose E. Roman if (!flg) break;
1332d24d4204SJose E. Roman for (i = 0; i < n; i++) {
13339566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
13349566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1335792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1336aed4548fSBarry Smith PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1337d24d4204SJose E. Roman switch (A->insertmode) {
1338d71ae5a4SJacob Faibussowitsch case INSERT_VALUES:
1339d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1340d71ae5a4SJacob Faibussowitsch break;
1341d71ae5a4SJacob Faibussowitsch case ADD_VALUES:
1342d71ae5a4SJacob Faibussowitsch a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1343d71ae5a4SJacob Faibussowitsch break;
1344d71ae5a4SJacob Faibussowitsch default:
1345d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1346d24d4204SJose E. Roman }
1347d24d4204SJose E. Roman }
1348d24d4204SJose E. Roman }
13499566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&A->stash));
13503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1351d24d4204SJose E. Roman }
1352d24d4204SJose E. Roman
MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)135366976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1354d71ae5a4SJacob Faibussowitsch {
1355d24d4204SJose E. Roman Mat Adense, As;
1356d24d4204SJose E. Roman MPI_Comm comm;
1357d24d4204SJose E. Roman
1358d24d4204SJose E. Roman PetscFunctionBegin;
13599566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
13609566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &Adense));
13619566063dSJacob Faibussowitsch PetscCall(MatSetType(Adense, MATDENSE));
13629566063dSJacob Faibussowitsch PetscCall(MatLoad(Adense, viewer));
13639566063dSJacob Faibussowitsch PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
13649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Adense));
13659566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(newMat, &As));
13663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1367d24d4204SJose E. Roman }
1368d24d4204SJose E. Roman
13699371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
13704cc2b5b5SPierre Jolivet NULL,
13714cc2b5b5SPierre Jolivet NULL,
1372d24d4204SJose E. Roman MatMult_ScaLAPACK,
1373d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK,
1374d24d4204SJose E. Roman MatMultTranspose_ScaLAPACK,
1375d24d4204SJose E. Roman MatMultTransposeAdd_ScaLAPACK,
1376d24d4204SJose E. Roman MatSolve_ScaLAPACK,
1377d24d4204SJose E. Roman MatSolveAdd_ScaLAPACK,
13784cc2b5b5SPierre Jolivet NULL,
13794cc2b5b5SPierre Jolivet /*10*/ NULL,
1380d24d4204SJose E. Roman MatLUFactor_ScaLAPACK,
1381d24d4204SJose E. Roman MatCholeskyFactor_ScaLAPACK,
13824cc2b5b5SPierre Jolivet NULL,
1383d24d4204SJose E. Roman MatTranspose_ScaLAPACK,
1384d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK,
13854cc2b5b5SPierre Jolivet NULL,
1386d24d4204SJose E. Roman MatGetDiagonal_ScaLAPACK,
1387d24d4204SJose E. Roman MatDiagonalScale_ScaLAPACK,
1388d24d4204SJose E. Roman MatNorm_ScaLAPACK,
1389d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK,
1390d24d4204SJose E. Roman MatAssemblyEnd_ScaLAPACK,
1391d24d4204SJose E. Roman MatSetOption_ScaLAPACK,
1392d24d4204SJose E. Roman MatZeroEntries_ScaLAPACK,
13934cc2b5b5SPierre Jolivet /*24*/ NULL,
1394d24d4204SJose E. Roman MatLUFactorSymbolic_ScaLAPACK,
1395d24d4204SJose E. Roman MatLUFactorNumeric_ScaLAPACK,
1396d24d4204SJose E. Roman MatCholeskyFactorSymbolic_ScaLAPACK,
1397d24d4204SJose E. Roman MatCholeskyFactorNumeric_ScaLAPACK,
1398d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK,
13994cc2b5b5SPierre Jolivet NULL,
14004cc2b5b5SPierre Jolivet NULL,
14014cc2b5b5SPierre Jolivet NULL,
14024cc2b5b5SPierre Jolivet NULL,
1403d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK,
14044cc2b5b5SPierre Jolivet NULL,
14054cc2b5b5SPierre Jolivet NULL,
14064cc2b5b5SPierre Jolivet NULL,
14074cc2b5b5SPierre Jolivet NULL,
1408d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK,
14094cc2b5b5SPierre Jolivet NULL,
14104cc2b5b5SPierre Jolivet NULL,
14114cc2b5b5SPierre Jolivet NULL,
1412d24d4204SJose E. Roman MatCopy_ScaLAPACK,
14134cc2b5b5SPierre Jolivet /*44*/ NULL,
1414d24d4204SJose E. Roman MatScale_ScaLAPACK,
1415d24d4204SJose E. Roman MatShift_ScaLAPACK,
14164cc2b5b5SPierre Jolivet NULL,
14174cc2b5b5SPierre Jolivet NULL,
14184cc2b5b5SPierre Jolivet /*49*/ NULL,
14194cc2b5b5SPierre Jolivet NULL,
14204cc2b5b5SPierre Jolivet NULL,
14214cc2b5b5SPierre Jolivet NULL,
14224cc2b5b5SPierre Jolivet NULL,
14234cc2b5b5SPierre Jolivet /*54*/ NULL,
14244cc2b5b5SPierre Jolivet NULL,
14254cc2b5b5SPierre Jolivet NULL,
14264cc2b5b5SPierre Jolivet NULL,
14274cc2b5b5SPierre Jolivet NULL,
14284cc2b5b5SPierre Jolivet /*59*/ NULL,
1429d24d4204SJose E. Roman MatDestroy_ScaLAPACK,
1430d24d4204SJose E. Roman MatView_ScaLAPACK,
14314cc2b5b5SPierre Jolivet NULL,
14324cc2b5b5SPierre Jolivet NULL,
14334cc2b5b5SPierre Jolivet /*64*/ NULL,
14344cc2b5b5SPierre Jolivet NULL,
14354cc2b5b5SPierre Jolivet NULL,
14364cc2b5b5SPierre Jolivet NULL,
14374cc2b5b5SPierre Jolivet NULL,
14384cc2b5b5SPierre Jolivet /*69*/ NULL,
1439d24d4204SJose E. Roman MatConvert_ScaLAPACK_Dense,
14404cc2b5b5SPierre Jolivet NULL,
14414cc2b5b5SPierre Jolivet NULL,
14428bb0f5c6SPierre Jolivet NULL,
14434cc2b5b5SPierre Jolivet /*74*/ NULL,
14444cc2b5b5SPierre Jolivet NULL,
14454cc2b5b5SPierre Jolivet NULL,
14464cc2b5b5SPierre Jolivet NULL,
14478bb0f5c6SPierre Jolivet MatLoad_ScaLAPACK,
14484cc2b5b5SPierre Jolivet /*79*/ NULL,
14494cc2b5b5SPierre Jolivet NULL,
14504cc2b5b5SPierre Jolivet NULL,
14514cc2b5b5SPierre Jolivet NULL,
14528bb0f5c6SPierre Jolivet NULL,
14534cc2b5b5SPierre Jolivet /*84*/ NULL,
1454d24d4204SJose E. Roman MatMatMultNumeric_ScaLAPACK,
14554cc2b5b5SPierre Jolivet NULL,
14564cc2b5b5SPierre Jolivet NULL,
1457d24d4204SJose E. Roman MatMatTransposeMultNumeric_ScaLAPACK,
14588bb0f5c6SPierre Jolivet /*89*/ NULL,
14598bb0f5c6SPierre Jolivet MatProductSetFromOptions_ScaLAPACK,
14604cc2b5b5SPierre Jolivet NULL,
14614cc2b5b5SPierre Jolivet NULL,
1462d24d4204SJose E. Roman MatConjugate_ScaLAPACK,
14638bb0f5c6SPierre Jolivet /*94*/ NULL,
14644cc2b5b5SPierre Jolivet NULL,
14654cc2b5b5SPierre Jolivet NULL,
14664cc2b5b5SPierre Jolivet NULL,
14674cc2b5b5SPierre Jolivet NULL,
14688bb0f5c6SPierre Jolivet /*99*/ NULL,
14698bb0f5c6SPierre Jolivet MatMatSolve_ScaLAPACK,
14704cc2b5b5SPierre Jolivet NULL,
14714cc2b5b5SPierre Jolivet NULL,
14724cc2b5b5SPierre Jolivet NULL,
1473*421480d9SBarry Smith /*104*/ NULL,
14748bb0f5c6SPierre Jolivet NULL,
14758bb0f5c6SPierre Jolivet NULL,
14768bb0f5c6SPierre Jolivet NULL,
14778bb0f5c6SPierre Jolivet NULL,
14788bb0f5c6SPierre Jolivet /*109*/ NULL,
14798bb0f5c6SPierre Jolivet MatHermitianTranspose_ScaLAPACK,
14808bb0f5c6SPierre Jolivet MatMultHermitianTranspose_ScaLAPACK,
14818bb0f5c6SPierre Jolivet MatMultHermitianTransposeAdd_ScaLAPACK,
1482*421480d9SBarry Smith NULL,
14834cc2b5b5SPierre Jolivet /*114*/ NULL,
14844cc2b5b5SPierre Jolivet NULL,
14854cc2b5b5SPierre Jolivet NULL,
14864cc2b5b5SPierre Jolivet NULL,
14874cc2b5b5SPierre Jolivet NULL,
14884cc2b5b5SPierre Jolivet /*119*/ NULL,
14898bb0f5c6SPierre Jolivet NULL,
14908bb0f5c6SPierre Jolivet MatTransposeMatMultNumeric_ScaLAPACK,
14914cc2b5b5SPierre Jolivet NULL,
14924cc2b5b5SPierre Jolivet /*124*/ NULL,
14934cc2b5b5SPierre Jolivet NULL,
14944cc2b5b5SPierre Jolivet NULL,
14954cc2b5b5SPierre Jolivet NULL,
14964cc2b5b5SPierre Jolivet NULL,
14974cc2b5b5SPierre Jolivet /*129*/ NULL,
14984cc2b5b5SPierre Jolivet NULL,
14994cc2b5b5SPierre Jolivet NULL,
15008bb0f5c6SPierre Jolivet NULL,
15014cc2b5b5SPierre Jolivet NULL,
15024cc2b5b5SPierre Jolivet /*134*/ NULL,
15034cc2b5b5SPierre Jolivet NULL,
15044cc2b5b5SPierre Jolivet NULL,
15054cc2b5b5SPierre Jolivet NULL,
15064cc2b5b5SPierre Jolivet NULL,
15074cc2b5b5SPierre Jolivet NULL,
15084cc2b5b5SPierre Jolivet /*140*/ NULL,
15094cc2b5b5SPierre Jolivet NULL,
151003db1824SAlex Lindsay NULL,
1511c2be7ffeSStefano Zampini NULL,
15124cc2b5b5SPierre Jolivet NULL};
1513d24d4204SJose E. Roman
MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash * stash,PetscInt * owners)1514d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1515d71ae5a4SJacob Faibussowitsch {
15166497c311SBarry Smith PetscInt *owner, *startv, *starti, bs2;
1517d24d4204SJose E. Roman PetscInt size = stash->size, nsends;
15186497c311SBarry Smith PetscInt *sindices, **rindices, j, l;
1519d24d4204SJose E. Roman PetscScalar **rvalues, *svalues;
1520d24d4204SJose E. Roman MPI_Comm comm = stash->comm;
1521d24d4204SJose E. Roman MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
15226497c311SBarry Smith PetscMPIInt tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1523d24d4204SJose E. Roman PetscInt *sp_idx, *sp_idy;
1524d24d4204SJose E. Roman PetscScalar *sp_val;
1525d24d4204SJose E. Roman PetscMatStashSpace space, space_next;
1526d24d4204SJose E. Roman PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1527d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data;
1528d24d4204SJose E. Roman
1529d24d4204SJose E. Roman PetscFunctionBegin;
1530d24d4204SJose E. Roman { /* make sure all processors are either in INSERTMODE or ADDMODE */
1531d24d4204SJose E. Roman InsertMode addv;
1532462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
153308401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1534d24d4204SJose E. Roman mat->insertmode = addv; /* in case this processor had no cache */
1535d24d4204SJose E. Roman }
1536d24d4204SJose E. Roman
1537d24d4204SJose E. Roman bs2 = stash->bs * stash->bs;
1538d24d4204SJose E. Roman
1539d24d4204SJose E. Roman /* first count number of contributors to each processor */
15409566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths));
15419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner));
1542d24d4204SJose E. Roman
15436497c311SBarry Smith ii = j = 0;
1544d24d4204SJose E. Roman space = stash->space_head;
1545d24d4204SJose E. Roman while (space) {
1546d24d4204SJose E. Roman space_next = space->next;
1547d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) {
15489566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
15499566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1550792fecdfSBarry Smith PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1551d24d4204SJose E. Roman j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
15529371c9d4SSatish Balay nlengths[j]++;
15536497c311SBarry Smith owner[ii] = j;
15546497c311SBarry Smith ii++;
1555d24d4204SJose E. Roman }
1556d24d4204SJose E. Roman space = space_next;
1557d24d4204SJose E. Roman }
1558d24d4204SJose E. Roman
1559d24d4204SJose E. Roman /* Now check what procs get messages - and compute nsends. */
15609566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes));
15616497c311SBarry Smith nsends = 0;
15626497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) {
1563d24d4204SJose E. Roman if (nlengths[i]) {
15649371c9d4SSatish Balay sizes[i] = 1;
15659371c9d4SSatish Balay nsends++;
1566d24d4204SJose E. Roman }
1567d24d4204SJose E. Roman }
1568d24d4204SJose E. Roman
15699371c9d4SSatish Balay {
15709371c9d4SSatish Balay PetscMPIInt *onodes, *olengths;
15716497c311SBarry Smith
1572d24d4204SJose E. Roman /* Determine the number of messages to expect, their lengths, from from-ids */
15739566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
15746497c311SBarry Smith PetscCall(PetscMPIIntCast(nsends, &insends));
15756497c311SBarry Smith PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1576d24d4204SJose E. Roman /* since clubbing row,col - lengths are multiplied by 2 */
15776497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
15789566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1579d24d4204SJose E. Roman /* values are size 'bs2' lengths (and remove earlier factor 2 */
15806497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
15819566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
15829566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes));
15839371c9d4SSatish Balay PetscCall(PetscFree(olengths));
15849371c9d4SSatish Balay }
1585d24d4204SJose E. Roman
1586d24d4204SJose E. Roman /* do sends:
1587d24d4204SJose E. Roman 1) starts[i] gives the starting index in svalues for stuff going to
1588d24d4204SJose E. Roman the ith processor
1589d24d4204SJose E. Roman */
15909566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
15919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits));
15929566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti));
1593d24d4204SJose E. Roman /* use 2 sends the first with all_a, the next with all_i and all_j */
15949371c9d4SSatish Balay startv[0] = 0;
15959371c9d4SSatish Balay starti[0] = 0;
15966497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) {
1597d24d4204SJose E. Roman startv[i] = startv[i - 1] + nlengths[i - 1];
1598d24d4204SJose E. Roman starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1599d24d4204SJose E. Roman }
1600d24d4204SJose E. Roman
16016497c311SBarry Smith ii = 0;
1602d24d4204SJose E. Roman space = stash->space_head;
1603d24d4204SJose E. Roman while (space) {
1604d24d4204SJose E. Roman space_next = space->next;
1605d24d4204SJose E. Roman sp_idx = space->idx;
1606d24d4204SJose E. Roman sp_idy = space->idy;
1607d24d4204SJose E. Roman sp_val = space->val;
1608d24d4204SJose E. Roman for (l = 0; l < space->local_used; l++) {
16096497c311SBarry Smith j = owner[ii];
1610d24d4204SJose E. Roman if (bs2 == 1) {
1611d24d4204SJose E. Roman svalues[startv[j]] = sp_val[l];
1612d24d4204SJose E. Roman } else {
1613d24d4204SJose E. Roman PetscInt k;
1614d24d4204SJose E. Roman PetscScalar *buf1, *buf2;
1615d24d4204SJose E. Roman buf1 = svalues + bs2 * startv[j];
1616d24d4204SJose E. Roman buf2 = space->val + bs2 * l;
1617d24d4204SJose E. Roman for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1618d24d4204SJose E. Roman }
1619d24d4204SJose E. Roman sindices[starti[j]] = sp_idx[l];
1620d24d4204SJose E. Roman sindices[starti[j] + nlengths[j]] = sp_idy[l];
1621d24d4204SJose E. Roman startv[j]++;
1622d24d4204SJose E. Roman starti[j]++;
16236497c311SBarry Smith ii++;
1624d24d4204SJose E. Roman }
1625d24d4204SJose E. Roman space = space_next;
1626d24d4204SJose E. Roman }
1627d24d4204SJose E. Roman startv[0] = 0;
16286497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1629d24d4204SJose E. Roman
16306497c311SBarry Smith for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1631d24d4204SJose E. Roman if (sizes[i]) {
16326497c311SBarry Smith PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
16336497c311SBarry Smith PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1634d24d4204SJose E. Roman }
1635d24d4204SJose E. Roman }
1636d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
16379566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
16386497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) {
16396497c311SBarry Smith if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1640d24d4204SJose E. Roman }
1641d24d4204SJose E. Roman #endif
16429566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths));
16439566063dSJacob Faibussowitsch PetscCall(PetscFree(owner));
16449566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti));
16459566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes));
1646d24d4204SJose E. Roman
1647d24d4204SJose E. Roman /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
16489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1649d24d4204SJose E. Roman
16506497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) {
1651d24d4204SJose E. Roman recv_waits[2 * i] = recv_waits1[i];
1652d24d4204SJose E. Roman recv_waits[2 * i + 1] = recv_waits2[i];
1653d24d4204SJose E. Roman }
1654d24d4204SJose E. Roman stash->recv_waits = recv_waits;
1655d24d4204SJose E. Roman
16569566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1));
16579566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2));
1658d24d4204SJose E. Roman
1659d24d4204SJose E. Roman stash->svalues = svalues;
1660d24d4204SJose E. Roman stash->sindices = sindices;
1661d24d4204SJose E. Roman stash->rvalues = rvalues;
1662d24d4204SJose E. Roman stash->rindices = rindices;
1663d24d4204SJose E. Roman stash->send_waits = send_waits;
16646497c311SBarry Smith stash->nsends = (PetscMPIInt)nsends;
1665d24d4204SJose E. Roman stash->nrecvs = nreceives;
1666d24d4204SJose E. Roman stash->reproduce_count = 0;
16673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1668d24d4204SJose E. Roman }
1669d24d4204SJose E. Roman
MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)1670d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1671d71ae5a4SJacob Faibussowitsch {
1672d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1673d24d4204SJose E. Roman
1674d24d4204SJose E. Roman PetscFunctionBegin;
167528b400f6SJacob Faibussowitsch PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1676aed4548fSBarry Smith PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1677aed4548fSBarry Smith PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
16789566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
16799566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
16803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1681d24d4204SJose E. Roman }
1682d24d4204SJose E. Roman
1683d24d4204SJose E. Roman /*@
16846aad120cSJose E. Roman MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
168511a5261eSBarry Smith the `MATSCALAPACK` matrix
1686d24d4204SJose E. Roman
1687c3339decSBarry Smith Logically Collective
1688d24d4204SJose E. Roman
1689d8d19677SJose E. Roman Input Parameters:
169011a5261eSBarry Smith + A - a `MATSCALAPACK` matrix
1691d24d4204SJose E. Roman . mb - the row block size
1692d24d4204SJose E. Roman - nb - the column block size
1693d24d4204SJose E. Roman
1694d24d4204SJose E. Roman Level: intermediate
1695d24d4204SJose E. Roman
16962ef1f0ffSBarry Smith Note:
16972ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
16982ef1f0ffSBarry Smith
16991cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1700d24d4204SJose E. Roman @*/
MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)1701d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1702d71ae5a4SJacob Faibussowitsch {
1703d24d4204SJose E. Roman PetscFunctionBegin;
1704d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1705d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, mb, 2);
1706d24d4204SJose E. Roman PetscValidLogicalCollectiveInt(A, nb, 3);
1707cac4c232SBarry Smith PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
17083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1709d24d4204SJose E. Roman }
1710d24d4204SJose E. Roman
MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt * mb,PetscInt * nb)1711d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1712d71ae5a4SJacob Faibussowitsch {
1713d24d4204SJose E. Roman Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1714d24d4204SJose E. Roman
1715d24d4204SJose E. Roman PetscFunctionBegin;
1716d24d4204SJose E. Roman if (mb) *mb = a->mb;
1717d24d4204SJose E. Roman if (nb) *nb = a->nb;
17183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1719d24d4204SJose E. Roman }
1720d24d4204SJose E. Roman
1721d24d4204SJose E. Roman /*@
17226aad120cSJose E. Roman MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
172311a5261eSBarry Smith the `MATSCALAPACK` matrix
1724d24d4204SJose E. Roman
17252ef1f0ffSBarry Smith Not Collective
1726d24d4204SJose E. Roman
1727d24d4204SJose E. Roman Input Parameter:
172811a5261eSBarry Smith . A - a `MATSCALAPACK` matrix
1729d24d4204SJose E. Roman
1730d24d4204SJose E. Roman Output Parameters:
1731d24d4204SJose E. Roman + mb - the row block size
1732d24d4204SJose E. Roman - nb - the column block size
1733d24d4204SJose E. Roman
1734d24d4204SJose E. Roman Level: intermediate
1735d24d4204SJose E. Roman
17362ef1f0ffSBarry Smith Note:
17372ef1f0ffSBarry Smith This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
17382ef1f0ffSBarry Smith
17391cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1740d24d4204SJose E. Roman @*/
MatScaLAPACKGetBlockSizes(Mat A,PetscInt * mb,PetscInt * nb)1741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1742d71ae5a4SJacob Faibussowitsch {
1743d24d4204SJose E. Roman PetscFunctionBegin;
1744d24d4204SJose E. Roman PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1745cac4c232SBarry Smith PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
17463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1747d24d4204SJose E. Roman }
1748d24d4204SJose E. Roman
1749d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1750d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1751d24d4204SJose E. Roman
1752d24d4204SJose E. Roman /*MC
1753d24d4204SJose E. Roman MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1754d24d4204SJose E. Roman
17552ef1f0ffSBarry Smith Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1756d24d4204SJose E. Roman
1757d24d4204SJose E. Roman Options Database Keys:
17582ef1f0ffSBarry Smith + -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
17592ef1f0ffSBarry Smith . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1760d24d4204SJose E. Roman . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1761d24d4204SJose E. Roman - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1762d24d4204SJose E. Roman
17632ef1f0ffSBarry Smith Level: intermediate
17642ef1f0ffSBarry Smith
176589bba20eSBarry Smith Note:
176689bba20eSBarry Smith Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
176789bba20eSBarry Smith range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
176889bba20eSBarry Smith the given rank.
176989bba20eSBarry Smith
17701cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1771d24d4204SJose E. Roman M*/
1772d24d4204SJose E. Roman
MatCreate_ScaLAPACK(Mat A)1773d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1774d71ae5a4SJacob Faibussowitsch {
1775d24d4204SJose E. Roman Mat_ScaLAPACK *a;
1776b8b5be36SMartin Diehl PetscBool flg;
1777b8b5be36SMartin Diehl PetscMPIInt iflg;
1778d24d4204SJose E. Roman Mat_ScaLAPACK_Grid *grid;
1779d24d4204SJose E. Roman MPI_Comm icomm;
1780d24d4204SJose E. Roman PetscBLASInt nprow, npcol, myrow, mycol;
1781d24d4204SJose E. Roman PetscInt optv1, k = 2, array[2] = {0, 0};
1782d24d4204SJose E. Roman PetscMPIInt size;
1783d24d4204SJose E. Roman
1784d24d4204SJose E. Roman PetscFunctionBegin;
1785aea10558SJacob Faibussowitsch A->ops[0] = MatOps_Values;
1786d24d4204SJose E. Roman A->insertmode = NOT_SET_VALUES;
1787d24d4204SJose E. Roman
17889566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1789d24d4204SJose E. Roman A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1790d24d4204SJose E. Roman A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1791d24d4204SJose E. Roman A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1792d24d4204SJose E. Roman A->stash.ScatterDestroy = NULL;
1793d24d4204SJose E. Roman
17944dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a));
1795d24d4204SJose E. Roman A->data = (void *)a;
1796d24d4204SJose E. Roman
1797d24d4204SJose E. Roman /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1798d24d4204SJose E. Roman if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1799c8025a54SPierre Jolivet PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
18009566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
18019566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1802d24d4204SJose E. Roman }
18039566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1804b8b5be36SMartin Diehl PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1805b8b5be36SMartin Diehl if (!iflg) {
18064dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&grid));
1807d24d4204SJose E. Roman
18089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(icomm, &size));
18096497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));
1810d24d4204SJose E. Roman
1811d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1812b8b5be36SMartin Diehl PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg));
1813b8b5be36SMartin Diehl if (flg) {
181408401ef6SPierre Jolivet PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
18156497c311SBarry Smith PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1816d24d4204SJose E. Roman }
1817d0609cedSBarry Smith PetscOptionsEnd();
1818d24d4204SJose E. Roman
1819d24d4204SJose E. Roman if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1820d24d4204SJose E. Roman grid->npcol = size / grid->nprow;
18219566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
18229566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1823f7ec113fSDamian Marek grid->ictxt = Csys2blacs_handle(icomm);
1824d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1825d24d4204SJose E. Roman Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1826d24d4204SJose E. Roman grid->grid_refct = 1;
1827d24d4204SJose E. Roman grid->nprow = nprow;
1828d24d4204SJose E. Roman grid->npcol = npcol;
1829d24d4204SJose E. Roman grid->myrow = myrow;
1830d24d4204SJose E. Roman grid->mycol = mycol;
1831d24d4204SJose E. Roman /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1832f7ec113fSDamian Marek grid->ictxrow = Csys2blacs_handle(icomm);
1833d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1834f7ec113fSDamian Marek grid->ictxcol = Csys2blacs_handle(icomm);
1835d24d4204SJose E. Roman Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
18369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1837d24d4204SJose E. Roman
1838d24d4204SJose E. Roman } else grid->grid_refct++;
18399566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&icomm));
1840d24d4204SJose E. Roman a->grid = grid;
1841d24d4204SJose E. Roman a->mb = DEFAULT_BLOCKSIZE;
1842d24d4204SJose E. Roman a->nb = DEFAULT_BLOCKSIZE;
1843d24d4204SJose E. Roman
1844d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
18459566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1846d24d4204SJose E. Roman if (flg) {
18476497c311SBarry Smith a->mb = (PetscMPIInt)array[0];
18486497c311SBarry Smith a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1849d24d4204SJose E. Roman }
1850d0609cedSBarry Smith PetscOptionsEnd();
1851d24d4204SJose E. Roman
1852b12397e7SPierre Jolivet a->roworiented = PETSC_TRUE;
18539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
18549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
18559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
18569566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
18573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1858d24d4204SJose E. Roman }
1859d24d4204SJose E. Roman
1860d24d4204SJose E. Roman /*@C
1861d24d4204SJose E. Roman MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
186211a5261eSBarry Smith (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1863d24d4204SJose E. Roman
1864d24d4204SJose E. Roman Collective
1865d24d4204SJose E. Roman
1866d24d4204SJose E. Roman Input Parameters:
1867d24d4204SJose E. Roman + comm - MPI communicator
186811a5261eSBarry Smith . mb - row block size (or `PETSC_DECIDE` to have it set)
186911a5261eSBarry Smith . nb - column block size (or `PETSC_DECIDE` to have it set)
1870d24d4204SJose E. Roman . M - number of global rows
1871d24d4204SJose E. Roman . N - number of global columns
1872d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix
1873d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix
1874d24d4204SJose E. Roman
1875d24d4204SJose E. Roman Output Parameter:
1876d24d4204SJose E. Roman . A - the matrix
1877d24d4204SJose E. Roman
187811a5261eSBarry Smith Options Database Key:
1879d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1880d24d4204SJose E. Roman
18812ef1f0ffSBarry Smith Level: intermediate
18822ef1f0ffSBarry Smith
18832ef1f0ffSBarry Smith Notes:
18842ef1f0ffSBarry Smith If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
18852ef1f0ffSBarry Smith
188611a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1887d24d4204SJose E. Roman MatXXXXSetPreallocation() paradigm instead of this routine directly.
188811a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1889d24d4204SJose E. Roman
189046091a0eSPierre Jolivet Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1891d24d4204SJose E. Roman configured with ScaLAPACK. In particular, PETSc's local sizes lose
1892d24d4204SJose E. Roman significance and are thus ignored. The block sizes refer to the values
189311a5261eSBarry Smith used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1894d24d4204SJose E. Roman
18951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1896d24d4204SJose E. Roman @*/
MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat * A)1897d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1898d71ae5a4SJacob Faibussowitsch {
1899d24d4204SJose E. Roman Mat_ScaLAPACK *a;
1900d24d4204SJose E. Roman PetscInt m, n;
1901d24d4204SJose E. Roman
1902d24d4204SJose E. Roman PetscFunctionBegin;
19039566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A));
19049566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSCALAPACK));
1905aed4548fSBarry Smith PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1906d24d4204SJose E. Roman /* rows and columns are NOT distributed according to PetscSplitOwnership */
1907d24d4204SJose E. Roman m = PETSC_DECIDE;
19089566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1909d24d4204SJose E. Roman n = PETSC_DECIDE;
19109566063dSJacob Faibussowitsch PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
19119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N));
1912d24d4204SJose E. Roman a = (Mat_ScaLAPACK *)(*A)->data;
19139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(M, &a->M));
19149566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(N, &a->N));
19159566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
19169566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
19179566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
19189566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(csrc, &a->csrc));
19199566063dSJacob Faibussowitsch PetscCall(MatSetUp(*A));
19203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1921d24d4204SJose E. Roman }
1922