xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision e8c0849ab8fe171bed529bea27238c9b402db591)
1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2d24d4204SJose E. Roman 
327e75052SPierre Jolivet const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
427e75052SPierre Jolivet                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
527e75052SPierre Jolivet                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
627e75052SPierre Jolivet                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
727e75052SPierre Jolivet                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
827e75052SPierre Jolivet                                        "       PUBLISHER = {SIAM},\n"
927e75052SPierre Jolivet                                        "       ADDRESS = {Philadelphia, PA},\n"
1027e75052SPierre Jolivet                                        "       YEAR = 1997\n"
1127e75052SPierre Jolivet                                        "}\n";
1227e75052SPierre Jolivet static PetscBool ScaLAPACKCite       = PETSC_FALSE;
1327e75052SPierre Jolivet 
14d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
15d24d4204SJose E. Roman 
16d24d4204SJose E. Roman /*
17d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19d24d4204SJose E. Roman */
20d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21d24d4204SJose E. Roman 
Petsc_ScaLAPACK_keyval_free(void)22d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23d71ae5a4SJacob Faibussowitsch {
24f7ec113fSDamian Marek   PetscFunctionBegin;
259566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
269566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28f7ec113fSDamian Marek }
29f7ec113fSDamian Marek 
MatView_ScaLAPACK(Mat A,PetscViewer viewer)30d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31d71ae5a4SJacob Faibussowitsch {
32d24d4204SJose E. Roman   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
339f196a02SMartin Diehl   PetscBool         isascii;
34d24d4204SJose E. Roman   PetscViewerFormat format;
35d24d4204SJose E. Roman   Mat               Adense;
36d24d4204SJose E. Roman 
37d24d4204SJose E. Roman   PetscFunctionBegin;
389f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
399f196a02SMartin Diehl   if (isascii) {
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
41d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
429566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
439566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
449566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
459566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
463ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
47d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
483ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
49d24d4204SJose E. Roman     }
50d24d4204SJose E. Roman   }
51d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
529566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
539566063dSJacob Faibussowitsch   PetscCall(MatView(Adense, viewer));
549566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56d24d4204SJose E. Roman }
57d24d4204SJose E. Roman 
MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo * info)58d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59d71ae5a4SJacob Faibussowitsch {
60d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61d24d4204SJose E. Roman   PetscLogDouble isend[2], irecv[2];
62d24d4204SJose E. Roman 
63d24d4204SJose E. Roman   PetscFunctionBegin;
64d24d4204SJose E. Roman   info->block_size = 1.0;
65d24d4204SJose E. Roman 
66d24d4204SJose E. Roman   isend[0] = a->lld * a->locc;  /* locally allocated */
67d24d4204SJose E. Roman   isend[1] = a->locr * a->locc; /* used submatrix */
68d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69d24d4204SJose E. Roman     info->nz_allocated = isend[0];
70d24d4204SJose E. Roman     info->nz_used      = isend[1];
71d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
72462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
74d24d4204SJose E. Roman     info->nz_used      = irecv[1];
75d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
76462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77d24d4204SJose E. Roman     info->nz_allocated = irecv[0];
78d24d4204SJose E. Roman     info->nz_used      = irecv[1];
79d24d4204SJose E. Roman   }
80d24d4204SJose E. Roman 
81d24d4204SJose E. Roman   info->nz_unneeded       = 0;
82d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
83d24d4204SJose E. Roman   info->mallocs           = 0;
844dfa11a4SJacob Faibussowitsch   info->memory            = 0; /* REVIEW ME */
85d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
86d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
87d24d4204SJose E. Roman   info->factor_mallocs    = 0;
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
89d24d4204SJose E. Roman }
90d24d4204SJose E. Roman 
MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)9166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92d71ae5a4SJacob Faibussowitsch {
93b12397e7SPierre Jolivet   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94b12397e7SPierre Jolivet 
95d24d4204SJose E. Roman   PetscFunctionBegin;
96d24d4204SJose E. Roman   switch (op) {
97d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATIONS:
98d24d4204SJose E. Roman   case MAT_NEW_NONZERO_LOCATION_ERR:
99d24d4204SJose E. Roman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100d24d4204SJose E. Roman   case MAT_SYMMETRIC:
101d24d4204SJose E. Roman   case MAT_SORTED_FULL:
102d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
103d71ae5a4SJacob Faibussowitsch     break;
104d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
105d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
106d71ae5a4SJacob Faibussowitsch     break;
107d71ae5a4SJacob Faibussowitsch   default:
108d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109d24d4204SJose E. Roman   }
1103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
111d24d4204SJose E. Roman }
112d24d4204SJose E. Roman 
MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt * rows,PetscInt nc,const PetscInt * cols,const PetscScalar * vals,InsertMode imode)113d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114d71ae5a4SJacob Faibussowitsch {
115d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116d24d4204SJose E. Roman   PetscInt       i, j;
117d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118b12397e7SPierre Jolivet   PetscBool      roworiented = a->roworiented;
119d24d4204SJose E. Roman 
120d24d4204SJose E. Roman   PetscFunctionBegin;
121b12397e7SPierre Jolivet   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122d24d4204SJose E. Roman   for (i = 0; i < nr; i++) {
123d24d4204SJose E. Roman     if (rows[i] < 0) continue;
1249566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125d24d4204SJose E. Roman     for (j = 0; j < nc; j++) {
126d24d4204SJose E. Roman       if (cols[j] < 0) continue;
1279566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129d24d4204SJose E. Roman       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130b12397e7SPierre Jolivet         if (roworiented) {
131d24d4204SJose E. Roman           switch (imode) {
132d71ae5a4SJacob Faibussowitsch           case INSERT_VALUES:
133d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134d71ae5a4SJacob Faibussowitsch             break;
135d71ae5a4SJacob Faibussowitsch           default:
136d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137d71ae5a4SJacob Faibussowitsch             break;
138b12397e7SPierre Jolivet           }
139b12397e7SPierre Jolivet         } else {
140b12397e7SPierre Jolivet           switch (imode) {
141d71ae5a4SJacob Faibussowitsch           case INSERT_VALUES:
142d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143d71ae5a4SJacob Faibussowitsch             break;
144d71ae5a4SJacob Faibussowitsch           default:
145d71ae5a4SJacob Faibussowitsch             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146d71ae5a4SJacob Faibussowitsch             break;
147b12397e7SPierre Jolivet           }
148d24d4204SJose E. Roman         }
149d24d4204SJose E. Roman       } else {
15028b400f6SJacob Faibussowitsch         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
152b12397e7SPierre Jolivet         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153d24d4204SJose E. Roman       }
154d24d4204SJose E. Roman     }
155d24d4204SJose E. Roman   }
1563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
157d24d4204SJose E. Roman }
158d24d4204SJose E. Roman 
MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscBool hermitian,PetscScalar beta,const PetscScalar * x,PetscScalar * y)1595e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160d71ae5a4SJacob Faibussowitsch {
161d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162d24d4204SJose E. Roman   PetscScalar    *x2d, *y2d, alpha = 1.0;
163d24d4204SJose E. Roman   const PetscInt *ranges;
164d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165d24d4204SJose E. Roman 
166d24d4204SJose E. Roman   PetscFunctionBegin;
167d24d4204SJose E. Roman   if (transpose) {
168d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
1699566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1709566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
1716497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1749566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
1759566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176d24d4204SJose E. Roman     ylld = 1;
177792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
179d24d4204SJose E. Roman 
180d24d4204SJose E. Roman     /* allocate 2d vectors */
181d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
1846497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
185d24d4204SJose E. Roman 
186d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
189792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
191d24d4204SJose E. Roman 
192d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
193c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194d24d4204SJose E. Roman 
195d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
196792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197d24d4204SJose E. Roman 
198d24d4204SJose E. Roman     /* call PBLAS subroutine */
1995e4d33a3SBlanca Mellado Pinto     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
2005e4d33a3SBlanca Mellado Pinto     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201d24d4204SJose E. Roman 
202d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
203792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204d24d4204SJose E. Roman 
205d24d4204SJose E. Roman   } else { /* non-transpose */
206d24d4204SJose E. Roman 
207d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
2089566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
2099566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210d24d4204SJose E. Roman     xlld = 1;
211792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
2139566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
2149566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
2156497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
218d24d4204SJose E. Roman 
219d24d4204SJose E. Roman     /* allocate 2d vectors */
220d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
2229566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
2236497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
224d24d4204SJose E. Roman 
225d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
228792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
230d24d4204SJose E. Roman 
231d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
232c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233d24d4204SJose E. Roman 
234d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
235792fecdfSBarry Smith     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236d24d4204SJose E. Roman 
237d24d4204SJose E. Roman     /* call PBLAS subroutine */
238792fecdfSBarry Smith     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239d24d4204SJose E. Roman 
240d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
241792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242d24d4204SJose E. Roman   }
2439566063dSJacob Faibussowitsch   PetscCall(PetscFree2(x2d, y2d));
2443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
245d24d4204SJose E. Roman }
246d24d4204SJose E. Roman 
MatMult_ScaLAPACK(Mat A,Vec x,Vec y)247d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248d71ae5a4SJacob Faibussowitsch {
249d24d4204SJose E. Roman   const PetscScalar *xarray;
250d24d4204SJose E. Roman   PetscScalar       *yarray;
251d24d4204SJose E. Roman 
252d24d4204SJose E. Roman   PetscFunctionBegin;
2539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
2555e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
2569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
2583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259d24d4204SJose E. Roman }
260d24d4204SJose E. Roman 
MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262d71ae5a4SJacob Faibussowitsch {
263d24d4204SJose E. Roman   const PetscScalar *xarray;
264d24d4204SJose E. Roman   PetscScalar       *yarray;
265d24d4204SJose E. Roman 
266d24d4204SJose E. Roman   PetscFunctionBegin;
2679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2689566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yarray));
2695e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
2709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
2719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yarray));
2723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
273d24d4204SJose E. Roman }
274d24d4204SJose E. Roman 
MatMultHermitianTranspose_ScaLAPACK(Mat A,Vec x,Vec y)2755e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
2765e4d33a3SBlanca Mellado Pinto {
2775e4d33a3SBlanca Mellado Pinto   const PetscScalar *xarray;
2785e4d33a3SBlanca Mellado Pinto   PetscScalar       *yarray;
2795e4d33a3SBlanca Mellado Pinto 
2805e4d33a3SBlanca Mellado Pinto   PetscFunctionBegin;
2815e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArrayRead(x, &xarray));
2825e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArrayWrite(y, &yarray));
2835e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
2845e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArrayRead(x, &xarray));
2855e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArrayWrite(y, &yarray));
2865e4d33a3SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
2875e4d33a3SBlanca Mellado Pinto }
2885e4d33a3SBlanca Mellado Pinto 
MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)289d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290d71ae5a4SJacob Faibussowitsch {
291d24d4204SJose E. Roman   const PetscScalar *xarray;
292d24d4204SJose E. Roman   PetscScalar       *zarray;
293d24d4204SJose E. Roman 
294d24d4204SJose E. Roman   PetscFunctionBegin;
2959566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
2969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
2979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
2985e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
2999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
3009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
3013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
302d24d4204SJose E. Roman }
303d24d4204SJose E. Roman 
MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)304d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305d71ae5a4SJacob Faibussowitsch {
306d24d4204SJose E. Roman   const PetscScalar *xarray;
307d24d4204SJose E. Roman   PetscScalar       *zarray;
308d24d4204SJose E. Roman 
309d24d4204SJose E. Roman   PetscFunctionBegin;
3109566063dSJacob Faibussowitsch   if (y != z) PetscCall(VecCopy(y, z));
3119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xarray));
3129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(z, &zarray));
3135e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
3145e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArrayRead(x, &xarray));
3155e4d33a3SBlanca Mellado Pinto   PetscCall(VecRestoreArray(z, &zarray));
3165e4d33a3SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
3175e4d33a3SBlanca Mellado Pinto }
3185e4d33a3SBlanca Mellado Pinto 
MatMultHermitianTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)3195e4d33a3SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
3205e4d33a3SBlanca Mellado Pinto {
3215e4d33a3SBlanca Mellado Pinto   const PetscScalar *xarray;
3225e4d33a3SBlanca Mellado Pinto   PetscScalar       *zarray;
3235e4d33a3SBlanca Mellado Pinto 
3245e4d33a3SBlanca Mellado Pinto   PetscFunctionBegin;
3255e4d33a3SBlanca Mellado Pinto   if (y != z) PetscCall(VecCopy(y, z));
3265e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArrayRead(x, &xarray));
3275e4d33a3SBlanca Mellado Pinto   PetscCall(VecGetArray(z, &zarray));
3285e4d33a3SBlanca Mellado Pinto   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
3299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xarray));
3309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(z, &zarray));
3313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
332d24d4204SJose E. Roman }
333d24d4204SJose E. Roman 
MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)334d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335d71ae5a4SJacob Faibussowitsch {
336d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
340d24d4204SJose E. Roman   PetscBLASInt   one = 1;
341d24d4204SJose E. Roman 
342d24d4204SJose E. Roman   PetscFunctionBegin;
343792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
3453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
346d24d4204SJose E. Roman }
347d24d4204SJose E. Roman 
MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)348d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349d71ae5a4SJacob Faibussowitsch {
350d24d4204SJose E. Roman   PetscFunctionBegin;
3519566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3529566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
3539566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
354d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
3553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
356d24d4204SJose E. Roman }
357d24d4204SJose E. Roman 
MatTransposeMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)358e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359e0d15688SBlanca Mellado Pinto {
360e0d15688SBlanca Mellado Pinto   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361e0d15688SBlanca Mellado Pinto   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362e0d15688SBlanca Mellado Pinto   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363e0d15688SBlanca Mellado Pinto   PetscScalar    sone = 1.0, zero = 0.0;
364e0d15688SBlanca Mellado Pinto   PetscBLASInt   one = 1;
365e0d15688SBlanca Mellado Pinto 
366e0d15688SBlanca Mellado Pinto   PetscFunctionBegin;
367e0d15688SBlanca Mellado Pinto   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368e0d15688SBlanca Mellado Pinto   C->assembled = PETSC_TRUE;
369e0d15688SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
370e0d15688SBlanca Mellado Pinto }
371e0d15688SBlanca Mellado Pinto 
MatTransposeMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)372e0d15688SBlanca Mellado Pinto static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373e0d15688SBlanca Mellado Pinto {
374e0d15688SBlanca Mellado Pinto   PetscFunctionBegin;
375e0d15688SBlanca Mellado Pinto   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376e0d15688SBlanca Mellado Pinto   PetscCall(MatSetType(C, MATSCALAPACK));
377e0d15688SBlanca Mellado Pinto   PetscCall(MatSetUp(C));
378e0d15688SBlanca Mellado Pinto   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379e0d15688SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
380e0d15688SBlanca Mellado Pinto }
381e0d15688SBlanca Mellado Pinto 
MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383d71ae5a4SJacob Faibussowitsch {
384d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385d24d4204SJose E. Roman   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386d24d4204SJose E. Roman   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
388d24d4204SJose E. Roman   PetscBLASInt   one = 1;
389d24d4204SJose E. Roman 
390d24d4204SJose E. Roman   PetscFunctionBegin;
391792fecdfSBarry Smith   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
3933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
394d24d4204SJose E. Roman }
395d24d4204SJose E. Roman 
MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)396d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397d71ae5a4SJacob Faibussowitsch {
398d24d4204SJose E. Roman   PetscFunctionBegin;
3999566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
4009566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATSCALAPACK));
4019566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
402e0d15688SBlanca Mellado Pinto   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
4033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
404d24d4204SJose E. Roman }
405d24d4204SJose E. Roman 
MatProductSetFromOptions_ScaLAPACK(Mat C)406d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407d71ae5a4SJacob Faibussowitsch {
408d24d4204SJose E. Roman   Mat_Product *product = C->product;
409d24d4204SJose E. Roman 
410d24d4204SJose E. Roman   PetscFunctionBegin;
411d24d4204SJose E. Roman   switch (product->type) {
412d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
413e0d15688SBlanca Mellado Pinto     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414e0d15688SBlanca Mellado Pinto     C->ops->productsymbolic = MatProductSymbolic_AB;
415e0d15688SBlanca Mellado Pinto     break;
416e0d15688SBlanca Mellado Pinto   case MATPRODUCT_AtB:
417e0d15688SBlanca Mellado Pinto     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418e0d15688SBlanca Mellado Pinto     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419d71ae5a4SJacob Faibussowitsch     break;
420d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
421e0d15688SBlanca Mellado Pinto     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422e0d15688SBlanca Mellado Pinto     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423d71ae5a4SJacob Faibussowitsch     break;
424d71ae5a4SJacob Faibussowitsch   default:
425d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426d24d4204SJose E. Roman   }
4273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
428d24d4204SJose E. Roman }
429d24d4204SJose E. Roman 
MatGetDiagonal_ScaLAPACK(Mat A,Vec D)430d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431d71ae5a4SJacob Faibussowitsch {
432d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433d24d4204SJose E. Roman   PetscScalar    *darray, *d2d, v;
434d24d4204SJose E. Roman   const PetscInt *ranges;
435d24d4204SJose E. Roman   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436d24d4204SJose E. Roman 
437d24d4204SJose E. Roman   PetscFunctionBegin;
4389566063dSJacob Faibussowitsch   PetscCall(VecGetArray(D, &darray));
439d24d4204SJose E. Roman 
440d24d4204SJose E. Roman   if (A->rmap->N <= A->cmap->N) { /* row version */
441d24d4204SJose E. Roman 
442d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4439566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
4449566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
4456497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
448d24d4204SJose E. Roman 
449d24d4204SJose E. Roman     /* allocate 2d vector */
450d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
4519566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
4526497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
453d24d4204SJose E. Roman 
454d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
457d24d4204SJose E. Roman 
458d24d4204SJose E. Roman     /* collect diagonal */
459d24d4204SJose E. Roman     for (j = 1; j <= a->M; j++) {
460792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462d24d4204SJose E. Roman     }
463d24d4204SJose E. Roman 
464d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
465792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
4669566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
467d24d4204SJose E. Roman 
468d24d4204SJose E. Roman   } else { /* column version */
469d24d4204SJose E. Roman 
470d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
4719566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
4729566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473d24d4204SJose E. Roman     dlld = 1;
474792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
476d24d4204SJose E. Roman 
477d24d4204SJose E. Roman     /* allocate 2d vector */
478d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
4799566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
480d24d4204SJose E. Roman 
481d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
484d24d4204SJose E. Roman 
485d24d4204SJose E. Roman     /* collect diagonal */
486d24d4204SJose E. Roman     for (j = 1; j <= a->N; j++) {
487792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489d24d4204SJose E. Roman     }
490d24d4204SJose E. Roman 
491d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
492792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
4939566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
494d24d4204SJose E. Roman   }
495d24d4204SJose E. Roman 
4969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(D, &darray));
4979566063dSJacob Faibussowitsch   PetscCall(VecAssemblyBegin(D));
4989566063dSJacob Faibussowitsch   PetscCall(VecAssemblyEnd(D));
4993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
500d24d4204SJose E. Roman }
501d24d4204SJose E. Roman 
MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)502d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503d71ae5a4SJacob Faibussowitsch {
504d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505d24d4204SJose E. Roman   const PetscScalar *d;
506d24d4204SJose E. Roman   const PetscInt    *ranges;
507d24d4204SJose E. Roman   PetscScalar       *d2d;
508d24d4204SJose E. Roman   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509d24d4204SJose E. Roman 
510d24d4204SJose E. Roman   PetscFunctionBegin;
511d24d4204SJose E. Roman   if (R) {
512eb8278ccSPierre Jolivet     PetscCall(VecGetArrayRead(R, &d));
513d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
5149566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
5159566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516d24d4204SJose E. Roman     dlld = 1;
517792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
519d24d4204SJose E. Roman 
520d24d4204SJose E. Roman     /* allocate 2d vector */
521d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
5229566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
523d24d4204SJose E. Roman 
524d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
527d24d4204SJose E. Roman 
528d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
529c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530d24d4204SJose E. Roman 
531d24d4204SJose E. Roman     /* broadcast along process columns */
532d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534d24d4204SJose E. Roman 
535d24d4204SJose E. Roman     /* local scaling */
5369371c9d4SSatish Balay     for (j = 0; j < a->locc; j++)
5379371c9d4SSatish Balay       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538d24d4204SJose E. Roman 
5399566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
540eb8278ccSPierre Jolivet     PetscCall(VecRestoreArrayRead(R, &d));
541d24d4204SJose E. Roman   }
542d24d4204SJose E. Roman   if (L) {
543eb8278ccSPierre Jolivet     PetscCall(VecGetArrayRead(L, &d));
544d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
5459566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
5469566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
5476497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
550d24d4204SJose E. Roman 
551d24d4204SJose E. Roman     /* allocate 2d vector */
552d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
5539566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(lszd, &d2d));
5546497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
555d24d4204SJose E. Roman 
556d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
559d24d4204SJose E. Roman 
560d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
561c87776d3SJose E. Roman     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562d24d4204SJose E. Roman 
563d24d4204SJose E. Roman     /* broadcast along process rows */
564d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566d24d4204SJose E. Roman 
567d24d4204SJose E. Roman     /* local scaling */
5689371c9d4SSatish Balay     for (i = 0; i < a->locr; i++)
5699371c9d4SSatish Balay       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570d24d4204SJose E. Roman 
5719566063dSJacob Faibussowitsch     PetscCall(PetscFree(d2d));
572eb8278ccSPierre Jolivet     PetscCall(VecRestoreArrayRead(L, &d));
573d24d4204SJose E. Roman   }
5743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
575d24d4204SJose E. Roman }
576d24d4204SJose E. Roman 
MatScale_ScaLAPACK(Mat X,PetscScalar a)577d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
578d71ae5a4SJacob Faibussowitsch {
579d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
580d24d4204SJose E. Roman   PetscBLASInt   n, one = 1;
581d24d4204SJose E. Roman 
582d24d4204SJose E. Roman   PetscFunctionBegin;
583d24d4204SJose E. Roman   n = x->lld * x->locc;
584792fecdfSBarry Smith   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
5853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
586d24d4204SJose E. Roman }
587d24d4204SJose E. Roman 
MatShift_ScaLAPACK(Mat X,PetscScalar alpha)588d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
589d71ae5a4SJacob Faibussowitsch {
590d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
591d24d4204SJose E. Roman   PetscBLASInt   i, n;
592d24d4204SJose E. Roman   PetscScalar    v;
593d24d4204SJose E. Roman 
594d24d4204SJose E. Roman   PetscFunctionBegin;
595d24d4204SJose E. Roman   n = PetscMin(x->M, x->N);
596d24d4204SJose E. Roman   for (i = 1; i <= n; i++) {
597792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
598d24d4204SJose E. Roman     v += alpha;
599792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
600d24d4204SJose E. Roman   }
6013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
602d24d4204SJose E. Roman }
603d24d4204SJose E. Roman 
MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)604d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
605d71ae5a4SJacob Faibussowitsch {
606d24d4204SJose E. Roman   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
607d24d4204SJose E. Roman   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
608d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
609d24d4204SJose E. Roman   PetscScalar    beta = 1.0;
610d24d4204SJose E. Roman 
611d24d4204SJose E. Roman   PetscFunctionBegin;
612d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
613792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
6149566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
6153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
616d24d4204SJose E. Roman }
617d24d4204SJose E. Roman 
MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)618d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
619d71ae5a4SJacob Faibussowitsch {
620d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
621d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
622d24d4204SJose E. Roman 
623d24d4204SJose E. Roman   PetscFunctionBegin;
6249566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
6259566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
6263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
627d24d4204SJose E. Roman }
628d24d4204SJose E. Roman 
MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat * B)629d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
630d71ae5a4SJacob Faibussowitsch {
631d24d4204SJose E. Roman   Mat            Bs;
632d24d4204SJose E. Roman   MPI_Comm       comm;
633d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
634d24d4204SJose E. Roman 
635d24d4204SJose E. Roman   PetscFunctionBegin;
6369566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
6379566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Bs));
6389566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
6399566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bs, MATSCALAPACK));
640d24d4204SJose E. Roman   b       = (Mat_ScaLAPACK *)Bs->data;
641d24d4204SJose E. Roman   b->M    = a->M;
642d24d4204SJose E. Roman   b->N    = a->N;
643d24d4204SJose E. Roman   b->mb   = a->mb;
644d24d4204SJose E. Roman   b->nb   = a->nb;
645d24d4204SJose E. Roman   b->rsrc = a->rsrc;
646d24d4204SJose E. Roman   b->csrc = a->csrc;
6479566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Bs));
648d24d4204SJose E. Roman   *B = Bs;
64948a46eb9SPierre Jolivet   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
650d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
652d24d4204SJose E. Roman }
653d24d4204SJose E. Roman 
MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat * B)654d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
655d71ae5a4SJacob Faibussowitsch {
656d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
657d24d4204SJose E. Roman   Mat            Bs   = *B;
658d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
659d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
660d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
661d24d4204SJose E. Roman   PetscInt i, j;
662d24d4204SJose E. Roman #endif
663d24d4204SJose E. Roman 
664d24d4204SJose E. Roman   PetscFunctionBegin;
6657fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
6660fdf79fbSJacob Faibussowitsch   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
6679566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
668d24d4204SJose E. Roman   *B = Bs;
669d24d4204SJose E. Roman   b  = (Mat_ScaLAPACK *)Bs->data;
670792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
671d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
672d24d4204SJose E. Roman   /* undo conjugation */
6739371c9d4SSatish Balay   for (i = 0; i < b->locr; i++)
6749371c9d4SSatish Balay     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
675d24d4204SJose E. Roman #endif
676d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
6773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
678d24d4204SJose E. Roman }
679d24d4204SJose E. Roman 
MatConjugate_ScaLAPACK(Mat A)680d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
681d71ae5a4SJacob Faibussowitsch {
682d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
683d24d4204SJose E. Roman   PetscInt       i, j;
684d24d4204SJose E. Roman 
685d24d4204SJose E. Roman   PetscFunctionBegin;
6869371c9d4SSatish Balay   for (i = 0; i < a->locr; i++)
6879371c9d4SSatish Balay     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
6883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
689d24d4204SJose E. Roman }
690d24d4204SJose E. Roman 
MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat * B)691d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
692d71ae5a4SJacob Faibussowitsch {
693d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
694d24d4204SJose E. Roman   Mat            Bs   = *B;
695d24d4204SJose E. Roman   PetscBLASInt   one  = 1;
696d24d4204SJose E. Roman   PetscScalar    sone = 1.0, zero = 0.0;
697d24d4204SJose E. Roman 
698d24d4204SJose E. Roman   PetscFunctionBegin;
6990fdf79fbSJacob Faibussowitsch   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
7009566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
701d24d4204SJose E. Roman   *B = Bs;
702d24d4204SJose E. Roman   b  = (Mat_ScaLAPACK *)Bs->data;
703792fecdfSBarry Smith   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
704d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
7053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
706d24d4204SJose E. Roman }
707d24d4204SJose E. Roman 
MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)708d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
709d71ae5a4SJacob Faibussowitsch {
710d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
711d24d4204SJose E. Roman   PetscScalar    *x, *x2d;
712d24d4204SJose E. Roman   const PetscInt *ranges;
713d24d4204SJose E. Roman   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
714d24d4204SJose E. Roman 
715d24d4204SJose E. Roman   PetscFunctionBegin;
7169566063dSJacob Faibussowitsch   PetscCall(VecCopy(B, X));
7179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(X, &x));
718d24d4204SJose E. Roman 
719d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
7209566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
7219566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
7226497c311SBarry Smith   PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
723792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
724d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
725d24d4204SJose E. Roman 
726d24d4204SJose E. Roman   /* allocate 2d vector */
727d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
7289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(lszx, &x2d));
7296497c311SBarry Smith   PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
730d24d4204SJose E. Roman 
731d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
732792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
733d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
734d24d4204SJose E. Roman 
735d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
736792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
737d24d4204SJose E. Roman 
738d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
739d24d4204SJose E. Roman   switch (A->factortype) {
740d24d4204SJose E. Roman   case MAT_FACTOR_LU:
741792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
742d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
743d24d4204SJose E. Roman     break;
744d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
745792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
746d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
747d24d4204SJose E. Roman     break;
748d71ae5a4SJacob Faibussowitsch   default:
749d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
750d24d4204SJose E. Roman   }
751d24d4204SJose E. Roman 
752d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
753792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
754d24d4204SJose E. Roman 
7559566063dSJacob Faibussowitsch   PetscCall(PetscFree(x2d));
7569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(X, &x));
7573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
758d24d4204SJose E. Roman }
759d24d4204SJose E. Roman 
MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)760d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
761d71ae5a4SJacob Faibussowitsch {
762d24d4204SJose E. Roman   PetscFunctionBegin;
7639566063dSJacob Faibussowitsch   PetscCall(MatSolve_ScaLAPACK(A, B, X));
7649566063dSJacob Faibussowitsch   PetscCall(VecAXPY(X, 1, Y));
7653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
766d24d4204SJose E. Roman }
767d24d4204SJose E. Roman 
MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)768d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
769d71ae5a4SJacob Faibussowitsch {
7705d5af635SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
771d24d4204SJose E. Roman   PetscBool      flg1, flg2;
772d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
7735d5af635SJose E. Roman   Mat            C;
7745d5af635SJose E. Roman   MatType        type;
775d24d4204SJose E. Roman 
776d24d4204SJose E. Roman   PetscFunctionBegin;
7779566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
7789566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
7795d5af635SJose E. Roman   if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
7805d5af635SJose E. Roman   if (flg2) {
7815d5af635SJose E. Roman     if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
7825d5af635SJose E. Roman     else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
7835d5af635SJose E. Roman     C = X;
7845d5af635SJose E. Roman   } else {
7855d5af635SJose E. Roman     PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
7865d5af635SJose E. Roman   }
7875d5af635SJose E. Roman   x = (Mat_ScaLAPACK *)C->data;
788d24d4204SJose E. Roman 
789d24d4204SJose E. Roman   switch (A->factortype) {
790d24d4204SJose E. Roman   case MAT_FACTOR_LU:
791792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
792d24d4204SJose E. Roman     PetscCheckScaLapackInfo("getrs", info);
793d24d4204SJose E. Roman     break;
794d24d4204SJose E. Roman   case MAT_FACTOR_CHOLESKY:
795792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
796d24d4204SJose E. Roman     PetscCheckScaLapackInfo("potrs", info);
797d24d4204SJose E. Roman     break;
798d71ae5a4SJacob Faibussowitsch   default:
799d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
800d24d4204SJose E. Roman   }
8015d5af635SJose E. Roman   if (!flg2) {
8025d5af635SJose E. Roman     PetscCall(MatGetType(X, &type));
8035d5af635SJose E. Roman     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
8045d5af635SJose E. Roman     PetscCall(MatDestroy(&C));
8055d5af635SJose E. Roman   }
8063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
807d24d4204SJose E. Roman }
808d24d4204SJose E. Roman 
MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo * factorinfo)809d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
810d71ae5a4SJacob Faibussowitsch {
811d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
812d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
813d24d4204SJose E. Roman 
814d24d4204SJose E. Roman   PetscFunctionBegin;
815aa624791SPierre Jolivet   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
816792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
817d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf", info);
818d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
819d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
820d24d4204SJose E. Roman 
8219566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
8229566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
8233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
824d24d4204SJose E. Roman }
825d24d4204SJose E. Roman 
MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)826d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
827d71ae5a4SJacob Faibussowitsch {
828d24d4204SJose E. Roman   PetscFunctionBegin;
8299566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
8309566063dSJacob Faibussowitsch   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
8313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
832d24d4204SJose E. Roman }
833d24d4204SJose E. Roman 
MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)834d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
835d71ae5a4SJacob Faibussowitsch {
836d24d4204SJose E. Roman   PetscFunctionBegin;
837d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
8383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
839d24d4204SJose E. Roman }
840d24d4204SJose E. Roman 
MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo * factorinfo)841d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
842d71ae5a4SJacob Faibussowitsch {
843d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
844d24d4204SJose E. Roman   PetscBLASInt   one = 1, info;
845d24d4204SJose E. Roman 
846d24d4204SJose E. Roman   PetscFunctionBegin;
847792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
848d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf", info);
849d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
850d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
851d24d4204SJose E. Roman 
8529566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->solvertype));
8539566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
8543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
855d24d4204SJose E. Roman }
856d24d4204SJose E. Roman 
MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)857d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
858d71ae5a4SJacob Faibussowitsch {
859d24d4204SJose E. Roman   PetscFunctionBegin;
8609566063dSJacob Faibussowitsch   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
8619566063dSJacob Faibussowitsch   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
8623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
863d24d4204SJose E. Roman }
864d24d4204SJose E. Roman 
MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo * info)865d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
866d71ae5a4SJacob Faibussowitsch {
867d24d4204SJose E. Roman   PetscFunctionBegin;
868d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
8693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
870d24d4204SJose E. Roman }
871d24d4204SJose E. Roman 
MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType * type)87266976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
873d71ae5a4SJacob Faibussowitsch {
874d24d4204SJose E. Roman   PetscFunctionBegin;
875d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
8763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
877d24d4204SJose E. Roman }
878d24d4204SJose E. Roman 
MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat * F)879d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
880d71ae5a4SJacob Faibussowitsch {
881d24d4204SJose E. Roman   Mat            B;
88259172f18SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
883d24d4204SJose E. Roman 
884d24d4204SJose E. Roman   PetscFunctionBegin;
885d24d4204SJose E. Roman   /* Create the factorization matrix */
8869566063dSJacob Faibussowitsch   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
88766e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
888d24d4204SJose E. Roman   B->factortype      = ftype;
8899566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
8909566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
891d24d4204SJose E. Roman 
8929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
893d24d4204SJose E. Roman   *F = B;
8943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
895d24d4204SJose E. Roman }
896d24d4204SJose E. Roman 
MatSolverTypeRegister_ScaLAPACK(void)897d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
898d71ae5a4SJacob Faibussowitsch {
899d24d4204SJose E. Roman   PetscFunctionBegin;
9009566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
9019566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
9023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
903d24d4204SJose E. Roman }
904d24d4204SJose E. Roman 
MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal * nrm)905d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
906d71ae5a4SJacob Faibussowitsch {
907d24d4204SJose E. Roman   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
908d24d4204SJose E. Roman   PetscBLASInt   one = 1, lwork = 0;
909d24d4204SJose E. Roman   const char    *ntype;
910d68f4f38SPierre Jolivet   PetscScalar   *work = NULL, dummy;
911d24d4204SJose E. Roman 
912d24d4204SJose E. Roman   PetscFunctionBegin;
913d24d4204SJose E. Roman   switch (type) {
914d24d4204SJose E. Roman   case NORM_1:
915d24d4204SJose E. Roman     ntype = "1";
916d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
917d24d4204SJose E. Roman     break;
918d24d4204SJose E. Roman   case NORM_FROBENIUS:
919d24d4204SJose E. Roman     ntype = "F";
920d24d4204SJose E. Roman     work  = &dummy;
921d24d4204SJose E. Roman     break;
922d24d4204SJose E. Roman   case NORM_INFINITY:
923d24d4204SJose E. Roman     ntype = "I";
924d24d4204SJose E. Roman     lwork = PetscMax(a->locr, a->locc);
925d24d4204SJose E. Roman     break;
926d71ae5a4SJacob Faibussowitsch   default:
927d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
928d24d4204SJose E. Roman   }
9299566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
930d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
9319566063dSJacob Faibussowitsch   if (lwork) PetscCall(PetscFree(work));
9323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
933d24d4204SJose E. Roman }
934d24d4204SJose E. Roman 
MatZeroEntries_ScaLAPACK(Mat A)935d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
936d71ae5a4SJacob Faibussowitsch {
937d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
938d24d4204SJose E. Roman 
939d24d4204SJose E. Roman   PetscFunctionBegin;
9409566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
9413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
942d24d4204SJose E. Roman }
943d24d4204SJose E. Roman 
MatGetOwnershipIS_ScaLAPACK(Mat A,IS * rows,IS * cols)944d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
945d71ae5a4SJacob Faibussowitsch {
946d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
947d24d4204SJose E. Roman   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
948d24d4204SJose E. Roman 
949d24d4204SJose E. Roman   PetscFunctionBegin;
950d24d4204SJose E. Roman   if (rows) {
951d24d4204SJose E. Roman     n     = a->locr;
952d24d4204SJose E. Roman     nb    = a->mb;
953d24d4204SJose E. Roman     isrc  = a->rsrc;
954d24d4204SJose E. Roman     nproc = a->grid->nprow;
955d24d4204SJose E. Roman     iproc = a->grid->myrow;
9569566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
957d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9589566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
959d24d4204SJose E. Roman   }
960d24d4204SJose E. Roman   if (cols) {
961d24d4204SJose E. Roman     n     = a->locc;
962d24d4204SJose E. Roman     nb    = a->nb;
963d24d4204SJose E. Roman     isrc  = a->csrc;
964d24d4204SJose E. Roman     nproc = a->grid->npcol;
965d24d4204SJose E. Roman     iproc = a->grid->mycol;
9669566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &idx));
967d24d4204SJose E. Roman     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
9689566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
969d24d4204SJose E. Roman   }
9703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
971d24d4204SJose E. Roman }
972d24d4204SJose E. Roman 
MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat * B)973d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
974d71ae5a4SJacob Faibussowitsch {
975d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
976d24d4204SJose E. Roman   Mat                Bmpi;
977d24d4204SJose E. Roman   MPI_Comm           comm;
978a00b6df3SJose E. Roman   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
9794b1a79daSJose E. Roman   const PetscInt    *ranges, *branges, *cwork;
9804b1a79daSJose E. Roman   const PetscScalar *vwork;
981d24d4204SJose E. Roman   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
982d24d4204SJose E. Roman   PetscScalar       *barray;
9834b1a79daSJose E. Roman   PetscBool          differ = PETSC_FALSE;
9844b1a79daSJose E. Roman   PetscMPIInt        size;
985d24d4204SJose E. Roman 
986d24d4204SJose E. Roman   PetscFunctionBegin;
9879566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9889566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
9894b1a79daSJose E. Roman 
9904b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
9919566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
9929566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
9939371c9d4SSatish Balay     for (i = 0; i < size; i++)
9949371c9d4SSatish Balay       if (ranges[i + 1] != branges[i + 1]) {
9959371c9d4SSatish Balay         differ = PETSC_TRUE;
9969371c9d4SSatish Balay         break;
9979371c9d4SSatish Balay       }
9984b1a79daSJose E. Roman   }
9994b1a79daSJose E. Roman 
10004b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
10019566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
10024b1a79daSJose E. Roman     m = PETSC_DECIDE;
10039566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
10044b1a79daSJose E. Roman     n = PETSC_DECIDE;
10059566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10069566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10079566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATDENSE));
10089566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
10094b1a79daSJose E. Roman 
10104b1a79daSJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
10119566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1012a00b6df3SJose E. Roman     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
10136497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1014792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
10154b1a79daSJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
10164b1a79daSJose E. Roman 
10174b1a79daSJose E. Roman     /* redistribute matrix */
10189566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
1019792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
10209566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
10219566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10229566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
10234b1a79daSJose E. Roman 
10244b1a79daSJose E. Roman     /* transfer rows of auxiliary matrix to the final matrix B */
10259566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
10264b1a79daSJose E. Roman     for (i = rstart; i < rend; i++) {
10279566063dSJacob Faibussowitsch       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
10289566063dSJacob Faibussowitsch       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
10299566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
10304b1a79daSJose E. Roman     }
10319566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
10329566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
10339566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Bmpi));
10344b1a79daSJose E. Roman 
10354b1a79daSJose E. Roman   } else { /* normal cases */
1036d24d4204SJose E. Roman 
1037d24d4204SJose E. Roman     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1038d24d4204SJose E. Roman     else {
10399566063dSJacob Faibussowitsch       PetscCall(MatCreate(comm, &Bmpi));
104092c846b4SJose E. Roman       m = PETSC_DECIDE;
10419566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
104292c846b4SJose E. Roman       n = PETSC_DECIDE;
10439566063dSJacob Faibussowitsch       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
10449566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
10459566063dSJacob Faibussowitsch       PetscCall(MatSetType(Bmpi, MATDENSE));
10469566063dSJacob Faibussowitsch       PetscCall(MatSetUp(Bmpi));
1047d24d4204SJose E. Roman     }
1048d24d4204SJose E. Roman 
1049d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
10509566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1051a00b6df3SJose E. Roman     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
10526497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1053792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1054d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1055d24d4204SJose E. Roman 
1056d24d4204SJose E. Roman     /* redistribute matrix */
10579566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(Bmpi, &barray));
1058792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
10599566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1060d24d4204SJose E. Roman 
10619566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
10629566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1063ac530a7eSPierre Jolivet     if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1064ac530a7eSPierre Jolivet     else *B = Bmpi;
10654b1a79daSJose E. Roman   }
10663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1067d24d4204SJose E. Roman }
1068d24d4204SJose E. Roman 
MatScaLAPACKCheckLayout(PetscLayout map,PetscBool * correct)1069d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1070d71ae5a4SJacob Faibussowitsch {
1071b12397e7SPierre Jolivet   const PetscInt *ranges;
1072b12397e7SPierre Jolivet   PetscMPIInt     size;
1073b12397e7SPierre Jolivet   PetscInt        i, n;
1074b12397e7SPierre Jolivet 
1075b12397e7SPierre Jolivet   PetscFunctionBegin;
1076b12397e7SPierre Jolivet   *correct = PETSC_TRUE;
1077b12397e7SPierre Jolivet   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1078b12397e7SPierre Jolivet   if (size > 1) {
1079b12397e7SPierre Jolivet     PetscCall(PetscLayoutGetRanges(map, &ranges));
1080b12397e7SPierre Jolivet     n = ranges[1] - ranges[0];
10819371c9d4SSatish Balay     for (i = 1; i < size; i++)
10829371c9d4SSatish Balay       if (ranges[i + 1] - ranges[i] != n) break;
1083b12397e7SPierre Jolivet     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1084b12397e7SPierre Jolivet   }
10853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1086b12397e7SPierre Jolivet }
1087b12397e7SPierre Jolivet 
MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * B)1088d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1089d71ae5a4SJacob Faibussowitsch {
1090d24d4204SJose E. Roman   Mat_ScaLAPACK     *b;
1091d24d4204SJose E. Roman   Mat                Bmpi;
1092d24d4204SJose E. Roman   MPI_Comm           comm;
109392c846b4SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1094b12397e7SPierre Jolivet   const PetscInt    *ranges, *rows, *cols;
1095d24d4204SJose E. Roman   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1096c87776d3SJose E. Roman   const PetscScalar *aarray;
1097b12397e7SPierre Jolivet   IS                 ir, ic;
10984e8b52a1SJose E. Roman   PetscInt           lda;
1099b12397e7SPierre Jolivet   PetscBool          flg;
1100d24d4204SJose E. Roman 
1101d24d4204SJose E. Roman   PetscFunctionBegin;
11029566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1103d24d4204SJose E. Roman 
1104d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1105d24d4204SJose E. Roman   else {
11069566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &Bmpi));
110792c846b4SJose E. Roman     m = PETSC_DECIDE;
11089566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
110992c846b4SJose E. Roman     n = PETSC_DECIDE;
11109566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
11119566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
11129566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
11139566063dSJacob Faibussowitsch     PetscCall(MatSetUp(Bmpi));
1114d24d4204SJose E. Roman   }
1115d24d4204SJose E. Roman   b = (Mat_ScaLAPACK *)Bmpi->data;
1116d24d4204SJose E. Roman 
1117b12397e7SPierre Jolivet   PetscCall(MatDenseGetLDA(A, &lda));
1118c87776d3SJose E. Roman   PetscCall(MatDenseGetArrayRead(A, &aarray));
1119b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1120b12397e7SPierre Jolivet   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1121b12397e7SPierre Jolivet   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1122d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for A (1d block distribution) */
11239566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
11249566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[1], &amb));        /* row block size */
11256497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1126792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1127d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit", info);
1128d24d4204SJose E. Roman 
1129d24d4204SJose E. Roman     /* redistribute matrix */
1130792fecdfSBarry Smith     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1131b12397e7SPierre Jolivet     Bmpi->nooffprocentries = PETSC_TRUE;
1132b12397e7SPierre Jolivet   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1133b12397e7SPierre Jolivet     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1134b12397e7SPierre Jolivet     b->roworiented = PETSC_FALSE;
1135b12397e7SPierre Jolivet     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1136b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ir, &rows));
1137b12397e7SPierre Jolivet     PetscCall(ISGetIndices(ic, &cols));
1138b12397e7SPierre Jolivet     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1139b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ir, &rows));
1140b12397e7SPierre Jolivet     PetscCall(ISRestoreIndices(ic, &cols));
1141b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ic));
1142b12397e7SPierre Jolivet     PetscCall(ISDestroy(&ir));
1143b12397e7SPierre Jolivet   }
1144c87776d3SJose E. Roman   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
11459566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
11469566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1147ac530a7eSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1148ac530a7eSPierre Jolivet   else *B = Bmpi;
11493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1150d24d4204SJose E. Roman }
1151d24d4204SJose E. Roman 
MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1152d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1153d71ae5a4SJacob Faibussowitsch {
1154d24d4204SJose E. Roman   Mat                mat_scal;
1155d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1156d24d4204SJose E. Roman   const PetscInt    *cols;
1157d24d4204SJose E. Roman   const PetscScalar *vals;
1158d24d4204SJose E. Roman 
1159d24d4204SJose E. Roman   PetscFunctionBegin;
1160d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1161d24d4204SJose E. Roman     mat_scal = *newmat;
11629566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1163d24d4204SJose E. Roman   } else {
11649566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1165d24d4204SJose E. Roman     m = PETSC_DECIDE;
11669566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1167d24d4204SJose E. Roman     n = PETSC_DECIDE;
11689566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
11699566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
11709566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
11719566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1172d24d4204SJose E. Roman   }
1173d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
11749566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
11759566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
11769566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1177d24d4204SJose E. Roman   }
11789566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
11799566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1180d24d4204SJose E. Roman 
11819566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1182d24d4204SJose E. Roman   else *newmat = mat_scal;
11833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1184d24d4204SJose E. Roman }
1185d24d4204SJose E. Roman 
MatConvert_SBAIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1186d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1187d71ae5a4SJacob Faibussowitsch {
1188d24d4204SJose E. Roman   Mat                mat_scal;
1189d24d4204SJose E. Roman   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1190d24d4204SJose E. Roman   const PetscInt    *cols;
1191d24d4204SJose E. Roman   const PetscScalar *vals;
1192d24d4204SJose E. Roman   PetscScalar        v;
1193d24d4204SJose E. Roman 
1194d24d4204SJose E. Roman   PetscFunctionBegin;
1195d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1196d24d4204SJose E. Roman     mat_scal = *newmat;
11979566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(mat_scal));
1198d24d4204SJose E. Roman   } else {
11999566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1200d24d4204SJose E. Roman     m = PETSC_DECIDE;
12019566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1202d24d4204SJose E. Roman     n = PETSC_DECIDE;
12039566063dSJacob Faibussowitsch     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
12049566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
12059566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
12069566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_scal));
1207d24d4204SJose E. Roman   }
12089566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
1209d24d4204SJose E. Roman   for (row = rstart; row < rend; row++) {
12109566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
12119566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1212d24d4204SJose E. Roman     for (j = 0; j < ncols; j++) { /* lower triangular part */
1213d24d4204SJose E. Roman       if (cols[j] == row) continue;
1214b94d7dedSBarry Smith       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
12159566063dSJacob Faibussowitsch       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1216d24d4204SJose E. Roman     }
12179566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1218d24d4204SJose E. Roman   }
12199566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
12209566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
12219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1222d24d4204SJose E. Roman 
12239566063dSJacob Faibussowitsch   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1224d24d4204SJose E. Roman   else *newmat = mat_scal;
12253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1226d24d4204SJose E. Roman }
1227d24d4204SJose E. Roman 
MatScaLAPACKSetPreallocation(Mat A)1228d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1229d71ae5a4SJacob Faibussowitsch {
1230d24d4204SJose E. Roman   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1231d24d4204SJose E. Roman   PetscInt       sz = 0;
1232d24d4204SJose E. Roman 
1233d24d4204SJose E. Roman   PetscFunctionBegin;
12349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1236d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1237d24d4204SJose E. Roman 
12389566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
12399566063dSJacob Faibussowitsch   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
12409566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(sz, &a->loc));
1241d24d4204SJose E. Roman 
1242d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
12433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1244d24d4204SJose E. Roman }
1245d24d4204SJose E. Roman 
MatDestroy_ScaLAPACK(Mat A)1246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1247d71ae5a4SJacob Faibussowitsch {
1248d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1249d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1250b8b5be36SMartin Diehl   PetscMPIInt         iflg;
1251d24d4204SJose E. Roman   MPI_Comm            icomm;
1252d24d4204SJose E. Roman 
1253d24d4204SJose E. Roman   PetscFunctionBegin;
12549566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&A->stash));
12559566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->loc));
12569566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->pivots));
12579566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1258b8b5be36SMartin Diehl   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1259d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1260d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1261d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1262d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
12639566063dSJacob Faibussowitsch     PetscCall(PetscFree(grid));
12649566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1265d24d4204SJose E. Roman   }
12669566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
12679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
12689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
12699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
12709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
12719566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
12723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1273d24d4204SJose E. Roman }
1274d24d4204SJose E. Roman 
MatSetUp_ScaLAPACK(Mat A)127566976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1276d71ae5a4SJacob Faibussowitsch {
1277d24d4204SJose E. Roman   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1278d24d4204SJose E. Roman   PetscBLASInt   info = 0;
1279b12397e7SPierre Jolivet   PetscBool      flg;
1280d24d4204SJose E. Roman 
1281d24d4204SJose E. Roman   PetscFunctionBegin;
12829566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12839566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1284d24d4204SJose E. Roman 
1285b12397e7SPierre Jolivet   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1286b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1287b12397e7SPierre Jolivet   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1288b12397e7SPierre Jolivet   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1289b12397e7SPierre Jolivet   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1290d24d4204SJose E. Roman 
1291d24d4204SJose E. Roman   /* compute local sizes */
12929566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
12939566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1294d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1295d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1296d24d4204SJose E. Roman   a->lld  = PetscMax(1, a->locr);
1297d24d4204SJose E. Roman 
1298d24d4204SJose E. Roman   /* allocate local array */
12999566063dSJacob Faibussowitsch   PetscCall(MatScaLAPACKSetPreallocation(A));
1300d24d4204SJose E. Roman 
1301d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1302792fecdfSBarry Smith   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1303d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit", info);
13043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1305d24d4204SJose E. Roman }
1306d24d4204SJose E. Roman 
MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)130766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1308d71ae5a4SJacob Faibussowitsch {
1309d24d4204SJose E. Roman   PetscInt nstash, reallocs;
1310d24d4204SJose E. Roman 
1311d24d4204SJose E. Roman   PetscFunctionBegin;
13123ba16761SJacob Faibussowitsch   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
13139566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
13149566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
13159566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
13163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1317d24d4204SJose E. Roman }
1318d24d4204SJose E. Roman 
MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)131966976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1320d71ae5a4SJacob Faibussowitsch {
1321d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1322d24d4204SJose E. Roman   PetscMPIInt    n;
1323d24d4204SJose E. Roman   PetscInt       i, flg, *row, *col;
1324d24d4204SJose E. Roman   PetscScalar   *val;
1325d24d4204SJose E. Roman   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1326d24d4204SJose E. Roman 
1327d24d4204SJose E. Roman   PetscFunctionBegin;
13283ba16761SJacob Faibussowitsch   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1329d24d4204SJose E. Roman   while (1) {
13309566063dSJacob Faibussowitsch     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1331d24d4204SJose E. Roman     if (!flg) break;
1332d24d4204SJose E. Roman     for (i = 0; i < n; i++) {
13339566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
13349566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1335792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1336aed4548fSBarry Smith       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1337d24d4204SJose E. Roman       switch (A->insertmode) {
1338d71ae5a4SJacob Faibussowitsch       case INSERT_VALUES:
1339d71ae5a4SJacob Faibussowitsch         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1340d71ae5a4SJacob Faibussowitsch         break;
1341d71ae5a4SJacob Faibussowitsch       case ADD_VALUES:
1342d71ae5a4SJacob Faibussowitsch         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1343d71ae5a4SJacob Faibussowitsch         break;
1344d71ae5a4SJacob Faibussowitsch       default:
1345d71ae5a4SJacob Faibussowitsch         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1346d24d4204SJose E. Roman       }
1347d24d4204SJose E. Roman     }
1348d24d4204SJose E. Roman   }
13499566063dSJacob Faibussowitsch   PetscCall(MatStashScatterEnd_Private(&A->stash));
13503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1351d24d4204SJose E. Roman }
1352d24d4204SJose E. Roman 
MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)135366976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1354d71ae5a4SJacob Faibussowitsch {
1355d24d4204SJose E. Roman   Mat      Adense, As;
1356d24d4204SJose E. Roman   MPI_Comm comm;
1357d24d4204SJose E. Roman 
1358d24d4204SJose E. Roman   PetscFunctionBegin;
13599566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
13609566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &Adense));
13619566063dSJacob Faibussowitsch   PetscCall(MatSetType(Adense, MATDENSE));
13629566063dSJacob Faibussowitsch   PetscCall(MatLoad(Adense, viewer));
13639566063dSJacob Faibussowitsch   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
13649566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Adense));
13659566063dSJacob Faibussowitsch   PetscCall(MatHeaderReplace(newMat, &As));
13663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1367d24d4204SJose E. Roman }
1368d24d4204SJose E. Roman 
13699371c9d4SSatish Balay static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
13704cc2b5b5SPierre Jolivet                                        NULL,
13714cc2b5b5SPierre Jolivet                                        NULL,
1372d24d4204SJose E. Roman                                        MatMult_ScaLAPACK,
1373d24d4204SJose E. Roman                                        /* 4*/ MatMultAdd_ScaLAPACK,
1374d24d4204SJose E. Roman                                        MatMultTranspose_ScaLAPACK,
1375d24d4204SJose E. Roman                                        MatMultTransposeAdd_ScaLAPACK,
1376d24d4204SJose E. Roman                                        MatSolve_ScaLAPACK,
1377d24d4204SJose E. Roman                                        MatSolveAdd_ScaLAPACK,
13784cc2b5b5SPierre Jolivet                                        NULL,
13794cc2b5b5SPierre Jolivet                                        /*10*/ NULL,
1380d24d4204SJose E. Roman                                        MatLUFactor_ScaLAPACK,
1381d24d4204SJose E. Roman                                        MatCholeskyFactor_ScaLAPACK,
13824cc2b5b5SPierre Jolivet                                        NULL,
1383d24d4204SJose E. Roman                                        MatTranspose_ScaLAPACK,
1384d24d4204SJose E. Roman                                        /*15*/ MatGetInfo_ScaLAPACK,
13854cc2b5b5SPierre Jolivet                                        NULL,
1386d24d4204SJose E. Roman                                        MatGetDiagonal_ScaLAPACK,
1387d24d4204SJose E. Roman                                        MatDiagonalScale_ScaLAPACK,
1388d24d4204SJose E. Roman                                        MatNorm_ScaLAPACK,
1389d24d4204SJose E. Roman                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1390d24d4204SJose E. Roman                                        MatAssemblyEnd_ScaLAPACK,
1391d24d4204SJose E. Roman                                        MatSetOption_ScaLAPACK,
1392d24d4204SJose E. Roman                                        MatZeroEntries_ScaLAPACK,
13934cc2b5b5SPierre Jolivet                                        /*24*/ NULL,
1394d24d4204SJose E. Roman                                        MatLUFactorSymbolic_ScaLAPACK,
1395d24d4204SJose E. Roman                                        MatLUFactorNumeric_ScaLAPACK,
1396d24d4204SJose E. Roman                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1397d24d4204SJose E. Roman                                        MatCholeskyFactorNumeric_ScaLAPACK,
1398d24d4204SJose E. Roman                                        /*29*/ MatSetUp_ScaLAPACK,
13994cc2b5b5SPierre Jolivet                                        NULL,
14004cc2b5b5SPierre Jolivet                                        NULL,
14014cc2b5b5SPierre Jolivet                                        NULL,
14024cc2b5b5SPierre Jolivet                                        NULL,
1403d24d4204SJose E. Roman                                        /*34*/ MatDuplicate_ScaLAPACK,
14044cc2b5b5SPierre Jolivet                                        NULL,
14054cc2b5b5SPierre Jolivet                                        NULL,
14064cc2b5b5SPierre Jolivet                                        NULL,
14074cc2b5b5SPierre Jolivet                                        NULL,
1408d24d4204SJose E. Roman                                        /*39*/ MatAXPY_ScaLAPACK,
14094cc2b5b5SPierre Jolivet                                        NULL,
14104cc2b5b5SPierre Jolivet                                        NULL,
14114cc2b5b5SPierre Jolivet                                        NULL,
1412d24d4204SJose E. Roman                                        MatCopy_ScaLAPACK,
14134cc2b5b5SPierre Jolivet                                        /*44*/ NULL,
1414d24d4204SJose E. Roman                                        MatScale_ScaLAPACK,
1415d24d4204SJose E. Roman                                        MatShift_ScaLAPACK,
14164cc2b5b5SPierre Jolivet                                        NULL,
14174cc2b5b5SPierre Jolivet                                        NULL,
14184cc2b5b5SPierre Jolivet                                        /*49*/ NULL,
14194cc2b5b5SPierre Jolivet                                        NULL,
14204cc2b5b5SPierre Jolivet                                        NULL,
14214cc2b5b5SPierre Jolivet                                        NULL,
14224cc2b5b5SPierre Jolivet                                        NULL,
14234cc2b5b5SPierre Jolivet                                        /*54*/ NULL,
14244cc2b5b5SPierre Jolivet                                        NULL,
14254cc2b5b5SPierre Jolivet                                        NULL,
14264cc2b5b5SPierre Jolivet                                        NULL,
14274cc2b5b5SPierre Jolivet                                        NULL,
14284cc2b5b5SPierre Jolivet                                        /*59*/ NULL,
1429d24d4204SJose E. Roman                                        MatDestroy_ScaLAPACK,
1430d24d4204SJose E. Roman                                        MatView_ScaLAPACK,
14314cc2b5b5SPierre Jolivet                                        NULL,
14324cc2b5b5SPierre Jolivet                                        NULL,
14334cc2b5b5SPierre Jolivet                                        /*64*/ NULL,
14344cc2b5b5SPierre Jolivet                                        NULL,
14354cc2b5b5SPierre Jolivet                                        NULL,
14364cc2b5b5SPierre Jolivet                                        NULL,
14374cc2b5b5SPierre Jolivet                                        NULL,
14384cc2b5b5SPierre Jolivet                                        /*69*/ NULL,
1439d24d4204SJose E. Roman                                        MatConvert_ScaLAPACK_Dense,
14404cc2b5b5SPierre Jolivet                                        NULL,
14414cc2b5b5SPierre Jolivet                                        NULL,
14428bb0f5c6SPierre Jolivet                                        NULL,
14434cc2b5b5SPierre Jolivet                                        /*74*/ NULL,
14444cc2b5b5SPierre Jolivet                                        NULL,
14454cc2b5b5SPierre Jolivet                                        NULL,
14464cc2b5b5SPierre Jolivet                                        NULL,
14478bb0f5c6SPierre Jolivet                                        MatLoad_ScaLAPACK,
14484cc2b5b5SPierre Jolivet                                        /*79*/ NULL,
14494cc2b5b5SPierre Jolivet                                        NULL,
14504cc2b5b5SPierre Jolivet                                        NULL,
14514cc2b5b5SPierre Jolivet                                        NULL,
14528bb0f5c6SPierre Jolivet                                        NULL,
14534cc2b5b5SPierre Jolivet                                        /*84*/ NULL,
1454d24d4204SJose E. Roman                                        MatMatMultNumeric_ScaLAPACK,
14554cc2b5b5SPierre Jolivet                                        NULL,
14564cc2b5b5SPierre Jolivet                                        NULL,
1457d24d4204SJose E. Roman                                        MatMatTransposeMultNumeric_ScaLAPACK,
14588bb0f5c6SPierre Jolivet                                        /*89*/ NULL,
14598bb0f5c6SPierre Jolivet                                        MatProductSetFromOptions_ScaLAPACK,
14604cc2b5b5SPierre Jolivet                                        NULL,
14614cc2b5b5SPierre Jolivet                                        NULL,
1462d24d4204SJose E. Roman                                        MatConjugate_ScaLAPACK,
14638bb0f5c6SPierre Jolivet                                        /*94*/ NULL,
14644cc2b5b5SPierre Jolivet                                        NULL,
14654cc2b5b5SPierre Jolivet                                        NULL,
14664cc2b5b5SPierre Jolivet                                        NULL,
14674cc2b5b5SPierre Jolivet                                        NULL,
14688bb0f5c6SPierre Jolivet                                        /*99*/ NULL,
14698bb0f5c6SPierre Jolivet                                        MatMatSolve_ScaLAPACK,
14704cc2b5b5SPierre Jolivet                                        NULL,
14714cc2b5b5SPierre Jolivet                                        NULL,
14724cc2b5b5SPierre Jolivet                                        NULL,
1473*421480d9SBarry Smith                                        /*104*/ NULL,
14748bb0f5c6SPierre Jolivet                                        NULL,
14758bb0f5c6SPierre Jolivet                                        NULL,
14768bb0f5c6SPierre Jolivet                                        NULL,
14778bb0f5c6SPierre Jolivet                                        NULL,
14788bb0f5c6SPierre Jolivet                                        /*109*/ NULL,
14798bb0f5c6SPierre Jolivet                                        MatHermitianTranspose_ScaLAPACK,
14808bb0f5c6SPierre Jolivet                                        MatMultHermitianTranspose_ScaLAPACK,
14818bb0f5c6SPierre Jolivet                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1482*421480d9SBarry Smith                                        NULL,
14834cc2b5b5SPierre Jolivet                                        /*114*/ NULL,
14844cc2b5b5SPierre Jolivet                                        NULL,
14854cc2b5b5SPierre Jolivet                                        NULL,
14864cc2b5b5SPierre Jolivet                                        NULL,
14874cc2b5b5SPierre Jolivet                                        NULL,
14884cc2b5b5SPierre Jolivet                                        /*119*/ NULL,
14898bb0f5c6SPierre Jolivet                                        NULL,
14908bb0f5c6SPierre Jolivet                                        MatTransposeMatMultNumeric_ScaLAPACK,
14914cc2b5b5SPierre Jolivet                                        NULL,
14924cc2b5b5SPierre Jolivet                                        /*124*/ NULL,
14934cc2b5b5SPierre Jolivet                                        NULL,
14944cc2b5b5SPierre Jolivet                                        NULL,
14954cc2b5b5SPierre Jolivet                                        NULL,
14964cc2b5b5SPierre Jolivet                                        NULL,
14974cc2b5b5SPierre Jolivet                                        /*129*/ NULL,
14984cc2b5b5SPierre Jolivet                                        NULL,
14994cc2b5b5SPierre Jolivet                                        NULL,
15008bb0f5c6SPierre Jolivet                                        NULL,
15014cc2b5b5SPierre Jolivet                                        NULL,
15024cc2b5b5SPierre Jolivet                                        /*134*/ NULL,
15034cc2b5b5SPierre Jolivet                                        NULL,
15044cc2b5b5SPierre Jolivet                                        NULL,
15054cc2b5b5SPierre Jolivet                                        NULL,
15064cc2b5b5SPierre Jolivet                                        NULL,
15074cc2b5b5SPierre Jolivet                                        NULL,
15084cc2b5b5SPierre Jolivet                                        /*140*/ NULL,
15094cc2b5b5SPierre Jolivet                                        NULL,
151003db1824SAlex Lindsay                                        NULL,
1511c2be7ffeSStefano Zampini                                        NULL,
15124cc2b5b5SPierre Jolivet                                        NULL};
1513d24d4204SJose E. Roman 
MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash * stash,PetscInt * owners)1514d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1515d71ae5a4SJacob Faibussowitsch {
15166497c311SBarry Smith   PetscInt          *owner, *startv, *starti, bs2;
1517d24d4204SJose E. Roman   PetscInt           size = stash->size, nsends;
15186497c311SBarry Smith   PetscInt          *sindices, **rindices, j, l;
1519d24d4204SJose E. Roman   PetscScalar      **rvalues, *svalues;
1520d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1521d24d4204SJose E. Roman   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
15226497c311SBarry Smith   PetscMPIInt        tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1523d24d4204SJose E. Roman   PetscInt          *sp_idx, *sp_idy;
1524d24d4204SJose E. Roman   PetscScalar       *sp_val;
1525d24d4204SJose E. Roman   PetscMatStashSpace space, space_next;
1526d24d4204SJose E. Roman   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1527d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1528d24d4204SJose E. Roman 
1529d24d4204SJose E. Roman   PetscFunctionBegin;
1530d24d4204SJose E. Roman   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1531d24d4204SJose E. Roman     InsertMode addv;
1532462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
153308401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1534d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1535d24d4204SJose E. Roman   }
1536d24d4204SJose E. Roman 
1537d24d4204SJose E. Roman   bs2 = stash->bs * stash->bs;
1538d24d4204SJose E. Roman 
1539d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
15409566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
15419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1542d24d4204SJose E. Roman 
15436497c311SBarry Smith   ii = j = 0;
1544d24d4204SJose E. Roman   space  = stash->space_head;
1545d24d4204SJose E. Roman   while (space) {
1546d24d4204SJose E. Roman     space_next = space->next;
1547d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
15489566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
15499566063dSJacob Faibussowitsch       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1550792fecdfSBarry Smith       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1551d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
15529371c9d4SSatish Balay       nlengths[j]++;
15536497c311SBarry Smith       owner[ii] = j;
15546497c311SBarry Smith       ii++;
1555d24d4204SJose E. Roman     }
1556d24d4204SJose E. Roman     space = space_next;
1557d24d4204SJose E. Roman   }
1558d24d4204SJose E. Roman 
1559d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
15609566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
15616497c311SBarry Smith   nsends = 0;
15626497c311SBarry Smith   for (PetscMPIInt i = 0; i < size; i++) {
1563d24d4204SJose E. Roman     if (nlengths[i]) {
15649371c9d4SSatish Balay       sizes[i] = 1;
15659371c9d4SSatish Balay       nsends++;
1566d24d4204SJose E. Roman     }
1567d24d4204SJose E. Roman   }
1568d24d4204SJose E. Roman 
15699371c9d4SSatish Balay   {
15709371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
15716497c311SBarry Smith 
1572d24d4204SJose E. Roman     /* Determine the number of messages to expect, their lengths, from from-ids */
15739566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
15746497c311SBarry Smith     PetscCall(PetscMPIIntCast(nsends, &insends));
15756497c311SBarry Smith     PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1576d24d4204SJose E. Roman     /* since clubbing row,col - lengths are multiplied by 2 */
15776497c311SBarry Smith     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
15789566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1579d24d4204SJose E. Roman     /* values are size 'bs2' lengths (and remove earlier factor 2 */
15806497c311SBarry Smith     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
15819566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
15829566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
15839371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
15849371c9d4SSatish Balay   }
1585d24d4204SJose E. Roman 
1586d24d4204SJose E. Roman   /* do sends:
1587d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1588d24d4204SJose E. Roman          the ith processor
1589d24d4204SJose E. Roman   */
15909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
15919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
15929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1593d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
15949371c9d4SSatish Balay   startv[0] = 0;
15959371c9d4SSatish Balay   starti[0] = 0;
15966497c311SBarry Smith   for (PetscMPIInt i = 1; i < size; i++) {
1597d24d4204SJose E. Roman     startv[i] = startv[i - 1] + nlengths[i - 1];
1598d24d4204SJose E. Roman     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1599d24d4204SJose E. Roman   }
1600d24d4204SJose E. Roman 
16016497c311SBarry Smith   ii    = 0;
1602d24d4204SJose E. Roman   space = stash->space_head;
1603d24d4204SJose E. Roman   while (space) {
1604d24d4204SJose E. Roman     space_next = space->next;
1605d24d4204SJose E. Roman     sp_idx     = space->idx;
1606d24d4204SJose E. Roman     sp_idy     = space->idy;
1607d24d4204SJose E. Roman     sp_val     = space->val;
1608d24d4204SJose E. Roman     for (l = 0; l < space->local_used; l++) {
16096497c311SBarry Smith       j = owner[ii];
1610d24d4204SJose E. Roman       if (bs2 == 1) {
1611d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1612d24d4204SJose E. Roman       } else {
1613d24d4204SJose E. Roman         PetscInt     k;
1614d24d4204SJose E. Roman         PetscScalar *buf1, *buf2;
1615d24d4204SJose E. Roman         buf1 = svalues + bs2 * startv[j];
1616d24d4204SJose E. Roman         buf2 = space->val + bs2 * l;
1617d24d4204SJose E. Roman         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1618d24d4204SJose E. Roman       }
1619d24d4204SJose E. Roman       sindices[starti[j]]               = sp_idx[l];
1620d24d4204SJose E. Roman       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1621d24d4204SJose E. Roman       startv[j]++;
1622d24d4204SJose E. Roman       starti[j]++;
16236497c311SBarry Smith       ii++;
1624d24d4204SJose E. Roman     }
1625d24d4204SJose E. Roman     space = space_next;
1626d24d4204SJose E. Roman   }
1627d24d4204SJose E. Roman   startv[0] = 0;
16286497c311SBarry Smith   for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1629d24d4204SJose E. Roman 
16306497c311SBarry Smith   for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1631d24d4204SJose E. Roman     if (sizes[i]) {
16326497c311SBarry Smith       PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
16336497c311SBarry Smith       PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1634d24d4204SJose E. Roman     }
1635d24d4204SJose E. Roman   }
1636d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
16379566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
16386497c311SBarry Smith   for (PetscMPIInt i = 0; i < size; i++) {
16396497c311SBarry Smith     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1640d24d4204SJose E. Roman   }
1641d24d4204SJose E. Roman #endif
16429566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
16439566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
16449566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
16459566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
1646d24d4204SJose E. Roman 
1647d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
16489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1649d24d4204SJose E. Roman 
16506497c311SBarry Smith   for (PetscMPIInt i = 0; i < nreceives; i++) {
1651d24d4204SJose E. Roman     recv_waits[2 * i]     = recv_waits1[i];
1652d24d4204SJose E. Roman     recv_waits[2 * i + 1] = recv_waits2[i];
1653d24d4204SJose E. Roman   }
1654d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1655d24d4204SJose E. Roman 
16569566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
16579566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
1658d24d4204SJose E. Roman 
1659d24d4204SJose E. Roman   stash->svalues         = svalues;
1660d24d4204SJose E. Roman   stash->sindices        = sindices;
1661d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1662d24d4204SJose E. Roman   stash->rindices        = rindices;
1663d24d4204SJose E. Roman   stash->send_waits      = send_waits;
16646497c311SBarry Smith   stash->nsends          = (PetscMPIInt)nsends;
1665d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1666d24d4204SJose E. Roman   stash->reproduce_count = 0;
16673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1668d24d4204SJose E. Roman }
1669d24d4204SJose E. Roman 
MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)1670d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1671d71ae5a4SJacob Faibussowitsch {
1672d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1673d24d4204SJose E. Roman 
1674d24d4204SJose E. Roman   PetscFunctionBegin;
167528b400f6SJacob Faibussowitsch   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1676aed4548fSBarry Smith   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1677aed4548fSBarry Smith   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
16789566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
16799566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
16803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1681d24d4204SJose E. Roman }
1682d24d4204SJose E. Roman 
1683d24d4204SJose E. Roman /*@
16846aad120cSJose E. Roman   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
168511a5261eSBarry Smith   the `MATSCALAPACK` matrix
1686d24d4204SJose E. Roman 
1687c3339decSBarry Smith   Logically Collective
1688d24d4204SJose E. Roman 
1689d8d19677SJose E. Roman   Input Parameters:
169011a5261eSBarry Smith + A  - a `MATSCALAPACK` matrix
1691d24d4204SJose E. Roman . mb - the row block size
1692d24d4204SJose E. Roman - nb - the column block size
1693d24d4204SJose E. Roman 
1694d24d4204SJose E. Roman   Level: intermediate
1695d24d4204SJose E. Roman 
16962ef1f0ffSBarry Smith   Note:
16972ef1f0ffSBarry Smith   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
16982ef1f0ffSBarry Smith 
16991cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1700d24d4204SJose E. Roman @*/
MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)1701d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1702d71ae5a4SJacob Faibussowitsch {
1703d24d4204SJose E. Roman   PetscFunctionBegin;
1704d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1705d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, mb, 2);
1706d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A, nb, 3);
1707cac4c232SBarry Smith   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
17083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1709d24d4204SJose E. Roman }
1710d24d4204SJose E. Roman 
MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt * mb,PetscInt * nb)1711d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1712d71ae5a4SJacob Faibussowitsch {
1713d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1714d24d4204SJose E. Roman 
1715d24d4204SJose E. Roman   PetscFunctionBegin;
1716d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1717d24d4204SJose E. Roman   if (nb) *nb = a->nb;
17183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1719d24d4204SJose E. Roman }
1720d24d4204SJose E. Roman 
1721d24d4204SJose E. Roman /*@
17226aad120cSJose E. Roman   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
172311a5261eSBarry Smith   the `MATSCALAPACK` matrix
1724d24d4204SJose E. Roman 
17252ef1f0ffSBarry Smith   Not Collective
1726d24d4204SJose E. Roman 
1727d24d4204SJose E. Roman   Input Parameter:
172811a5261eSBarry Smith . A - a `MATSCALAPACK` matrix
1729d24d4204SJose E. Roman 
1730d24d4204SJose E. Roman   Output Parameters:
1731d24d4204SJose E. Roman + mb - the row block size
1732d24d4204SJose E. Roman - nb - the column block size
1733d24d4204SJose E. Roman 
1734d24d4204SJose E. Roman   Level: intermediate
1735d24d4204SJose E. Roman 
17362ef1f0ffSBarry Smith   Note:
17372ef1f0ffSBarry Smith   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
17382ef1f0ffSBarry Smith 
17391cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1740d24d4204SJose E. Roman @*/
MatScaLAPACKGetBlockSizes(Mat A,PetscInt * mb,PetscInt * nb)1741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1742d71ae5a4SJacob Faibussowitsch {
1743d24d4204SJose E. Roman   PetscFunctionBegin;
1744d24d4204SJose E. Roman   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1745cac4c232SBarry Smith   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
17463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1747d24d4204SJose E. Roman }
1748d24d4204SJose E. Roman 
1749d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1750d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1751d24d4204SJose E. Roman 
1752d24d4204SJose E. Roman /*MC
1753d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1754d24d4204SJose E. Roman 
17552ef1f0ffSBarry Smith    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1756d24d4204SJose E. Roman 
1757d24d4204SJose E. Roman    Options Database Keys:
17582ef1f0ffSBarry Smith +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
17592ef1f0ffSBarry Smith .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1760d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1761d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1762d24d4204SJose E. Roman 
17632ef1f0ffSBarry Smith    Level: intermediate
17642ef1f0ffSBarry Smith 
176589bba20eSBarry Smith   Note:
176689bba20eSBarry Smith    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
176789bba20eSBarry Smith    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
176889bba20eSBarry Smith    the given rank.
176989bba20eSBarry Smith 
17701cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1771d24d4204SJose E. Roman M*/
1772d24d4204SJose E. Roman 
MatCreate_ScaLAPACK(Mat A)1773d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1774d71ae5a4SJacob Faibussowitsch {
1775d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1776b8b5be36SMartin Diehl   PetscBool           flg;
1777b8b5be36SMartin Diehl   PetscMPIInt         iflg;
1778d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1779d24d4204SJose E. Roman   MPI_Comm            icomm;
1780d24d4204SJose E. Roman   PetscBLASInt        nprow, npcol, myrow, mycol;
1781d24d4204SJose E. Roman   PetscInt            optv1, k = 2, array[2] = {0, 0};
1782d24d4204SJose E. Roman   PetscMPIInt         size;
1783d24d4204SJose E. Roman 
1784d24d4204SJose E. Roman   PetscFunctionBegin;
1785aea10558SJacob Faibussowitsch   A->ops[0]     = MatOps_Values;
1786d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1787d24d4204SJose E. Roman 
17889566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1789d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1790d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1791d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1792d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1793d24d4204SJose E. Roman 
17944dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&a));
1795d24d4204SJose E. Roman   A->data = (void *)a;
1796d24d4204SJose E. Roman 
1797d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1798d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1799c8025a54SPierre Jolivet     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
18009566063dSJacob Faibussowitsch     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
18019566063dSJacob Faibussowitsch     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1802d24d4204SJose E. Roman   }
18039566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1804b8b5be36SMartin Diehl   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1805b8b5be36SMartin Diehl   if (!iflg) {
18064dfa11a4SJacob Faibussowitsch     PetscCall(PetscNew(&grid));
1807d24d4204SJose E. Roman 
18089566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(icomm, &size));
18096497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));
1810d24d4204SJose E. Roman 
1811d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1812b8b5be36SMartin Diehl     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg));
1813b8b5be36SMartin Diehl     if (flg) {
181408401ef6SPierre Jolivet       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
18156497c311SBarry Smith       PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1816d24d4204SJose E. Roman     }
1817d0609cedSBarry Smith     PetscOptionsEnd();
1818d24d4204SJose E. Roman 
1819d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1820d24d4204SJose E. Roman     grid->npcol = size / grid->nprow;
18219566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
18229566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1823f7ec113fSDamian Marek     grid->ictxt = Csys2blacs_handle(icomm);
1824d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1825d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1826d24d4204SJose E. Roman     grid->grid_refct = 1;
1827d24d4204SJose E. Roman     grid->nprow      = nprow;
1828d24d4204SJose E. Roman     grid->npcol      = npcol;
1829d24d4204SJose E. Roman     grid->myrow      = myrow;
1830d24d4204SJose E. Roman     grid->mycol      = mycol;
1831d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1832f7ec113fSDamian Marek     grid->ictxrow = Csys2blacs_handle(icomm);
1833d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1834f7ec113fSDamian Marek     grid->ictxcol = Csys2blacs_handle(icomm);
1835d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
18369566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1837d24d4204SJose E. Roman 
1838d24d4204SJose E. Roman   } else grid->grid_refct++;
18399566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&icomm));
1840d24d4204SJose E. Roman   a->grid = grid;
1841d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1842d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1843d24d4204SJose E. Roman 
1844d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
18459566063dSJacob Faibussowitsch   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1846d24d4204SJose E. Roman   if (flg) {
18476497c311SBarry Smith     a->mb = (PetscMPIInt)array[0];
18486497c311SBarry Smith     a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1849d24d4204SJose E. Roman   }
1850d0609cedSBarry Smith   PetscOptionsEnd();
1851d24d4204SJose E. Roman 
1852b12397e7SPierre Jolivet   a->roworiented = PETSC_TRUE;
18539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
18549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
18559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
18569566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
18573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1858d24d4204SJose E. Roman }
1859d24d4204SJose E. Roman 
1860d24d4204SJose E. Roman /*@C
1861d24d4204SJose E. Roman   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
186211a5261eSBarry Smith   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1863d24d4204SJose E. Roman 
1864d24d4204SJose E. Roman   Collective
1865d24d4204SJose E. Roman 
1866d24d4204SJose E. Roman   Input Parameters:
1867d24d4204SJose E. Roman + comm - MPI communicator
186811a5261eSBarry Smith . mb   - row block size (or `PETSC_DECIDE` to have it set)
186911a5261eSBarry Smith . nb   - column block size (or `PETSC_DECIDE` to have it set)
1870d24d4204SJose E. Roman . M    - number of global rows
1871d24d4204SJose E. Roman . N    - number of global columns
1872d24d4204SJose E. Roman . rsrc - coordinate of process that owns the first row of the distributed matrix
1873d24d4204SJose E. Roman - csrc - coordinate of process that owns the first column of the distributed matrix
1874d24d4204SJose E. Roman 
1875d24d4204SJose E. Roman   Output Parameter:
1876d24d4204SJose E. Roman . A - the matrix
1877d24d4204SJose E. Roman 
187811a5261eSBarry Smith   Options Database Key:
1879d24d4204SJose E. Roman . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1880d24d4204SJose E. Roman 
18812ef1f0ffSBarry Smith   Level: intermediate
18822ef1f0ffSBarry Smith 
18832ef1f0ffSBarry Smith   Notes:
18842ef1f0ffSBarry Smith   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
18852ef1f0ffSBarry Smith 
188611a5261eSBarry Smith   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1887d24d4204SJose E. Roman   MatXXXXSetPreallocation() paradigm instead of this routine directly.
188811a5261eSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1889d24d4204SJose E. Roman 
189046091a0eSPierre Jolivet   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1891d24d4204SJose E. Roman   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1892d24d4204SJose E. Roman   significance and are thus ignored. The block sizes refer to the values
189311a5261eSBarry Smith   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1894d24d4204SJose E. Roman 
18951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1896d24d4204SJose E. Roman @*/
MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat * A)1897d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1898d71ae5a4SJacob Faibussowitsch {
1899d24d4204SJose E. Roman   Mat_ScaLAPACK *a;
1900d24d4204SJose E. Roman   PetscInt       m, n;
1901d24d4204SJose E. Roman 
1902d24d4204SJose E. Roman   PetscFunctionBegin;
19039566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
19049566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSCALAPACK));
1905aed4548fSBarry Smith   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1906d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1907d24d4204SJose E. Roman   m = PETSC_DECIDE;
19089566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1909d24d4204SJose E. Roman   n = PETSC_DECIDE;
19109566063dSJacob Faibussowitsch   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
19119566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
1912d24d4204SJose E. Roman   a = (Mat_ScaLAPACK *)(*A)->data;
19139566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(M, &a->M));
19149566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(N, &a->N));
19159566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
19169566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
19179566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
19189566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
19199566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*A));
19203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1921d24d4204SJose E. Roman }
1922