xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 7f296bb328fcd4c99f2da7bfe8ba7ed8a4ebceee)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201 
202     /* redistribute y from a row of a 2d matrix */
203     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204 
205   } else { /* non-transpose */
206 
207     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210     xlld = 1;
211     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212     PetscCheckScaLapackInfo("descinit", info);
213     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217     PetscCheckScaLapackInfo("descinit", info);
218 
219     /* allocate 2d vectors */
220     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223     PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
224 
225     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227     PetscCheckScaLapackInfo("descinit", info);
228     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229     PetscCheckScaLapackInfo("descinit", info);
230 
231     /* redistribute x as a row of a 2d matrix */
232     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233 
234     /* redistribute y as a column of a 2d matrix */
235     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236 
237     /* call PBLAS subroutine */
238     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239 
240     /* redistribute y from a column of a 2d matrix */
241     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242   }
243   PetscCall(PetscFree2(x2d, y2d));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATSCALAPACK));
353   PetscCall(MatSetUp(C));
354   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387   PetscScalar    sone = 1.0, zero = 0.0;
388   PetscBLASInt   one = 1;
389 
390   PetscFunctionBegin;
391   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392   C->assembled = PETSC_TRUE;
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400   PetscCall(MatSetType(C, MATSCALAPACK));
401   PetscCall(MatSetUp(C));
402   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408   Mat_Product *product = C->product;
409 
410   PetscFunctionBegin;
411   switch (product->type) {
412   case MATPRODUCT_AB:
413     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414     C->ops->productsymbolic = MatProductSymbolic_AB;
415     break;
416   case MATPRODUCT_AtB:
417     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419     break;
420   case MATPRODUCT_ABt:
421     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423     break;
424   default:
425     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433   PetscScalar    *darray, *d2d, v;
434   const PetscInt *ranges;
435   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436 
437   PetscFunctionBegin;
438   PetscCall(VecGetArray(D, &darray));
439 
440   if (A->rmap->N <= A->cmap->N) { /* row version */
441 
442     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447     PetscCheckScaLapackInfo("descinit", info);
448 
449     /* allocate 2d vector */
450     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451     PetscCall(PetscCalloc1(lszd, &d2d));
452     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
453 
454     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456     PetscCheckScaLapackInfo("descinit", info);
457 
458     /* collect diagonal */
459     for (j = 1; j <= a->M; j++) {
460       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462     }
463 
464     /* redistribute d from a column of a 2d matrix */
465     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466     PetscCall(PetscFree(d2d));
467 
468   } else { /* column version */
469 
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* collect diagonal */
486     for (j = 1; j <= a->N; j++) {
487       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489     }
490 
491     /* redistribute d from a row of a 2d matrix */
492     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493     PetscCall(PetscFree(d2d));
494   }
495 
496   PetscCall(VecRestoreArray(D, &darray));
497   PetscCall(VecAssemblyBegin(D));
498   PetscCall(VecAssemblyEnd(D));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505   const PetscScalar *d;
506   const PetscInt    *ranges;
507   PetscScalar       *d2d;
508   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509 
510   PetscFunctionBegin;
511   if (R) {
512     PetscCall(VecGetArrayRead(R, &d));
513     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516     dlld = 1;
517     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518     PetscCheckScaLapackInfo("descinit", info);
519 
520     /* allocate 2d vector */
521     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522     PetscCall(PetscCalloc1(lszd, &d2d));
523 
524     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526     PetscCheckScaLapackInfo("descinit", info);
527 
528     /* redistribute d to a row of a 2d matrix */
529     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530 
531     /* broadcast along process columns */
532     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534 
535     /* local scaling */
536     for (j = 0; j < a->locc; j++)
537       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538 
539     PetscCall(PetscFree(d2d));
540     PetscCall(VecRestoreArrayRead(R, &d));
541   }
542   if (L) {
543     PetscCall(VecGetArrayRead(L, &d));
544     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549     PetscCheckScaLapackInfo("descinit", info);
550 
551     /* allocate 2d vector */
552     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553     PetscCall(PetscCalloc1(lszd, &d2d));
554     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
555 
556     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558     PetscCheckScaLapackInfo("descinit", info);
559 
560     /* redistribute d to a column of a 2d matrix */
561     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562 
563     /* broadcast along process rows */
564     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566 
567     /* local scaling */
568     for (i = 0; i < a->locr; i++)
569       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570 
571     PetscCall(PetscFree(d2d));
572     PetscCall(VecRestoreArrayRead(L, &d));
573   }
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578 {
579   PetscFunctionBegin;
580   *missing = PETSC_FALSE;
581   PetscFunctionReturn(PETSC_SUCCESS);
582 }
583 
584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585 {
586   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587   PetscBLASInt   n, one = 1;
588 
589   PetscFunctionBegin;
590   n = x->lld * x->locc;
591   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596 {
597   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598   PetscBLASInt   i, n;
599   PetscScalar    v;
600 
601   PetscFunctionBegin;
602   n = PetscMin(x->M, x->N);
603   for (i = 1; i <= n; i++) {
604     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605     v += alpha;
606     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607   }
608   PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 
611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612 {
613   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
614   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
615   PetscBLASInt   one  = 1;
616   PetscScalar    beta = 1.0;
617 
618   PetscFunctionBegin;
619   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626 {
627   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
629 
630   PetscFunctionBegin;
631   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632   PetscCall(PetscObjectStateIncrease((PetscObject)B));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637 {
638   Mat            Bs;
639   MPI_Comm       comm;
640   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
641 
642   PetscFunctionBegin;
643   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644   PetscCall(MatCreate(comm, &Bs));
645   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646   PetscCall(MatSetType(Bs, MATSCALAPACK));
647   b       = (Mat_ScaLAPACK *)Bs->data;
648   b->M    = a->M;
649   b->N    = a->N;
650   b->mb   = a->mb;
651   b->nb   = a->nb;
652   b->rsrc = a->rsrc;
653   b->csrc = a->csrc;
654   PetscCall(MatSetUp(Bs));
655   *B = Bs;
656   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657   Bs->assembled = PETSC_TRUE;
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662 {
663   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
664   Mat            Bs   = *B;
665   PetscBLASInt   one  = 1;
666   PetscScalar    sone = 1.0, zero = 0.0;
667 #if defined(PETSC_USE_COMPLEX)
668   PetscInt i, j;
669 #endif
670 
671   PetscFunctionBegin;
672   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675   *B = Bs;
676   b  = (Mat_ScaLAPACK *)Bs->data;
677   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678 #if defined(PETSC_USE_COMPLEX)
679   /* undo conjugation */
680   for (i = 0; i < b->locr; i++)
681     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682 #endif
683   Bs->assembled = PETSC_TRUE;
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688 {
689   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690   PetscInt       i, j;
691 
692   PetscFunctionBegin;
693   for (i = 0; i < a->locr; i++)
694     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699 {
700   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
701   Mat            Bs   = *B;
702   PetscBLASInt   one  = 1;
703   PetscScalar    sone = 1.0, zero = 0.0;
704 
705   PetscFunctionBegin;
706   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708   *B = Bs;
709   b  = (Mat_ScaLAPACK *)Bs->data;
710   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711   Bs->assembled = PETSC_TRUE;
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716 {
717   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
718   PetscScalar    *x, *x2d;
719   const PetscInt *ranges;
720   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
721 
722   PetscFunctionBegin;
723   PetscCall(VecCopy(B, X));
724   PetscCall(VecGetArray(X, &x));
725 
726   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729   PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
730   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731   PetscCheckScaLapackInfo("descinit", info);
732 
733   /* allocate 2d vector */
734   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735   PetscCall(PetscMalloc1(lszx, &x2d));
736   PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
737 
738   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740   PetscCheckScaLapackInfo("descinit", info);
741 
742   /* redistribute x as a column of a 2d matrix */
743   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
744 
745   /* call ScaLAPACK subroutine */
746   switch (A->factortype) {
747   case MAT_FACTOR_LU:
748     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749     PetscCheckScaLapackInfo("getrs", info);
750     break;
751   case MAT_FACTOR_CHOLESKY:
752     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753     PetscCheckScaLapackInfo("potrs", info);
754     break;
755   default:
756     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757   }
758 
759   /* redistribute x from a column of a 2d matrix */
760   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
761 
762   PetscCall(PetscFree(x2d));
763   PetscCall(VecRestoreArray(X, &x));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatSolve_ScaLAPACK(A, B, X));
771   PetscCall(VecAXPY(X, 1, Y));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776 {
777   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
778   PetscBool      flg1, flg2;
779   PetscBLASInt   one = 1, info;
780   Mat            C;
781   MatType        type;
782 
783   PetscFunctionBegin;
784   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
785   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
786   if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
787   if (flg2) {
788     if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
789     else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
790     C = X;
791   } else {
792     PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
793   }
794   x = (Mat_ScaLAPACK *)C->data;
795 
796   switch (A->factortype) {
797   case MAT_FACTOR_LU:
798     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
799     PetscCheckScaLapackInfo("getrs", info);
800     break;
801   case MAT_FACTOR_CHOLESKY:
802     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
803     PetscCheckScaLapackInfo("potrs", info);
804     break;
805   default:
806     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
807   }
808   if (!flg2) {
809     PetscCall(MatGetType(X, &type));
810     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
811     PetscCall(MatDestroy(&C));
812   }
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
816 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
817 {
818   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
819   PetscBLASInt   one = 1, info;
820 
821   PetscFunctionBegin;
822   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
823   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
824   PetscCheckScaLapackInfo("getrf", info);
825   A->factortype = MAT_FACTOR_LU;
826   A->assembled  = PETSC_TRUE;
827 
828   PetscCall(PetscFree(A->solvertype));
829   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
834 {
835   PetscFunctionBegin;
836   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
837   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
842 {
843   PetscFunctionBegin;
844   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
845   PetscFunctionReturn(PETSC_SUCCESS);
846 }
847 
848 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
849 {
850   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
851   PetscBLASInt   one = 1, info;
852 
853   PetscFunctionBegin;
854   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
855   PetscCheckScaLapackInfo("potrf", info);
856   A->factortype = MAT_FACTOR_CHOLESKY;
857   A->assembled  = PETSC_TRUE;
858 
859   PetscCall(PetscFree(A->solvertype));
860   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
861   PetscFunctionReturn(PETSC_SUCCESS);
862 }
863 
864 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
865 {
866   PetscFunctionBegin;
867   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
868   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871 
872 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
873 {
874   PetscFunctionBegin;
875   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
880 {
881   PetscFunctionBegin;
882   *type = MATSOLVERSCALAPACK;
883   PetscFunctionReturn(PETSC_SUCCESS);
884 }
885 
886 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
887 {
888   Mat            B;
889   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
890 
891   PetscFunctionBegin;
892   /* Create the factorization matrix */
893   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
894   B->trivialsymbolic = PETSC_TRUE;
895   B->factortype      = ftype;
896   PetscCall(PetscFree(B->solvertype));
897   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
898 
899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
900   *F = B;
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
904 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
905 {
906   PetscFunctionBegin;
907   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
908   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
913 {
914   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
915   PetscBLASInt   one = 1, lwork = 0;
916   const char    *ntype;
917   PetscScalar   *work = NULL, dummy;
918 
919   PetscFunctionBegin;
920   switch (type) {
921   case NORM_1:
922     ntype = "1";
923     lwork = PetscMax(a->locr, a->locc);
924     break;
925   case NORM_FROBENIUS:
926     ntype = "F";
927     work  = &dummy;
928     break;
929   case NORM_INFINITY:
930     ntype = "I";
931     lwork = PetscMax(a->locr, a->locc);
932     break;
933   default:
934     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
935   }
936   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
937   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
938   if (lwork) PetscCall(PetscFree(work));
939   PetscFunctionReturn(PETSC_SUCCESS);
940 }
941 
942 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
943 {
944   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
945 
946   PetscFunctionBegin;
947   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
948   PetscFunctionReturn(PETSC_SUCCESS);
949 }
950 
951 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
952 {
953   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
954   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
955 
956   PetscFunctionBegin;
957   if (rows) {
958     n     = a->locr;
959     nb    = a->mb;
960     isrc  = a->rsrc;
961     nproc = a->grid->nprow;
962     iproc = a->grid->myrow;
963     PetscCall(PetscMalloc1(n, &idx));
964     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
965     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
966   }
967   if (cols) {
968     n     = a->locc;
969     nb    = a->nb;
970     isrc  = a->csrc;
971     nproc = a->grid->npcol;
972     iproc = a->grid->mycol;
973     PetscCall(PetscMalloc1(n, &idx));
974     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
975     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
976   }
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
981 {
982   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
983   Mat                Bmpi;
984   MPI_Comm           comm;
985   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
986   const PetscInt    *ranges, *branges, *cwork;
987   const PetscScalar *vwork;
988   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
989   PetscScalar       *barray;
990   PetscBool          differ = PETSC_FALSE;
991   PetscMPIInt        size;
992 
993   PetscFunctionBegin;
994   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
995   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
996 
997   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
998     PetscCallMPI(MPI_Comm_size(comm, &size));
999     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
1000     for (i = 0; i < size; i++)
1001       if (ranges[i + 1] != branges[i + 1]) {
1002         differ = PETSC_TRUE;
1003         break;
1004       }
1005   }
1006 
1007   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1008     PetscCall(MatCreate(comm, &Bmpi));
1009     m = PETSC_DECIDE;
1010     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1011     n = PETSC_DECIDE;
1012     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1013     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1014     PetscCall(MatSetType(Bmpi, MATDENSE));
1015     PetscCall(MatSetUp(Bmpi));
1016 
1017     /* create ScaLAPACK descriptor for B (1d block distribution) */
1018     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1019     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1020     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1021     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1022     PetscCheckScaLapackInfo("descinit", info);
1023 
1024     /* redistribute matrix */
1025     PetscCall(MatDenseGetArray(Bmpi, &barray));
1026     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1027     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1028     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1029     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1030 
1031     /* transfer rows of auxiliary matrix to the final matrix B */
1032     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1033     for (i = rstart; i < rend; i++) {
1034       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1035       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1036       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1037     }
1038     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1039     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1040     PetscCall(MatDestroy(&Bmpi));
1041 
1042   } else { /* normal cases */
1043 
1044     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1045     else {
1046       PetscCall(MatCreate(comm, &Bmpi));
1047       m = PETSC_DECIDE;
1048       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1049       n = PETSC_DECIDE;
1050       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1051       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1052       PetscCall(MatSetType(Bmpi, MATDENSE));
1053       PetscCall(MatSetUp(Bmpi));
1054     }
1055 
1056     /* create ScaLAPACK descriptor for B (1d block distribution) */
1057     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1058     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1059     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1060     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1061     PetscCheckScaLapackInfo("descinit", info);
1062 
1063     /* redistribute matrix */
1064     PetscCall(MatDenseGetArray(Bmpi, &barray));
1065     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1066     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1067 
1068     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1069     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1070     if (reuse == MAT_INPLACE_MATRIX) {
1071       PetscCall(MatHeaderReplace(A, &Bmpi));
1072     } else *B = Bmpi;
1073   }
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1078 {
1079   const PetscInt *ranges;
1080   PetscMPIInt     size;
1081   PetscInt        i, n;
1082 
1083   PetscFunctionBegin;
1084   *correct = PETSC_TRUE;
1085   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1086   if (size > 1) {
1087     PetscCall(PetscLayoutGetRanges(map, &ranges));
1088     n = ranges[1] - ranges[0];
1089     for (i = 1; i < size; i++)
1090       if (ranges[i + 1] - ranges[i] != n) break;
1091     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1092   }
1093   PetscFunctionReturn(PETSC_SUCCESS);
1094 }
1095 
1096 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1097 {
1098   Mat_ScaLAPACK     *b;
1099   Mat                Bmpi;
1100   MPI_Comm           comm;
1101   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1102   const PetscInt    *ranges, *rows, *cols;
1103   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1104   const PetscScalar *aarray;
1105   IS                 ir, ic;
1106   PetscInt           lda;
1107   PetscBool          flg;
1108 
1109   PetscFunctionBegin;
1110   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1111 
1112   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1113   else {
1114     PetscCall(MatCreate(comm, &Bmpi));
1115     m = PETSC_DECIDE;
1116     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1117     n = PETSC_DECIDE;
1118     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1119     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1120     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1121     PetscCall(MatSetUp(Bmpi));
1122   }
1123   b = (Mat_ScaLAPACK *)Bmpi->data;
1124 
1125   PetscCall(MatDenseGetLDA(A, &lda));
1126   PetscCall(MatDenseGetArrayRead(A, &aarray));
1127   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1128   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1129   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1130     /* create ScaLAPACK descriptor for A (1d block distribution) */
1131     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1132     PetscCall(PetscBLASIntCast(ranges[1], &amb));        /* row block size */
1133     PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1134     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1135     PetscCheckScaLapackInfo("descinit", info);
1136 
1137     /* redistribute matrix */
1138     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1139     Bmpi->nooffprocentries = PETSC_TRUE;
1140   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1141     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1142     b->roworiented = PETSC_FALSE;
1143     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1144     PetscCall(ISGetIndices(ir, &rows));
1145     PetscCall(ISGetIndices(ic, &cols));
1146     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1147     PetscCall(ISRestoreIndices(ir, &rows));
1148     PetscCall(ISRestoreIndices(ic, &cols));
1149     PetscCall(ISDestroy(&ic));
1150     PetscCall(ISDestroy(&ir));
1151   }
1152   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1153   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1154   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1155   if (reuse == MAT_INPLACE_MATRIX) {
1156     PetscCall(MatHeaderReplace(A, &Bmpi));
1157   } else *B = Bmpi;
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1162 {
1163   Mat                mat_scal;
1164   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1165   const PetscInt    *cols;
1166   const PetscScalar *vals;
1167 
1168   PetscFunctionBegin;
1169   if (reuse == MAT_REUSE_MATRIX) {
1170     mat_scal = *newmat;
1171     PetscCall(MatZeroEntries(mat_scal));
1172   } else {
1173     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1174     m = PETSC_DECIDE;
1175     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1176     n = PETSC_DECIDE;
1177     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1178     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1179     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1180     PetscCall(MatSetUp(mat_scal));
1181   }
1182   for (row = rstart; row < rend; row++) {
1183     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1184     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1185     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1186   }
1187   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1188   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1189 
1190   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1191   else *newmat = mat_scal;
1192   PetscFunctionReturn(PETSC_SUCCESS);
1193 }
1194 
1195 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1196 {
1197   Mat                mat_scal;
1198   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1199   const PetscInt    *cols;
1200   const PetscScalar *vals;
1201   PetscScalar        v;
1202 
1203   PetscFunctionBegin;
1204   if (reuse == MAT_REUSE_MATRIX) {
1205     mat_scal = *newmat;
1206     PetscCall(MatZeroEntries(mat_scal));
1207   } else {
1208     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1209     m = PETSC_DECIDE;
1210     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1211     n = PETSC_DECIDE;
1212     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1213     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1214     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1215     PetscCall(MatSetUp(mat_scal));
1216   }
1217   PetscCall(MatGetRowUpperTriangular(A));
1218   for (row = rstart; row < rend; row++) {
1219     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1220     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1221     for (j = 0; j < ncols; j++) { /* lower triangular part */
1222       if (cols[j] == row) continue;
1223       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1224       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1225     }
1226     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1227   }
1228   PetscCall(MatRestoreRowUpperTriangular(A));
1229   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1230   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1231 
1232   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1233   else *newmat = mat_scal;
1234   PetscFunctionReturn(PETSC_SUCCESS);
1235 }
1236 
1237 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1238 {
1239   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1240   PetscInt       sz = 0;
1241 
1242   PetscFunctionBegin;
1243   PetscCall(PetscLayoutSetUp(A->rmap));
1244   PetscCall(PetscLayoutSetUp(A->cmap));
1245   if (!a->lld) a->lld = a->locr;
1246 
1247   PetscCall(PetscFree(a->loc));
1248   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1249   PetscCall(PetscCalloc1(sz, &a->loc));
1250 
1251   A->preallocated = PETSC_TRUE;
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1256 {
1257   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1258   Mat_ScaLAPACK_Grid *grid;
1259   PetscBool           flg;
1260   MPI_Comm            icomm;
1261 
1262   PetscFunctionBegin;
1263   PetscCall(MatStashDestroy_Private(&A->stash));
1264   PetscCall(PetscFree(a->loc));
1265   PetscCall(PetscFree(a->pivots));
1266   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1267   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1268   if (--grid->grid_refct == 0) {
1269     Cblacs_gridexit(grid->ictxt);
1270     Cblacs_gridexit(grid->ictxrow);
1271     Cblacs_gridexit(grid->ictxcol);
1272     PetscCall(PetscFree(grid));
1273     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1274   }
1275   PetscCall(PetscCommDestroy(&icomm));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1277   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1279   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1280   PetscCall(PetscFree(A->data));
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 
1284 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1285 {
1286   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1287   PetscBLASInt   info = 0;
1288   PetscBool      flg;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(PetscLayoutSetUp(A->rmap));
1292   PetscCall(PetscLayoutSetUp(A->cmap));
1293 
1294   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1295   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1296   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1297   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1298   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1299 
1300   /* compute local sizes */
1301   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1302   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1303   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1304   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1305   a->lld  = PetscMax(1, a->locr);
1306 
1307   /* allocate local array */
1308   PetscCall(MatScaLAPACKSetPreallocation(A));
1309 
1310   /* set up ScaLAPACK descriptor */
1311   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1312   PetscCheckScaLapackInfo("descinit", info);
1313   PetscFunctionReturn(PETSC_SUCCESS);
1314 }
1315 
1316 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1317 {
1318   PetscInt nstash, reallocs;
1319 
1320   PetscFunctionBegin;
1321   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1322   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1323   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1324   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1325   PetscFunctionReturn(PETSC_SUCCESS);
1326 }
1327 
1328 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1329 {
1330   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1331   PetscMPIInt    n;
1332   PetscInt       i, flg, *row, *col;
1333   PetscScalar   *val;
1334   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1335 
1336   PetscFunctionBegin;
1337   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1338   while (1) {
1339     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1340     if (!flg) break;
1341     for (i = 0; i < n; i++) {
1342       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1343       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1344       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1345       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1346       switch (A->insertmode) {
1347       case INSERT_VALUES:
1348         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1349         break;
1350       case ADD_VALUES:
1351         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1352         break;
1353       default:
1354         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1355       }
1356     }
1357   }
1358   PetscCall(MatStashScatterEnd_Private(&A->stash));
1359   PetscFunctionReturn(PETSC_SUCCESS);
1360 }
1361 
1362 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1363 {
1364   Mat      Adense, As;
1365   MPI_Comm comm;
1366 
1367   PetscFunctionBegin;
1368   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1369   PetscCall(MatCreate(comm, &Adense));
1370   PetscCall(MatSetType(Adense, MATDENSE));
1371   PetscCall(MatLoad(Adense, viewer));
1372   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1373   PetscCall(MatDestroy(&Adense));
1374   PetscCall(MatHeaderReplace(newMat, &As));
1375   PetscFunctionReturn(PETSC_SUCCESS);
1376 }
1377 
1378 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1379                                        NULL,
1380                                        NULL,
1381                                        MatMult_ScaLAPACK,
1382                                        /* 4*/ MatMultAdd_ScaLAPACK,
1383                                        MatMultTranspose_ScaLAPACK,
1384                                        MatMultTransposeAdd_ScaLAPACK,
1385                                        MatSolve_ScaLAPACK,
1386                                        MatSolveAdd_ScaLAPACK,
1387                                        NULL,
1388                                        /*10*/ NULL,
1389                                        MatLUFactor_ScaLAPACK,
1390                                        MatCholeskyFactor_ScaLAPACK,
1391                                        NULL,
1392                                        MatTranspose_ScaLAPACK,
1393                                        /*15*/ MatGetInfo_ScaLAPACK,
1394                                        NULL,
1395                                        MatGetDiagonal_ScaLAPACK,
1396                                        MatDiagonalScale_ScaLAPACK,
1397                                        MatNorm_ScaLAPACK,
1398                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1399                                        MatAssemblyEnd_ScaLAPACK,
1400                                        MatSetOption_ScaLAPACK,
1401                                        MatZeroEntries_ScaLAPACK,
1402                                        /*24*/ NULL,
1403                                        MatLUFactorSymbolic_ScaLAPACK,
1404                                        MatLUFactorNumeric_ScaLAPACK,
1405                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1406                                        MatCholeskyFactorNumeric_ScaLAPACK,
1407                                        /*29*/ MatSetUp_ScaLAPACK,
1408                                        NULL,
1409                                        NULL,
1410                                        NULL,
1411                                        NULL,
1412                                        /*34*/ MatDuplicate_ScaLAPACK,
1413                                        NULL,
1414                                        NULL,
1415                                        NULL,
1416                                        NULL,
1417                                        /*39*/ MatAXPY_ScaLAPACK,
1418                                        NULL,
1419                                        NULL,
1420                                        NULL,
1421                                        MatCopy_ScaLAPACK,
1422                                        /*44*/ NULL,
1423                                        MatScale_ScaLAPACK,
1424                                        MatShift_ScaLAPACK,
1425                                        NULL,
1426                                        NULL,
1427                                        /*49*/ NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        NULL,
1431                                        NULL,
1432                                        /*54*/ NULL,
1433                                        NULL,
1434                                        NULL,
1435                                        NULL,
1436                                        NULL,
1437                                        /*59*/ NULL,
1438                                        MatDestroy_ScaLAPACK,
1439                                        MatView_ScaLAPACK,
1440                                        NULL,
1441                                        NULL,
1442                                        /*64*/ NULL,
1443                                        NULL,
1444                                        NULL,
1445                                        NULL,
1446                                        NULL,
1447                                        /*69*/ NULL,
1448                                        NULL,
1449                                        MatConvert_ScaLAPACK_Dense,
1450                                        NULL,
1451                                        NULL,
1452                                        /*74*/ NULL,
1453                                        NULL,
1454                                        NULL,
1455                                        NULL,
1456                                        NULL,
1457                                        /*79*/ NULL,
1458                                        NULL,
1459                                        NULL,
1460                                        NULL,
1461                                        MatLoad_ScaLAPACK,
1462                                        /*84*/ NULL,
1463                                        NULL,
1464                                        NULL,
1465                                        NULL,
1466                                        NULL,
1467                                        /*89*/ NULL,
1468                                        NULL,
1469                                        MatMatMultNumeric_ScaLAPACK,
1470                                        NULL,
1471                                        NULL,
1472                                        /*94*/ NULL,
1473                                        NULL,
1474                                        NULL,
1475                                        MatMatTransposeMultNumeric_ScaLAPACK,
1476                                        NULL,
1477                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1478                                        NULL,
1479                                        NULL,
1480                                        MatConjugate_ScaLAPACK,
1481                                        NULL,
1482                                        /*104*/ NULL,
1483                                        NULL,
1484                                        NULL,
1485                                        NULL,
1486                                        NULL,
1487                                        /*109*/ MatMatSolve_ScaLAPACK,
1488                                        NULL,
1489                                        NULL,
1490                                        NULL,
1491                                        MatMissingDiagonal_ScaLAPACK,
1492                                        /*114*/ NULL,
1493                                        NULL,
1494                                        NULL,
1495                                        NULL,
1496                                        NULL,
1497                                        /*119*/ NULL,
1498                                        MatHermitianTranspose_ScaLAPACK,
1499                                        MatMultHermitianTranspose_ScaLAPACK,
1500                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1501                                        NULL,
1502                                        /*124*/ NULL,
1503                                        NULL,
1504                                        NULL,
1505                                        NULL,
1506                                        NULL,
1507                                        /*129*/ NULL,
1508                                        NULL,
1509                                        NULL,
1510                                        MatTransposeMatMultNumeric_ScaLAPACK,
1511                                        NULL,
1512                                        /*134*/ NULL,
1513                                        NULL,
1514                                        NULL,
1515                                        NULL,
1516                                        NULL,
1517                                        NULL,
1518                                        /*140*/ NULL,
1519                                        NULL,
1520                                        NULL,
1521                                        NULL,
1522                                        NULL,
1523                                        /*145*/ NULL,
1524                                        NULL,
1525                                        NULL,
1526                                        NULL,
1527                                        NULL,
1528                                        /*150*/ NULL,
1529                                        NULL,
1530                                        NULL,
1531                                        NULL,
1532                                        NULL,
1533                                        /*155*/ NULL,
1534                                        NULL};
1535 
1536 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1537 {
1538   PetscInt          *owner, *startv, *starti, bs2;
1539   PetscInt           size = stash->size, nsends;
1540   PetscInt          *sindices, **rindices, j, l;
1541   PetscScalar      **rvalues, *svalues;
1542   MPI_Comm           comm = stash->comm;
1543   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1544   PetscMPIInt        tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1545   PetscInt          *sp_idx, *sp_idy;
1546   PetscScalar       *sp_val;
1547   PetscMatStashSpace space, space_next;
1548   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1549   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1550 
1551   PetscFunctionBegin;
1552   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1553     InsertMode addv;
1554     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1555     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1556     mat->insertmode = addv; /* in case this processor had no cache */
1557   }
1558 
1559   bs2 = stash->bs * stash->bs;
1560 
1561   /*  first count number of contributors to each processor */
1562   PetscCall(PetscCalloc1(size, &nlengths));
1563   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1564 
1565   ii = j = 0;
1566   space  = stash->space_head;
1567   while (space) {
1568     space_next = space->next;
1569     for (l = 0; l < space->local_used; l++) {
1570       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1571       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1572       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1573       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1574       nlengths[j]++;
1575       owner[ii] = j;
1576       ii++;
1577     }
1578     space = space_next;
1579   }
1580 
1581   /* Now check what procs get messages - and compute nsends. */
1582   PetscCall(PetscCalloc1(size, &sizes));
1583   nsends = 0;
1584   for (PetscMPIInt i = 0; i < size; i++) {
1585     if (nlengths[i]) {
1586       sizes[i] = 1;
1587       nsends++;
1588     }
1589   }
1590 
1591   {
1592     PetscMPIInt *onodes, *olengths;
1593 
1594     /* Determine the number of messages to expect, their lengths, from from-ids */
1595     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1596     PetscCall(PetscMPIIntCast(nsends, &insends));
1597     PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1598     /* since clubbing row,col - lengths are multiplied by 2 */
1599     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
1600     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1601     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1602     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
1603     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1604     PetscCall(PetscFree(onodes));
1605     PetscCall(PetscFree(olengths));
1606   }
1607 
1608   /* do sends:
1609       1) starts[i] gives the starting index in svalues for stuff going to
1610          the ith processor
1611   */
1612   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1613   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1614   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1615   /* use 2 sends the first with all_a, the next with all_i and all_j */
1616   startv[0] = 0;
1617   starti[0] = 0;
1618   for (PetscMPIInt i = 1; i < size; i++) {
1619     startv[i] = startv[i - 1] + nlengths[i - 1];
1620     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1621   }
1622 
1623   ii    = 0;
1624   space = stash->space_head;
1625   while (space) {
1626     space_next = space->next;
1627     sp_idx     = space->idx;
1628     sp_idy     = space->idy;
1629     sp_val     = space->val;
1630     for (l = 0; l < space->local_used; l++) {
1631       j = owner[ii];
1632       if (bs2 == 1) {
1633         svalues[startv[j]] = sp_val[l];
1634       } else {
1635         PetscInt     k;
1636         PetscScalar *buf1, *buf2;
1637         buf1 = svalues + bs2 * startv[j];
1638         buf2 = space->val + bs2 * l;
1639         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1640       }
1641       sindices[starti[j]]               = sp_idx[l];
1642       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1643       startv[j]++;
1644       starti[j]++;
1645       ii++;
1646     }
1647     space = space_next;
1648   }
1649   startv[0] = 0;
1650   for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1651 
1652   for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1653     if (sizes[i]) {
1654       PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1655       PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1656     }
1657   }
1658 #if defined(PETSC_USE_INFO)
1659   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1660   for (PetscMPIInt i = 0; i < size; i++) {
1661     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1662   }
1663 #endif
1664   PetscCall(PetscFree(nlengths));
1665   PetscCall(PetscFree(owner));
1666   PetscCall(PetscFree2(startv, starti));
1667   PetscCall(PetscFree(sizes));
1668 
1669   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1670   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1671 
1672   for (PetscMPIInt i = 0; i < nreceives; i++) {
1673     recv_waits[2 * i]     = recv_waits1[i];
1674     recv_waits[2 * i + 1] = recv_waits2[i];
1675   }
1676   stash->recv_waits = recv_waits;
1677 
1678   PetscCall(PetscFree(recv_waits1));
1679   PetscCall(PetscFree(recv_waits2));
1680 
1681   stash->svalues         = svalues;
1682   stash->sindices        = sindices;
1683   stash->rvalues         = rvalues;
1684   stash->rindices        = rindices;
1685   stash->send_waits      = send_waits;
1686   stash->nsends          = (PetscMPIInt)nsends;
1687   stash->nrecvs          = nreceives;
1688   stash->reproduce_count = 0;
1689   PetscFunctionReturn(PETSC_SUCCESS);
1690 }
1691 
1692 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1693 {
1694   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1695 
1696   PetscFunctionBegin;
1697   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1698   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1699   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1700   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1701   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1702   PetscFunctionReturn(PETSC_SUCCESS);
1703 }
1704 
1705 /*@
1706   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1707   the `MATSCALAPACK` matrix
1708 
1709   Logically Collective
1710 
1711   Input Parameters:
1712 + A  - a `MATSCALAPACK` matrix
1713 . mb - the row block size
1714 - nb - the column block size
1715 
1716   Level: intermediate
1717 
1718   Note:
1719   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1720 
1721 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1722 @*/
1723 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1724 {
1725   PetscFunctionBegin;
1726   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1727   PetscValidLogicalCollectiveInt(A, mb, 2);
1728   PetscValidLogicalCollectiveInt(A, nb, 3);
1729   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1730   PetscFunctionReturn(PETSC_SUCCESS);
1731 }
1732 
1733 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1734 {
1735   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1736 
1737   PetscFunctionBegin;
1738   if (mb) *mb = a->mb;
1739   if (nb) *nb = a->nb;
1740   PetscFunctionReturn(PETSC_SUCCESS);
1741 }
1742 
1743 /*@
1744   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1745   the `MATSCALAPACK` matrix
1746 
1747   Not Collective
1748 
1749   Input Parameter:
1750 . A - a `MATSCALAPACK` matrix
1751 
1752   Output Parameters:
1753 + mb - the row block size
1754 - nb - the column block size
1755 
1756   Level: intermediate
1757 
1758   Note:
1759   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1760 
1761 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1762 @*/
1763 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1764 {
1765   PetscFunctionBegin;
1766   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1767   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1768   PetscFunctionReturn(PETSC_SUCCESS);
1769 }
1770 
1771 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1772 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1773 
1774 /*MC
1775    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1776 
1777    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1778 
1779    Options Database Keys:
1780 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1781 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1782 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1783 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1784 
1785    Level: intermediate
1786 
1787   Note:
1788    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1789    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1790    the given rank.
1791 
1792 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1793 M*/
1794 
1795 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1796 {
1797   Mat_ScaLAPACK      *a;
1798   PetscBool           flg, flg1;
1799   Mat_ScaLAPACK_Grid *grid;
1800   MPI_Comm            icomm;
1801   PetscBLASInt        nprow, npcol, myrow, mycol;
1802   PetscInt            optv1, k = 2, array[2] = {0, 0};
1803   PetscMPIInt         size;
1804 
1805   PetscFunctionBegin;
1806   A->ops[0]     = MatOps_Values;
1807   A->insertmode = NOT_SET_VALUES;
1808 
1809   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1810   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1811   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1812   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1813   A->stash.ScatterDestroy = NULL;
1814 
1815   PetscCall(PetscNew(&a));
1816   A->data = (void *)a;
1817 
1818   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1819   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1820     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
1821     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1822     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1823   }
1824   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1825   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1826   if (!flg) {
1827     PetscCall(PetscNew(&grid));
1828 
1829     PetscCallMPI(MPI_Comm_size(icomm, &size));
1830     PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));
1831 
1832     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1833     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1834     if (flg1) {
1835       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1836       PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1837     }
1838     PetscOptionsEnd();
1839 
1840     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1841     grid->npcol = size / grid->nprow;
1842     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1843     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1844     grid->ictxt = Csys2blacs_handle(icomm);
1845     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1846     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1847     grid->grid_refct = 1;
1848     grid->nprow      = nprow;
1849     grid->npcol      = npcol;
1850     grid->myrow      = myrow;
1851     grid->mycol      = mycol;
1852     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1853     grid->ictxrow = Csys2blacs_handle(icomm);
1854     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1855     grid->ictxcol = Csys2blacs_handle(icomm);
1856     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1857     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1858 
1859   } else grid->grid_refct++;
1860   PetscCall(PetscCommDestroy(&icomm));
1861   a->grid = grid;
1862   a->mb   = DEFAULT_BLOCKSIZE;
1863   a->nb   = DEFAULT_BLOCKSIZE;
1864 
1865   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1866   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1867   if (flg) {
1868     a->mb = (PetscMPIInt)array[0];
1869     a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1870   }
1871   PetscOptionsEnd();
1872 
1873   a->roworiented = PETSC_TRUE;
1874   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1875   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1876   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1877   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1878   PetscFunctionReturn(PETSC_SUCCESS);
1879 }
1880 
1881 /*@C
1882   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1883   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1884 
1885   Collective
1886 
1887   Input Parameters:
1888 + comm - MPI communicator
1889 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1890 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1891 . M    - number of global rows
1892 . N    - number of global columns
1893 . rsrc - coordinate of process that owns the first row of the distributed matrix
1894 - csrc - coordinate of process that owns the first column of the distributed matrix
1895 
1896   Output Parameter:
1897 . A - the matrix
1898 
1899   Options Database Key:
1900 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1901 
1902   Level: intermediate
1903 
1904   Notes:
1905   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1906 
1907   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1908   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1909   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1910 
1911   Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1912   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1913   significance and are thus ignored. The block sizes refer to the values
1914   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1915 
1916 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1917 @*/
1918 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1919 {
1920   Mat_ScaLAPACK *a;
1921   PetscInt       m, n;
1922 
1923   PetscFunctionBegin;
1924   PetscCall(MatCreate(comm, A));
1925   PetscCall(MatSetType(*A, MATSCALAPACK));
1926   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1927   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1928   m = PETSC_DECIDE;
1929   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1930   n = PETSC_DECIDE;
1931   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1932   PetscCall(MatSetSizes(*A, m, n, M, N));
1933   a = (Mat_ScaLAPACK *)(*A)->data;
1934   PetscCall(PetscBLASIntCast(M, &a->M));
1935   PetscCall(PetscBLASIntCast(N, &a->N));
1936   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1937   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1938   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1939   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1940   PetscCall(MatSetUp(*A));
1941   PetscFunctionReturn(PETSC_SUCCESS);
1942 }
1943