xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 7e1a0bbe36d2be40a00a95404ece00db4857f70d)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         isascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
39   if (isascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201 
202     /* redistribute y from a row of a 2d matrix */
203     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204 
205   } else { /* non-transpose */
206 
207     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210     xlld = 1;
211     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212     PetscCheckScaLapackInfo("descinit", info);
213     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217     PetscCheckScaLapackInfo("descinit", info);
218 
219     /* allocate 2d vectors */
220     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223     PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
224 
225     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227     PetscCheckScaLapackInfo("descinit", info);
228     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229     PetscCheckScaLapackInfo("descinit", info);
230 
231     /* redistribute x as a row of a 2d matrix */
232     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233 
234     /* redistribute y as a column of a 2d matrix */
235     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236 
237     /* call PBLAS subroutine */
238     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239 
240     /* redistribute y from a column of a 2d matrix */
241     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242   }
243   PetscCall(PetscFree2(x2d, y2d));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATSCALAPACK));
353   PetscCall(MatSetUp(C));
354   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387   PetscScalar    sone = 1.0, zero = 0.0;
388   PetscBLASInt   one = 1;
389 
390   PetscFunctionBegin;
391   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392   C->assembled = PETSC_TRUE;
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400   PetscCall(MatSetType(C, MATSCALAPACK));
401   PetscCall(MatSetUp(C));
402   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408   Mat_Product *product = C->product;
409 
410   PetscFunctionBegin;
411   switch (product->type) {
412   case MATPRODUCT_AB:
413     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414     C->ops->productsymbolic = MatProductSymbolic_AB;
415     break;
416   case MATPRODUCT_AtB:
417     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419     break;
420   case MATPRODUCT_ABt:
421     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423     break;
424   default:
425     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433   PetscScalar    *darray, *d2d, v;
434   const PetscInt *ranges;
435   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436 
437   PetscFunctionBegin;
438   PetscCall(VecGetArray(D, &darray));
439 
440   if (A->rmap->N <= A->cmap->N) { /* row version */
441 
442     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447     PetscCheckScaLapackInfo("descinit", info);
448 
449     /* allocate 2d vector */
450     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451     PetscCall(PetscCalloc1(lszd, &d2d));
452     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
453 
454     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456     PetscCheckScaLapackInfo("descinit", info);
457 
458     /* collect diagonal */
459     for (j = 1; j <= a->M; j++) {
460       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462     }
463 
464     /* redistribute d from a column of a 2d matrix */
465     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466     PetscCall(PetscFree(d2d));
467 
468   } else { /* column version */
469 
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* collect diagonal */
486     for (j = 1; j <= a->N; j++) {
487       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489     }
490 
491     /* redistribute d from a row of a 2d matrix */
492     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493     PetscCall(PetscFree(d2d));
494   }
495 
496   PetscCall(VecRestoreArray(D, &darray));
497   PetscCall(VecAssemblyBegin(D));
498   PetscCall(VecAssemblyEnd(D));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505   const PetscScalar *d;
506   const PetscInt    *ranges;
507   PetscScalar       *d2d;
508   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509 
510   PetscFunctionBegin;
511   if (R) {
512     PetscCall(VecGetArrayRead(R, &d));
513     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516     dlld = 1;
517     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518     PetscCheckScaLapackInfo("descinit", info);
519 
520     /* allocate 2d vector */
521     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522     PetscCall(PetscCalloc1(lszd, &d2d));
523 
524     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526     PetscCheckScaLapackInfo("descinit", info);
527 
528     /* redistribute d to a row of a 2d matrix */
529     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530 
531     /* broadcast along process columns */
532     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534 
535     /* local scaling */
536     for (j = 0; j < a->locc; j++)
537       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538 
539     PetscCall(PetscFree(d2d));
540     PetscCall(VecRestoreArrayRead(R, &d));
541   }
542   if (L) {
543     PetscCall(VecGetArrayRead(L, &d));
544     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549     PetscCheckScaLapackInfo("descinit", info);
550 
551     /* allocate 2d vector */
552     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553     PetscCall(PetscCalloc1(lszd, &d2d));
554     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
555 
556     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558     PetscCheckScaLapackInfo("descinit", info);
559 
560     /* redistribute d to a column of a 2d matrix */
561     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562 
563     /* broadcast along process rows */
564     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566 
567     /* local scaling */
568     for (i = 0; i < a->locr; i++)
569       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570 
571     PetscCall(PetscFree(d2d));
572     PetscCall(VecRestoreArrayRead(L, &d));
573   }
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578 {
579   PetscFunctionBegin;
580   *missing = PETSC_FALSE;
581   PetscFunctionReturn(PETSC_SUCCESS);
582 }
583 
584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585 {
586   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587   PetscBLASInt   n, one = 1;
588 
589   PetscFunctionBegin;
590   n = x->lld * x->locc;
591   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596 {
597   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598   PetscBLASInt   i, n;
599   PetscScalar    v;
600 
601   PetscFunctionBegin;
602   n = PetscMin(x->M, x->N);
603   for (i = 1; i <= n; i++) {
604     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605     v += alpha;
606     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607   }
608   PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 
611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612 {
613   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
614   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
615   PetscBLASInt   one  = 1;
616   PetscScalar    beta = 1.0;
617 
618   PetscFunctionBegin;
619   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626 {
627   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
629 
630   PetscFunctionBegin;
631   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632   PetscCall(PetscObjectStateIncrease((PetscObject)B));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637 {
638   Mat            Bs;
639   MPI_Comm       comm;
640   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
641 
642   PetscFunctionBegin;
643   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644   PetscCall(MatCreate(comm, &Bs));
645   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646   PetscCall(MatSetType(Bs, MATSCALAPACK));
647   b       = (Mat_ScaLAPACK *)Bs->data;
648   b->M    = a->M;
649   b->N    = a->N;
650   b->mb   = a->mb;
651   b->nb   = a->nb;
652   b->rsrc = a->rsrc;
653   b->csrc = a->csrc;
654   PetscCall(MatSetUp(Bs));
655   *B = Bs;
656   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657   Bs->assembled = PETSC_TRUE;
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662 {
663   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
664   Mat            Bs   = *B;
665   PetscBLASInt   one  = 1;
666   PetscScalar    sone = 1.0, zero = 0.0;
667 #if defined(PETSC_USE_COMPLEX)
668   PetscInt i, j;
669 #endif
670 
671   PetscFunctionBegin;
672   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675   *B = Bs;
676   b  = (Mat_ScaLAPACK *)Bs->data;
677   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678 #if defined(PETSC_USE_COMPLEX)
679   /* undo conjugation */
680   for (i = 0; i < b->locr; i++)
681     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682 #endif
683   Bs->assembled = PETSC_TRUE;
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688 {
689   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690   PetscInt       i, j;
691 
692   PetscFunctionBegin;
693   for (i = 0; i < a->locr; i++)
694     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699 {
700   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
701   Mat            Bs   = *B;
702   PetscBLASInt   one  = 1;
703   PetscScalar    sone = 1.0, zero = 0.0;
704 
705   PetscFunctionBegin;
706   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708   *B = Bs;
709   b  = (Mat_ScaLAPACK *)Bs->data;
710   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711   Bs->assembled = PETSC_TRUE;
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716 {
717   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
718   PetscScalar    *x, *x2d;
719   const PetscInt *ranges;
720   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
721 
722   PetscFunctionBegin;
723   PetscCall(VecCopy(B, X));
724   PetscCall(VecGetArray(X, &x));
725 
726   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729   PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
730   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731   PetscCheckScaLapackInfo("descinit", info);
732 
733   /* allocate 2d vector */
734   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735   PetscCall(PetscMalloc1(lszx, &x2d));
736   PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
737 
738   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740   PetscCheckScaLapackInfo("descinit", info);
741 
742   /* redistribute x as a column of a 2d matrix */
743   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
744 
745   /* call ScaLAPACK subroutine */
746   switch (A->factortype) {
747   case MAT_FACTOR_LU:
748     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749     PetscCheckScaLapackInfo("getrs", info);
750     break;
751   case MAT_FACTOR_CHOLESKY:
752     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753     PetscCheckScaLapackInfo("potrs", info);
754     break;
755   default:
756     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757   }
758 
759   /* redistribute x from a column of a 2d matrix */
760   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
761 
762   PetscCall(PetscFree(x2d));
763   PetscCall(VecRestoreArray(X, &x));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatSolve_ScaLAPACK(A, B, X));
771   PetscCall(VecAXPY(X, 1, Y));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776 {
777   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
778   PetscBool      flg1, flg2;
779   PetscBLASInt   one = 1, info;
780   Mat            C;
781   MatType        type;
782 
783   PetscFunctionBegin;
784   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
785   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
786   if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
787   if (flg2) {
788     if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
789     else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
790     C = X;
791   } else {
792     PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
793   }
794   x = (Mat_ScaLAPACK *)C->data;
795 
796   switch (A->factortype) {
797   case MAT_FACTOR_LU:
798     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
799     PetscCheckScaLapackInfo("getrs", info);
800     break;
801   case MAT_FACTOR_CHOLESKY:
802     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
803     PetscCheckScaLapackInfo("potrs", info);
804     break;
805   default:
806     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
807   }
808   if (!flg2) {
809     PetscCall(MatGetType(X, &type));
810     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
811     PetscCall(MatDestroy(&C));
812   }
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
816 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
817 {
818   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
819   PetscBLASInt   one = 1, info;
820 
821   PetscFunctionBegin;
822   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
823   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
824   PetscCheckScaLapackInfo("getrf", info);
825   A->factortype = MAT_FACTOR_LU;
826   A->assembled  = PETSC_TRUE;
827 
828   PetscCall(PetscFree(A->solvertype));
829   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
834 {
835   PetscFunctionBegin;
836   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
837   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
842 {
843   PetscFunctionBegin;
844   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
845   PetscFunctionReturn(PETSC_SUCCESS);
846 }
847 
848 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
849 {
850   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
851   PetscBLASInt   one = 1, info;
852 
853   PetscFunctionBegin;
854   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
855   PetscCheckScaLapackInfo("potrf", info);
856   A->factortype = MAT_FACTOR_CHOLESKY;
857   A->assembled  = PETSC_TRUE;
858 
859   PetscCall(PetscFree(A->solvertype));
860   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
861   PetscFunctionReturn(PETSC_SUCCESS);
862 }
863 
864 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
865 {
866   PetscFunctionBegin;
867   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
868   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871 
872 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
873 {
874   PetscFunctionBegin;
875   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
880 {
881   PetscFunctionBegin;
882   *type = MATSOLVERSCALAPACK;
883   PetscFunctionReturn(PETSC_SUCCESS);
884 }
885 
886 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
887 {
888   Mat            B;
889   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
890 
891   PetscFunctionBegin;
892   /* Create the factorization matrix */
893   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
894   B->trivialsymbolic = PETSC_TRUE;
895   B->factortype      = ftype;
896   PetscCall(PetscFree(B->solvertype));
897   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
898 
899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
900   *F = B;
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
904 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
905 {
906   PetscFunctionBegin;
907   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
908   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
913 {
914   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
915   PetscBLASInt   one = 1, lwork = 0;
916   const char    *ntype;
917   PetscScalar   *work = NULL, dummy;
918 
919   PetscFunctionBegin;
920   switch (type) {
921   case NORM_1:
922     ntype = "1";
923     lwork = PetscMax(a->locr, a->locc);
924     break;
925   case NORM_FROBENIUS:
926     ntype = "F";
927     work  = &dummy;
928     break;
929   case NORM_INFINITY:
930     ntype = "I";
931     lwork = PetscMax(a->locr, a->locc);
932     break;
933   default:
934     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
935   }
936   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
937   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
938   if (lwork) PetscCall(PetscFree(work));
939   PetscFunctionReturn(PETSC_SUCCESS);
940 }
941 
942 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
943 {
944   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
945 
946   PetscFunctionBegin;
947   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
948   PetscFunctionReturn(PETSC_SUCCESS);
949 }
950 
951 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
952 {
953   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
954   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
955 
956   PetscFunctionBegin;
957   if (rows) {
958     n     = a->locr;
959     nb    = a->mb;
960     isrc  = a->rsrc;
961     nproc = a->grid->nprow;
962     iproc = a->grid->myrow;
963     PetscCall(PetscMalloc1(n, &idx));
964     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
965     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
966   }
967   if (cols) {
968     n     = a->locc;
969     nb    = a->nb;
970     isrc  = a->csrc;
971     nproc = a->grid->npcol;
972     iproc = a->grid->mycol;
973     PetscCall(PetscMalloc1(n, &idx));
974     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
975     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
976   }
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
981 {
982   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
983   Mat                Bmpi;
984   MPI_Comm           comm;
985   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
986   const PetscInt    *ranges, *branges, *cwork;
987   const PetscScalar *vwork;
988   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
989   PetscScalar       *barray;
990   PetscBool          differ = PETSC_FALSE;
991   PetscMPIInt        size;
992 
993   PetscFunctionBegin;
994   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
995   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
996 
997   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
998     PetscCallMPI(MPI_Comm_size(comm, &size));
999     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
1000     for (i = 0; i < size; i++)
1001       if (ranges[i + 1] != branges[i + 1]) {
1002         differ = PETSC_TRUE;
1003         break;
1004       }
1005   }
1006 
1007   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1008     PetscCall(MatCreate(comm, &Bmpi));
1009     m = PETSC_DECIDE;
1010     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1011     n = PETSC_DECIDE;
1012     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1013     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1014     PetscCall(MatSetType(Bmpi, MATDENSE));
1015     PetscCall(MatSetUp(Bmpi));
1016 
1017     /* create ScaLAPACK descriptor for B (1d block distribution) */
1018     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1019     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1020     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1021     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1022     PetscCheckScaLapackInfo("descinit", info);
1023 
1024     /* redistribute matrix */
1025     PetscCall(MatDenseGetArray(Bmpi, &barray));
1026     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1027     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1028     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1029     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1030 
1031     /* transfer rows of auxiliary matrix to the final matrix B */
1032     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1033     for (i = rstart; i < rend; i++) {
1034       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1035       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1036       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1037     }
1038     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1039     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1040     PetscCall(MatDestroy(&Bmpi));
1041 
1042   } else { /* normal cases */
1043 
1044     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1045     else {
1046       PetscCall(MatCreate(comm, &Bmpi));
1047       m = PETSC_DECIDE;
1048       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1049       n = PETSC_DECIDE;
1050       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1051       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1052       PetscCall(MatSetType(Bmpi, MATDENSE));
1053       PetscCall(MatSetUp(Bmpi));
1054     }
1055 
1056     /* create ScaLAPACK descriptor for B (1d block distribution) */
1057     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1058     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1059     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1060     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1061     PetscCheckScaLapackInfo("descinit", info);
1062 
1063     /* redistribute matrix */
1064     PetscCall(MatDenseGetArray(Bmpi, &barray));
1065     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1066     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1067 
1068     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1069     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1070     if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1071     else *B = Bmpi;
1072   }
1073   PetscFunctionReturn(PETSC_SUCCESS);
1074 }
1075 
1076 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1077 {
1078   const PetscInt *ranges;
1079   PetscMPIInt     size;
1080   PetscInt        i, n;
1081 
1082   PetscFunctionBegin;
1083   *correct = PETSC_TRUE;
1084   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1085   if (size > 1) {
1086     PetscCall(PetscLayoutGetRanges(map, &ranges));
1087     n = ranges[1] - ranges[0];
1088     for (i = 1; i < size; i++)
1089       if (ranges[i + 1] - ranges[i] != n) break;
1090     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1091   }
1092   PetscFunctionReturn(PETSC_SUCCESS);
1093 }
1094 
1095 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1096 {
1097   Mat_ScaLAPACK     *b;
1098   Mat                Bmpi;
1099   MPI_Comm           comm;
1100   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1101   const PetscInt    *ranges, *rows, *cols;
1102   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1103   const PetscScalar *aarray;
1104   IS                 ir, ic;
1105   PetscInt           lda;
1106   PetscBool          flg;
1107 
1108   PetscFunctionBegin;
1109   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1110 
1111   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1112   else {
1113     PetscCall(MatCreate(comm, &Bmpi));
1114     m = PETSC_DECIDE;
1115     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1116     n = PETSC_DECIDE;
1117     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1118     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1119     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1120     PetscCall(MatSetUp(Bmpi));
1121   }
1122   b = (Mat_ScaLAPACK *)Bmpi->data;
1123 
1124   PetscCall(MatDenseGetLDA(A, &lda));
1125   PetscCall(MatDenseGetArrayRead(A, &aarray));
1126   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1127   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1128   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1129     /* create ScaLAPACK descriptor for A (1d block distribution) */
1130     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1131     PetscCall(PetscBLASIntCast(ranges[1], &amb));        /* row block size */
1132     PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1133     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1134     PetscCheckScaLapackInfo("descinit", info);
1135 
1136     /* redistribute matrix */
1137     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1138     Bmpi->nooffprocentries = PETSC_TRUE;
1139   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1140     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1141     b->roworiented = PETSC_FALSE;
1142     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1143     PetscCall(ISGetIndices(ir, &rows));
1144     PetscCall(ISGetIndices(ic, &cols));
1145     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1146     PetscCall(ISRestoreIndices(ir, &rows));
1147     PetscCall(ISRestoreIndices(ic, &cols));
1148     PetscCall(ISDestroy(&ic));
1149     PetscCall(ISDestroy(&ir));
1150   }
1151   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1152   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1153   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1154   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1155   else *B = Bmpi;
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
1159 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1160 {
1161   Mat                mat_scal;
1162   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1163   const PetscInt    *cols;
1164   const PetscScalar *vals;
1165 
1166   PetscFunctionBegin;
1167   if (reuse == MAT_REUSE_MATRIX) {
1168     mat_scal = *newmat;
1169     PetscCall(MatZeroEntries(mat_scal));
1170   } else {
1171     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1172     m = PETSC_DECIDE;
1173     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1174     n = PETSC_DECIDE;
1175     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1176     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1177     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1178     PetscCall(MatSetUp(mat_scal));
1179   }
1180   for (row = rstart; row < rend; row++) {
1181     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1182     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1183     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1184   }
1185   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1186   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1187 
1188   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1189   else *newmat = mat_scal;
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
1193 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1194 {
1195   Mat                mat_scal;
1196   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1197   const PetscInt    *cols;
1198   const PetscScalar *vals;
1199   PetscScalar        v;
1200 
1201   PetscFunctionBegin;
1202   if (reuse == MAT_REUSE_MATRIX) {
1203     mat_scal = *newmat;
1204     PetscCall(MatZeroEntries(mat_scal));
1205   } else {
1206     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1207     m = PETSC_DECIDE;
1208     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1209     n = PETSC_DECIDE;
1210     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1211     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1212     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1213     PetscCall(MatSetUp(mat_scal));
1214   }
1215   PetscCall(MatGetRowUpperTriangular(A));
1216   for (row = rstart; row < rend; row++) {
1217     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1218     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1219     for (j = 0; j < ncols; j++) { /* lower triangular part */
1220       if (cols[j] == row) continue;
1221       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1222       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1223     }
1224     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1225   }
1226   PetscCall(MatRestoreRowUpperTriangular(A));
1227   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1228   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1229 
1230   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1231   else *newmat = mat_scal;
1232   PetscFunctionReturn(PETSC_SUCCESS);
1233 }
1234 
1235 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1236 {
1237   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1238   PetscInt       sz = 0;
1239 
1240   PetscFunctionBegin;
1241   PetscCall(PetscLayoutSetUp(A->rmap));
1242   PetscCall(PetscLayoutSetUp(A->cmap));
1243   if (!a->lld) a->lld = a->locr;
1244 
1245   PetscCall(PetscFree(a->loc));
1246   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1247   PetscCall(PetscCalloc1(sz, &a->loc));
1248 
1249   A->preallocated = PETSC_TRUE;
1250   PetscFunctionReturn(PETSC_SUCCESS);
1251 }
1252 
1253 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1254 {
1255   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1256   Mat_ScaLAPACK_Grid *grid;
1257   PetscMPIInt         iflg;
1258   MPI_Comm            icomm;
1259 
1260   PetscFunctionBegin;
1261   PetscCall(MatStashDestroy_Private(&A->stash));
1262   PetscCall(PetscFree(a->loc));
1263   PetscCall(PetscFree(a->pivots));
1264   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1265   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1266   if (--grid->grid_refct == 0) {
1267     Cblacs_gridexit(grid->ictxt);
1268     Cblacs_gridexit(grid->ictxrow);
1269     Cblacs_gridexit(grid->ictxcol);
1270     PetscCall(PetscFree(grid));
1271     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1272   }
1273   PetscCall(PetscCommDestroy(&icomm));
1274   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1275   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1277   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1278   PetscCall(PetscFree(A->data));
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281 
1282 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1283 {
1284   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1285   PetscBLASInt   info = 0;
1286   PetscBool      flg;
1287 
1288   PetscFunctionBegin;
1289   PetscCall(PetscLayoutSetUp(A->rmap));
1290   PetscCall(PetscLayoutSetUp(A->cmap));
1291 
1292   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1293   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1294   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1295   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1296   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1297 
1298   /* compute local sizes */
1299   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1300   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1301   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1302   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1303   a->lld  = PetscMax(1, a->locr);
1304 
1305   /* allocate local array */
1306   PetscCall(MatScaLAPACKSetPreallocation(A));
1307 
1308   /* set up ScaLAPACK descriptor */
1309   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1310   PetscCheckScaLapackInfo("descinit", info);
1311   PetscFunctionReturn(PETSC_SUCCESS);
1312 }
1313 
1314 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1315 {
1316   PetscInt nstash, reallocs;
1317 
1318   PetscFunctionBegin;
1319   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1320   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1321   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1322   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1323   PetscFunctionReturn(PETSC_SUCCESS);
1324 }
1325 
1326 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1327 {
1328   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1329   PetscMPIInt    n;
1330   PetscInt       i, flg, *row, *col;
1331   PetscScalar   *val;
1332   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1333 
1334   PetscFunctionBegin;
1335   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1336   while (1) {
1337     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1338     if (!flg) break;
1339     for (i = 0; i < n; i++) {
1340       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1341       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1342       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1343       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1344       switch (A->insertmode) {
1345       case INSERT_VALUES:
1346         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1347         break;
1348       case ADD_VALUES:
1349         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1350         break;
1351       default:
1352         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1353       }
1354     }
1355   }
1356   PetscCall(MatStashScatterEnd_Private(&A->stash));
1357   PetscFunctionReturn(PETSC_SUCCESS);
1358 }
1359 
1360 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1361 {
1362   Mat      Adense, As;
1363   MPI_Comm comm;
1364 
1365   PetscFunctionBegin;
1366   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1367   PetscCall(MatCreate(comm, &Adense));
1368   PetscCall(MatSetType(Adense, MATDENSE));
1369   PetscCall(MatLoad(Adense, viewer));
1370   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1371   PetscCall(MatDestroy(&Adense));
1372   PetscCall(MatHeaderReplace(newMat, &As));
1373   PetscFunctionReturn(PETSC_SUCCESS);
1374 }
1375 
1376 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1377                                        NULL,
1378                                        NULL,
1379                                        MatMult_ScaLAPACK,
1380                                        /* 4*/ MatMultAdd_ScaLAPACK,
1381                                        MatMultTranspose_ScaLAPACK,
1382                                        MatMultTransposeAdd_ScaLAPACK,
1383                                        MatSolve_ScaLAPACK,
1384                                        MatSolveAdd_ScaLAPACK,
1385                                        NULL,
1386                                        /*10*/ NULL,
1387                                        MatLUFactor_ScaLAPACK,
1388                                        MatCholeskyFactor_ScaLAPACK,
1389                                        NULL,
1390                                        MatTranspose_ScaLAPACK,
1391                                        /*15*/ MatGetInfo_ScaLAPACK,
1392                                        NULL,
1393                                        MatGetDiagonal_ScaLAPACK,
1394                                        MatDiagonalScale_ScaLAPACK,
1395                                        MatNorm_ScaLAPACK,
1396                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1397                                        MatAssemblyEnd_ScaLAPACK,
1398                                        MatSetOption_ScaLAPACK,
1399                                        MatZeroEntries_ScaLAPACK,
1400                                        /*24*/ NULL,
1401                                        MatLUFactorSymbolic_ScaLAPACK,
1402                                        MatLUFactorNumeric_ScaLAPACK,
1403                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1404                                        MatCholeskyFactorNumeric_ScaLAPACK,
1405                                        /*29*/ MatSetUp_ScaLAPACK,
1406                                        NULL,
1407                                        NULL,
1408                                        NULL,
1409                                        NULL,
1410                                        /*34*/ MatDuplicate_ScaLAPACK,
1411                                        NULL,
1412                                        NULL,
1413                                        NULL,
1414                                        NULL,
1415                                        /*39*/ MatAXPY_ScaLAPACK,
1416                                        NULL,
1417                                        NULL,
1418                                        NULL,
1419                                        MatCopy_ScaLAPACK,
1420                                        /*44*/ NULL,
1421                                        MatScale_ScaLAPACK,
1422                                        MatShift_ScaLAPACK,
1423                                        NULL,
1424                                        NULL,
1425                                        /*49*/ NULL,
1426                                        NULL,
1427                                        NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        /*54*/ NULL,
1431                                        NULL,
1432                                        NULL,
1433                                        NULL,
1434                                        NULL,
1435                                        /*59*/ NULL,
1436                                        MatDestroy_ScaLAPACK,
1437                                        MatView_ScaLAPACK,
1438                                        NULL,
1439                                        NULL,
1440                                        /*64*/ NULL,
1441                                        NULL,
1442                                        NULL,
1443                                        NULL,
1444                                        NULL,
1445                                        /*69*/ NULL,
1446                                        MatConvert_ScaLAPACK_Dense,
1447                                        NULL,
1448                                        NULL,
1449                                        NULL,
1450                                        /*74*/ NULL,
1451                                        NULL,
1452                                        NULL,
1453                                        NULL,
1454                                        MatLoad_ScaLAPACK,
1455                                        /*79*/ NULL,
1456                                        NULL,
1457                                        NULL,
1458                                        NULL,
1459                                        NULL,
1460                                        /*84*/ NULL,
1461                                        MatMatMultNumeric_ScaLAPACK,
1462                                        NULL,
1463                                        NULL,
1464                                        MatMatTransposeMultNumeric_ScaLAPACK,
1465                                        /*89*/ NULL,
1466                                        MatProductSetFromOptions_ScaLAPACK,
1467                                        NULL,
1468                                        NULL,
1469                                        MatConjugate_ScaLAPACK,
1470                                        /*94*/ NULL,
1471                                        NULL,
1472                                        NULL,
1473                                        NULL,
1474                                        NULL,
1475                                        /*99*/ NULL,
1476                                        MatMatSolve_ScaLAPACK,
1477                                        NULL,
1478                                        NULL,
1479                                        NULL,
1480                                        /*104*/ MatMissingDiagonal_ScaLAPACK,
1481                                        NULL,
1482                                        NULL,
1483                                        NULL,
1484                                        NULL,
1485                                        /*109*/ NULL,
1486                                        NULL,
1487                                        MatHermitianTranspose_ScaLAPACK,
1488                                        MatMultHermitianTranspose_ScaLAPACK,
1489                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1490                                        /*114*/ NULL,
1491                                        NULL,
1492                                        NULL,
1493                                        NULL,
1494                                        NULL,
1495                                        /*119*/ NULL,
1496                                        NULL,
1497                                        NULL,
1498                                        MatTransposeMatMultNumeric_ScaLAPACK,
1499                                        NULL,
1500                                        /*124*/ NULL,
1501                                        NULL,
1502                                        NULL,
1503                                        NULL,
1504                                        NULL,
1505                                        /*129*/ NULL,
1506                                        NULL,
1507                                        NULL,
1508                                        NULL,
1509                                        NULL,
1510                                        /*134*/ NULL,
1511                                        NULL,
1512                                        NULL,
1513                                        NULL,
1514                                        NULL,
1515                                        NULL,
1516                                        /*140*/ NULL,
1517                                        NULL,
1518                                        NULL,
1519                                        NULL,
1520                                        NULL};
1521 
1522 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1523 {
1524   PetscInt          *owner, *startv, *starti, bs2;
1525   PetscInt           size = stash->size, nsends;
1526   PetscInt          *sindices, **rindices, j, l;
1527   PetscScalar      **rvalues, *svalues;
1528   MPI_Comm           comm = stash->comm;
1529   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1530   PetscMPIInt        tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1531   PetscInt          *sp_idx, *sp_idy;
1532   PetscScalar       *sp_val;
1533   PetscMatStashSpace space, space_next;
1534   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1535   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1536 
1537   PetscFunctionBegin;
1538   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1539     InsertMode addv;
1540     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1541     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1542     mat->insertmode = addv; /* in case this processor had no cache */
1543   }
1544 
1545   bs2 = stash->bs * stash->bs;
1546 
1547   /*  first count number of contributors to each processor */
1548   PetscCall(PetscCalloc1(size, &nlengths));
1549   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1550 
1551   ii = j = 0;
1552   space  = stash->space_head;
1553   while (space) {
1554     space_next = space->next;
1555     for (l = 0; l < space->local_used; l++) {
1556       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1557       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1558       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1559       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1560       nlengths[j]++;
1561       owner[ii] = j;
1562       ii++;
1563     }
1564     space = space_next;
1565   }
1566 
1567   /* Now check what procs get messages - and compute nsends. */
1568   PetscCall(PetscCalloc1(size, &sizes));
1569   nsends = 0;
1570   for (PetscMPIInt i = 0; i < size; i++) {
1571     if (nlengths[i]) {
1572       sizes[i] = 1;
1573       nsends++;
1574     }
1575   }
1576 
1577   {
1578     PetscMPIInt *onodes, *olengths;
1579 
1580     /* Determine the number of messages to expect, their lengths, from from-ids */
1581     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1582     PetscCall(PetscMPIIntCast(nsends, &insends));
1583     PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1584     /* since clubbing row,col - lengths are multiplied by 2 */
1585     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
1586     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1587     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1588     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
1589     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1590     PetscCall(PetscFree(onodes));
1591     PetscCall(PetscFree(olengths));
1592   }
1593 
1594   /* do sends:
1595       1) starts[i] gives the starting index in svalues for stuff going to
1596          the ith processor
1597   */
1598   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1599   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1600   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1601   /* use 2 sends the first with all_a, the next with all_i and all_j */
1602   startv[0] = 0;
1603   starti[0] = 0;
1604   for (PetscMPIInt i = 1; i < size; i++) {
1605     startv[i] = startv[i - 1] + nlengths[i - 1];
1606     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1607   }
1608 
1609   ii    = 0;
1610   space = stash->space_head;
1611   while (space) {
1612     space_next = space->next;
1613     sp_idx     = space->idx;
1614     sp_idy     = space->idy;
1615     sp_val     = space->val;
1616     for (l = 0; l < space->local_used; l++) {
1617       j = owner[ii];
1618       if (bs2 == 1) {
1619         svalues[startv[j]] = sp_val[l];
1620       } else {
1621         PetscInt     k;
1622         PetscScalar *buf1, *buf2;
1623         buf1 = svalues + bs2 * startv[j];
1624         buf2 = space->val + bs2 * l;
1625         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1626       }
1627       sindices[starti[j]]               = sp_idx[l];
1628       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1629       startv[j]++;
1630       starti[j]++;
1631       ii++;
1632     }
1633     space = space_next;
1634   }
1635   startv[0] = 0;
1636   for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1637 
1638   for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1639     if (sizes[i]) {
1640       PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1641       PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1642     }
1643   }
1644 #if defined(PETSC_USE_INFO)
1645   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1646   for (PetscMPIInt i = 0; i < size; i++) {
1647     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1648   }
1649 #endif
1650   PetscCall(PetscFree(nlengths));
1651   PetscCall(PetscFree(owner));
1652   PetscCall(PetscFree2(startv, starti));
1653   PetscCall(PetscFree(sizes));
1654 
1655   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1656   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1657 
1658   for (PetscMPIInt i = 0; i < nreceives; i++) {
1659     recv_waits[2 * i]     = recv_waits1[i];
1660     recv_waits[2 * i + 1] = recv_waits2[i];
1661   }
1662   stash->recv_waits = recv_waits;
1663 
1664   PetscCall(PetscFree(recv_waits1));
1665   PetscCall(PetscFree(recv_waits2));
1666 
1667   stash->svalues         = svalues;
1668   stash->sindices        = sindices;
1669   stash->rvalues         = rvalues;
1670   stash->rindices        = rindices;
1671   stash->send_waits      = send_waits;
1672   stash->nsends          = (PetscMPIInt)nsends;
1673   stash->nrecvs          = nreceives;
1674   stash->reproduce_count = 0;
1675   PetscFunctionReturn(PETSC_SUCCESS);
1676 }
1677 
1678 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1679 {
1680   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1681 
1682   PetscFunctionBegin;
1683   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1684   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1685   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1686   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1687   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1688   PetscFunctionReturn(PETSC_SUCCESS);
1689 }
1690 
1691 /*@
1692   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1693   the `MATSCALAPACK` matrix
1694 
1695   Logically Collective
1696 
1697   Input Parameters:
1698 + A  - a `MATSCALAPACK` matrix
1699 . mb - the row block size
1700 - nb - the column block size
1701 
1702   Level: intermediate
1703 
1704   Note:
1705   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1706 
1707 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1708 @*/
1709 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1710 {
1711   PetscFunctionBegin;
1712   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1713   PetscValidLogicalCollectiveInt(A, mb, 2);
1714   PetscValidLogicalCollectiveInt(A, nb, 3);
1715   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1716   PetscFunctionReturn(PETSC_SUCCESS);
1717 }
1718 
1719 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1720 {
1721   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1722 
1723   PetscFunctionBegin;
1724   if (mb) *mb = a->mb;
1725   if (nb) *nb = a->nb;
1726   PetscFunctionReturn(PETSC_SUCCESS);
1727 }
1728 
1729 /*@
1730   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1731   the `MATSCALAPACK` matrix
1732 
1733   Not Collective
1734 
1735   Input Parameter:
1736 . A - a `MATSCALAPACK` matrix
1737 
1738   Output Parameters:
1739 + mb - the row block size
1740 - nb - the column block size
1741 
1742   Level: intermediate
1743 
1744   Note:
1745   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1746 
1747 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1748 @*/
1749 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1750 {
1751   PetscFunctionBegin;
1752   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1753   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1754   PetscFunctionReturn(PETSC_SUCCESS);
1755 }
1756 
1757 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1758 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1759 
1760 /*MC
1761    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1762 
1763    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1764 
1765    Options Database Keys:
1766 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1767 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1768 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1769 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1770 
1771    Level: intermediate
1772 
1773   Note:
1774    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1775    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1776    the given rank.
1777 
1778 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1779 M*/
1780 
1781 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1782 {
1783   Mat_ScaLAPACK      *a;
1784   PetscBool           flg;
1785   PetscMPIInt         iflg;
1786   Mat_ScaLAPACK_Grid *grid;
1787   MPI_Comm            icomm;
1788   PetscBLASInt        nprow, npcol, myrow, mycol;
1789   PetscInt            optv1, k = 2, array[2] = {0, 0};
1790   PetscMPIInt         size;
1791 
1792   PetscFunctionBegin;
1793   A->ops[0]     = MatOps_Values;
1794   A->insertmode = NOT_SET_VALUES;
1795 
1796   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1797   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1798   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1799   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1800   A->stash.ScatterDestroy = NULL;
1801 
1802   PetscCall(PetscNew(&a));
1803   A->data = (void *)a;
1804 
1805   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1806   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1807     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
1808     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1809     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1810   }
1811   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1812   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1813   if (!iflg) {
1814     PetscCall(PetscNew(&grid));
1815 
1816     PetscCallMPI(MPI_Comm_size(icomm, &size));
1817     PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));
1818 
1819     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1820     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg));
1821     if (flg) {
1822       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1823       PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1824     }
1825     PetscOptionsEnd();
1826 
1827     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1828     grid->npcol = size / grid->nprow;
1829     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1830     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1831     grid->ictxt = Csys2blacs_handle(icomm);
1832     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1833     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1834     grid->grid_refct = 1;
1835     grid->nprow      = nprow;
1836     grid->npcol      = npcol;
1837     grid->myrow      = myrow;
1838     grid->mycol      = mycol;
1839     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1840     grid->ictxrow = Csys2blacs_handle(icomm);
1841     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1842     grid->ictxcol = Csys2blacs_handle(icomm);
1843     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1844     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1845 
1846   } else grid->grid_refct++;
1847   PetscCall(PetscCommDestroy(&icomm));
1848   a->grid = grid;
1849   a->mb   = DEFAULT_BLOCKSIZE;
1850   a->nb   = DEFAULT_BLOCKSIZE;
1851 
1852   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1853   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1854   if (flg) {
1855     a->mb = (PetscMPIInt)array[0];
1856     a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1857   }
1858   PetscOptionsEnd();
1859 
1860   a->roworiented = PETSC_TRUE;
1861   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1862   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1863   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1864   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1865   PetscFunctionReturn(PETSC_SUCCESS);
1866 }
1867 
1868 /*@C
1869   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1870   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1871 
1872   Collective
1873 
1874   Input Parameters:
1875 + comm - MPI communicator
1876 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1877 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1878 . M    - number of global rows
1879 . N    - number of global columns
1880 . rsrc - coordinate of process that owns the first row of the distributed matrix
1881 - csrc - coordinate of process that owns the first column of the distributed matrix
1882 
1883   Output Parameter:
1884 . A - the matrix
1885 
1886   Options Database Key:
1887 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1888 
1889   Level: intermediate
1890 
1891   Notes:
1892   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1893 
1894   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1895   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1896   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1897 
1898   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1899   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1900   significance and are thus ignored. The block sizes refer to the values
1901   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1902 
1903 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1904 @*/
1905 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1906 {
1907   Mat_ScaLAPACK *a;
1908   PetscInt       m, n;
1909 
1910   PetscFunctionBegin;
1911   PetscCall(MatCreate(comm, A));
1912   PetscCall(MatSetType(*A, MATSCALAPACK));
1913   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1914   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1915   m = PETSC_DECIDE;
1916   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1917   n = PETSC_DECIDE;
1918   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1919   PetscCall(MatSetSizes(*A, m, n, M, N));
1920   a = (Mat_ScaLAPACK *)(*A)->data;
1921   PetscCall(PetscBLASIntCast(M, &a->M));
1922   PetscCall(PetscBLASIntCast(N, &a->N));
1923   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1924   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1925   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1926   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1927   PetscCall(MatSetUp(*A));
1928   PetscFunctionReturn(PETSC_SUCCESS);
1929 }
1930