xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     xlld = PetscMax(1, A->rmap->n);
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     xlld = PetscMax(1, lszx);
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201 
202     /* redistribute y from a row of a 2d matrix */
203     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204 
205   } else { /* non-transpose */
206 
207     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210     xlld = 1;
211     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212     PetscCheckScaLapackInfo("descinit", info);
213     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215     ylld = PetscMax(1, A->rmap->n);
216     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217     PetscCheckScaLapackInfo("descinit", info);
218 
219     /* allocate 2d vectors */
220     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223     ylld = PetscMax(1, lszy);
224 
225     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227     PetscCheckScaLapackInfo("descinit", info);
228     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229     PetscCheckScaLapackInfo("descinit", info);
230 
231     /* redistribute x as a row of a 2d matrix */
232     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233 
234     /* redistribute y as a column of a 2d matrix */
235     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236 
237     /* call PBLAS subroutine */
238     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239 
240     /* redistribute y from a column of a 2d matrix */
241     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242   }
243   PetscCall(PetscFree2(x2d, y2d));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATSCALAPACK));
353   PetscCall(MatSetUp(C));
354   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387   PetscScalar    sone = 1.0, zero = 0.0;
388   PetscBLASInt   one = 1;
389 
390   PetscFunctionBegin;
391   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392   C->assembled = PETSC_TRUE;
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400   PetscCall(MatSetType(C, MATSCALAPACK));
401   PetscCall(MatSetUp(C));
402   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408   Mat_Product *product = C->product;
409 
410   PetscFunctionBegin;
411   switch (product->type) {
412   case MATPRODUCT_AB:
413     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414     C->ops->productsymbolic = MatProductSymbolic_AB;
415     break;
416   case MATPRODUCT_AtB:
417     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419     break;
420   case MATPRODUCT_ABt:
421     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423     break;
424   default:
425     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433   PetscScalar    *darray, *d2d, v;
434   const PetscInt *ranges;
435   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436 
437   PetscFunctionBegin;
438   PetscCall(VecGetArray(D, &darray));
439 
440   if (A->rmap->N <= A->cmap->N) { /* row version */
441 
442     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445     dlld = PetscMax(1, A->rmap->n);
446     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447     PetscCheckScaLapackInfo("descinit", info);
448 
449     /* allocate 2d vector */
450     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451     PetscCall(PetscCalloc1(lszd, &d2d));
452     dlld = PetscMax(1, lszd);
453 
454     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456     PetscCheckScaLapackInfo("descinit", info);
457 
458     /* collect diagonal */
459     for (j = 1; j <= a->M; j++) {
460       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462     }
463 
464     /* redistribute d from a column of a 2d matrix */
465     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466     PetscCall(PetscFree(d2d));
467 
468   } else { /* column version */
469 
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* collect diagonal */
486     for (j = 1; j <= a->N; j++) {
487       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489     }
490 
491     /* redistribute d from a row of a 2d matrix */
492     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493     PetscCall(PetscFree(d2d));
494   }
495 
496   PetscCall(VecRestoreArray(D, &darray));
497   PetscCall(VecAssemblyBegin(D));
498   PetscCall(VecAssemblyEnd(D));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505   const PetscScalar *d;
506   const PetscInt    *ranges;
507   PetscScalar       *d2d;
508   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509 
510   PetscFunctionBegin;
511   if (R) {
512     PetscCall(VecGetArrayRead(R, &d));
513     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516     dlld = 1;
517     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518     PetscCheckScaLapackInfo("descinit", info);
519 
520     /* allocate 2d vector */
521     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522     PetscCall(PetscCalloc1(lszd, &d2d));
523 
524     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526     PetscCheckScaLapackInfo("descinit", info);
527 
528     /* redistribute d to a row of a 2d matrix */
529     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530 
531     /* broadcast along process columns */
532     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534 
535     /* local scaling */
536     for (j = 0; j < a->locc; j++)
537       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538 
539     PetscCall(PetscFree(d2d));
540     PetscCall(VecRestoreArrayRead(R, &d));
541   }
542   if (L) {
543     PetscCall(VecGetArrayRead(L, &d));
544     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547     dlld = PetscMax(1, A->rmap->n);
548     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549     PetscCheckScaLapackInfo("descinit", info);
550 
551     /* allocate 2d vector */
552     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553     PetscCall(PetscCalloc1(lszd, &d2d));
554     dlld = PetscMax(1, lszd);
555 
556     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558     PetscCheckScaLapackInfo("descinit", info);
559 
560     /* redistribute d to a column of a 2d matrix */
561     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562 
563     /* broadcast along process rows */
564     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566 
567     /* local scaling */
568     for (i = 0; i < a->locr; i++)
569       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570 
571     PetscCall(PetscFree(d2d));
572     PetscCall(VecRestoreArrayRead(L, &d));
573   }
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578 {
579   PetscFunctionBegin;
580   *missing = PETSC_FALSE;
581   PetscFunctionReturn(PETSC_SUCCESS);
582 }
583 
584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585 {
586   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587   PetscBLASInt   n, one = 1;
588 
589   PetscFunctionBegin;
590   n = x->lld * x->locc;
591   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596 {
597   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598   PetscBLASInt   i, n;
599   PetscScalar    v;
600 
601   PetscFunctionBegin;
602   n = PetscMin(x->M, x->N);
603   for (i = 1; i <= n; i++) {
604     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605     v += alpha;
606     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607   }
608   PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 
611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612 {
613   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
614   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
615   PetscBLASInt   one  = 1;
616   PetscScalar    beta = 1.0;
617 
618   PetscFunctionBegin;
619   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626 {
627   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
629 
630   PetscFunctionBegin;
631   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632   PetscCall(PetscObjectStateIncrease((PetscObject)B));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637 {
638   Mat            Bs;
639   MPI_Comm       comm;
640   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
641 
642   PetscFunctionBegin;
643   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644   PetscCall(MatCreate(comm, &Bs));
645   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646   PetscCall(MatSetType(Bs, MATSCALAPACK));
647   b       = (Mat_ScaLAPACK *)Bs->data;
648   b->M    = a->M;
649   b->N    = a->N;
650   b->mb   = a->mb;
651   b->nb   = a->nb;
652   b->rsrc = a->rsrc;
653   b->csrc = a->csrc;
654   PetscCall(MatSetUp(Bs));
655   *B = Bs;
656   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657   Bs->assembled = PETSC_TRUE;
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662 {
663   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
664   Mat            Bs   = *B;
665   PetscBLASInt   one  = 1;
666   PetscScalar    sone = 1.0, zero = 0.0;
667 #if defined(PETSC_USE_COMPLEX)
668   PetscInt i, j;
669 #endif
670 
671   PetscFunctionBegin;
672   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675   *B = Bs;
676   b  = (Mat_ScaLAPACK *)Bs->data;
677   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678 #if defined(PETSC_USE_COMPLEX)
679   /* undo conjugation */
680   for (i = 0; i < b->locr; i++)
681     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682 #endif
683   Bs->assembled = PETSC_TRUE;
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688 {
689   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690   PetscInt       i, j;
691 
692   PetscFunctionBegin;
693   for (i = 0; i < a->locr; i++)
694     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699 {
700   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
701   Mat            Bs   = *B;
702   PetscBLASInt   one  = 1;
703   PetscScalar    sone = 1.0, zero = 0.0;
704 
705   PetscFunctionBegin;
706   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708   *B = Bs;
709   b  = (Mat_ScaLAPACK *)Bs->data;
710   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711   Bs->assembled = PETSC_TRUE;
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716 {
717   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
718   PetscScalar    *x, *x2d;
719   const PetscInt *ranges;
720   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
721 
722   PetscFunctionBegin;
723   PetscCall(VecCopy(B, X));
724   PetscCall(VecGetArray(X, &x));
725 
726   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729   xlld = PetscMax(1, A->rmap->n);
730   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731   PetscCheckScaLapackInfo("descinit", info);
732 
733   /* allocate 2d vector */
734   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735   PetscCall(PetscMalloc1(lszx, &x2d));
736   xlld = PetscMax(1, lszx);
737 
738   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740   PetscCheckScaLapackInfo("descinit", info);
741 
742   /* redistribute x as a column of a 2d matrix */
743   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
744 
745   /* call ScaLAPACK subroutine */
746   switch (A->factortype) {
747   case MAT_FACTOR_LU:
748     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749     PetscCheckScaLapackInfo("getrs", info);
750     break;
751   case MAT_FACTOR_CHOLESKY:
752     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753     PetscCheckScaLapackInfo("potrs", info);
754     break;
755   default:
756     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757   }
758 
759   /* redistribute x from a column of a 2d matrix */
760   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
761 
762   PetscCall(PetscFree(x2d));
763   PetscCall(VecRestoreArray(X, &x));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatSolve_ScaLAPACK(A, B, X));
771   PetscCall(VecAXPY(X, 1, Y));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776 {
777   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
778   PetscBool      flg1, flg2;
779   PetscBLASInt   one = 1, info;
780 
781   PetscFunctionBegin;
782   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
783   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
784   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
785   MatScaLAPACKCheckDistribution(B, 1, X, 2);
786   b = (Mat_ScaLAPACK *)B->data;
787   x = (Mat_ScaLAPACK *)X->data;
788   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
789 
790   switch (A->factortype) {
791   case MAT_FACTOR_LU:
792     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
793     PetscCheckScaLapackInfo("getrs", info);
794     break;
795   case MAT_FACTOR_CHOLESKY:
796     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
797     PetscCheckScaLapackInfo("potrs", info);
798     break;
799   default:
800     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
801   }
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
805 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
806 {
807   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
808   PetscBLASInt   one = 1, info;
809 
810   PetscFunctionBegin;
811   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
812   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
813   PetscCheckScaLapackInfo("getrf", info);
814   A->factortype = MAT_FACTOR_LU;
815   A->assembled  = PETSC_TRUE;
816 
817   PetscCall(PetscFree(A->solvertype));
818   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
819   PetscFunctionReturn(PETSC_SUCCESS);
820 }
821 
822 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
823 {
824   PetscFunctionBegin;
825   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
826   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
827   PetscFunctionReturn(PETSC_SUCCESS);
828 }
829 
830 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
831 {
832   PetscFunctionBegin;
833   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
834   PetscFunctionReturn(PETSC_SUCCESS);
835 }
836 
837 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
838 {
839   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
840   PetscBLASInt   one = 1, info;
841 
842   PetscFunctionBegin;
843   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
844   PetscCheckScaLapackInfo("potrf", info);
845   A->factortype = MAT_FACTOR_CHOLESKY;
846   A->assembled  = PETSC_TRUE;
847 
848   PetscCall(PetscFree(A->solvertype));
849   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
854 {
855   PetscFunctionBegin;
856   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
857   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
858   PetscFunctionReturn(PETSC_SUCCESS);
859 }
860 
861 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
862 {
863   PetscFunctionBegin;
864   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
865   PetscFunctionReturn(PETSC_SUCCESS);
866 }
867 
868 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
869 {
870   PetscFunctionBegin;
871   *type = MATSOLVERSCALAPACK;
872   PetscFunctionReturn(PETSC_SUCCESS);
873 }
874 
875 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
876 {
877   Mat            B;
878   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
879 
880   PetscFunctionBegin;
881   /* Create the factorization matrix */
882   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
883   B->trivialsymbolic = PETSC_TRUE;
884   B->factortype      = ftype;
885   PetscCall(PetscFree(B->solvertype));
886   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
887 
888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
889   *F = B;
890   PetscFunctionReturn(PETSC_SUCCESS);
891 }
892 
893 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
894 {
895   PetscFunctionBegin;
896   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
897   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
901 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
902 {
903   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
904   PetscBLASInt   one = 1, lwork = 0;
905   const char    *ntype;
906   PetscScalar   *work = NULL, dummy;
907 
908   PetscFunctionBegin;
909   switch (type) {
910   case NORM_1:
911     ntype = "1";
912     lwork = PetscMax(a->locr, a->locc);
913     break;
914   case NORM_FROBENIUS:
915     ntype = "F";
916     work  = &dummy;
917     break;
918   case NORM_INFINITY:
919     ntype = "I";
920     lwork = PetscMax(a->locr, a->locc);
921     break;
922   default:
923     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
924   }
925   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
926   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
927   if (lwork) PetscCall(PetscFree(work));
928   PetscFunctionReturn(PETSC_SUCCESS);
929 }
930 
931 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
932 {
933   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
934 
935   PetscFunctionBegin;
936   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
937   PetscFunctionReturn(PETSC_SUCCESS);
938 }
939 
940 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
941 {
942   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
943   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
944 
945   PetscFunctionBegin;
946   if (rows) {
947     n     = a->locr;
948     nb    = a->mb;
949     isrc  = a->rsrc;
950     nproc = a->grid->nprow;
951     iproc = a->grid->myrow;
952     PetscCall(PetscMalloc1(n, &idx));
953     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
954     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
955   }
956   if (cols) {
957     n     = a->locc;
958     nb    = a->nb;
959     isrc  = a->csrc;
960     nproc = a->grid->npcol;
961     iproc = a->grid->mycol;
962     PetscCall(PetscMalloc1(n, &idx));
963     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
964     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
965   }
966   PetscFunctionReturn(PETSC_SUCCESS);
967 }
968 
969 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
970 {
971   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
972   Mat                Bmpi;
973   MPI_Comm           comm;
974   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
975   const PetscInt    *ranges, *branges, *cwork;
976   const PetscScalar *vwork;
977   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
978   PetscScalar       *barray;
979   PetscBool          differ = PETSC_FALSE;
980   PetscMPIInt        size;
981 
982   PetscFunctionBegin;
983   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
984   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
985 
986   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
987     PetscCallMPI(MPI_Comm_size(comm, &size));
988     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
989     for (i = 0; i < size; i++)
990       if (ranges[i + 1] != branges[i + 1]) {
991         differ = PETSC_TRUE;
992         break;
993       }
994   }
995 
996   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
997     PetscCall(MatCreate(comm, &Bmpi));
998     m = PETSC_DECIDE;
999     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1000     n = PETSC_DECIDE;
1001     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1002     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1003     PetscCall(MatSetType(Bmpi, MATDENSE));
1004     PetscCall(MatSetUp(Bmpi));
1005 
1006     /* create ScaLAPACK descriptor for B (1d block distribution) */
1007     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1008     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1009     lld = PetscMax(ldb, 1); /* local leading dimension */
1010     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1011     PetscCheckScaLapackInfo("descinit", info);
1012 
1013     /* redistribute matrix */
1014     PetscCall(MatDenseGetArray(Bmpi, &barray));
1015     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1016     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1017     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1018     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1019 
1020     /* transfer rows of auxiliary matrix to the final matrix B */
1021     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1022     for (i = rstart; i < rend; i++) {
1023       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1024       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1025       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1026     }
1027     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1028     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1029     PetscCall(MatDestroy(&Bmpi));
1030 
1031   } else { /* normal cases */
1032 
1033     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1034     else {
1035       PetscCall(MatCreate(comm, &Bmpi));
1036       m = PETSC_DECIDE;
1037       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1038       n = PETSC_DECIDE;
1039       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1040       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1041       PetscCall(MatSetType(Bmpi, MATDENSE));
1042       PetscCall(MatSetUp(Bmpi));
1043     }
1044 
1045     /* create ScaLAPACK descriptor for B (1d block distribution) */
1046     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1047     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1048     lld = PetscMax(ldb, 1); /* local leading dimension */
1049     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1050     PetscCheckScaLapackInfo("descinit", info);
1051 
1052     /* redistribute matrix */
1053     PetscCall(MatDenseGetArray(Bmpi, &barray));
1054     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1055     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1056 
1057     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1058     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1059     if (reuse == MAT_INPLACE_MATRIX) {
1060       PetscCall(MatHeaderReplace(A, &Bmpi));
1061     } else *B = Bmpi;
1062   }
1063   PetscFunctionReturn(PETSC_SUCCESS);
1064 }
1065 
1066 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1067 {
1068   const PetscInt *ranges;
1069   PetscMPIInt     size;
1070   PetscInt        i, n;
1071 
1072   PetscFunctionBegin;
1073   *correct = PETSC_TRUE;
1074   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1075   if (size > 1) {
1076     PetscCall(PetscLayoutGetRanges(map, &ranges));
1077     n = ranges[1] - ranges[0];
1078     for (i = 1; i < size; i++)
1079       if (ranges[i + 1] - ranges[i] != n) break;
1080     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1081   }
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1086 {
1087   Mat_ScaLAPACK     *b;
1088   Mat                Bmpi;
1089   MPI_Comm           comm;
1090   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1091   const PetscInt    *ranges, *rows, *cols;
1092   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1093   const PetscScalar *aarray;
1094   IS                 ir, ic;
1095   PetscInt           lda;
1096   PetscBool          flg;
1097 
1098   PetscFunctionBegin;
1099   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1100 
1101   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1102   else {
1103     PetscCall(MatCreate(comm, &Bmpi));
1104     m = PETSC_DECIDE;
1105     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1106     n = PETSC_DECIDE;
1107     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1108     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1109     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1110     PetscCall(MatSetUp(Bmpi));
1111   }
1112   b = (Mat_ScaLAPACK *)Bmpi->data;
1113 
1114   PetscCall(MatDenseGetLDA(A, &lda));
1115   PetscCall(MatDenseGetArrayRead(A, &aarray));
1116   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1117   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1118   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1119     /* create ScaLAPACK descriptor for A (1d block distribution) */
1120     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1121     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1122     lld = PetscMax(lda, 1);                       /* local leading dimension */
1123     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1124     PetscCheckScaLapackInfo("descinit", info);
1125 
1126     /* redistribute matrix */
1127     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1128     Bmpi->nooffprocentries = PETSC_TRUE;
1129   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1130     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1131     b->roworiented = PETSC_FALSE;
1132     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1133     PetscCall(ISGetIndices(ir, &rows));
1134     PetscCall(ISGetIndices(ic, &cols));
1135     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1136     PetscCall(ISRestoreIndices(ir, &rows));
1137     PetscCall(ISRestoreIndices(ic, &cols));
1138     PetscCall(ISDestroy(&ic));
1139     PetscCall(ISDestroy(&ir));
1140   }
1141   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1142   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1143   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1144   if (reuse == MAT_INPLACE_MATRIX) {
1145     PetscCall(MatHeaderReplace(A, &Bmpi));
1146   } else *B = Bmpi;
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 
1150 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1151 {
1152   Mat                mat_scal;
1153   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1154   const PetscInt    *cols;
1155   const PetscScalar *vals;
1156 
1157   PetscFunctionBegin;
1158   if (reuse == MAT_REUSE_MATRIX) {
1159     mat_scal = *newmat;
1160     PetscCall(MatZeroEntries(mat_scal));
1161   } else {
1162     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1163     m = PETSC_DECIDE;
1164     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1165     n = PETSC_DECIDE;
1166     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1167     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1168     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1169     PetscCall(MatSetUp(mat_scal));
1170   }
1171   for (row = rstart; row < rend; row++) {
1172     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1173     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1174     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1175   }
1176   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1177   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1178 
1179   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1180   else *newmat = mat_scal;
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1185 {
1186   Mat                mat_scal;
1187   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1188   const PetscInt    *cols;
1189   const PetscScalar *vals;
1190   PetscScalar        v;
1191 
1192   PetscFunctionBegin;
1193   if (reuse == MAT_REUSE_MATRIX) {
1194     mat_scal = *newmat;
1195     PetscCall(MatZeroEntries(mat_scal));
1196   } else {
1197     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1198     m = PETSC_DECIDE;
1199     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1200     n = PETSC_DECIDE;
1201     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1202     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1203     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1204     PetscCall(MatSetUp(mat_scal));
1205   }
1206   PetscCall(MatGetRowUpperTriangular(A));
1207   for (row = rstart; row < rend; row++) {
1208     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1209     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1210     for (j = 0; j < ncols; j++) { /* lower triangular part */
1211       if (cols[j] == row) continue;
1212       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1213       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1214     }
1215     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1216   }
1217   PetscCall(MatRestoreRowUpperTriangular(A));
1218   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1219   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1220 
1221   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1222   else *newmat = mat_scal;
1223   PetscFunctionReturn(PETSC_SUCCESS);
1224 }
1225 
1226 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1227 {
1228   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1229   PetscInt       sz = 0;
1230 
1231   PetscFunctionBegin;
1232   PetscCall(PetscLayoutSetUp(A->rmap));
1233   PetscCall(PetscLayoutSetUp(A->cmap));
1234   if (!a->lld) a->lld = a->locr;
1235 
1236   PetscCall(PetscFree(a->loc));
1237   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1238   PetscCall(PetscCalloc1(sz, &a->loc));
1239 
1240   A->preallocated = PETSC_TRUE;
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1245 {
1246   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1247   Mat_ScaLAPACK_Grid *grid;
1248   PetscBool           flg;
1249   MPI_Comm            icomm;
1250 
1251   PetscFunctionBegin;
1252   PetscCall(MatStashDestroy_Private(&A->stash));
1253   PetscCall(PetscFree(a->loc));
1254   PetscCall(PetscFree(a->pivots));
1255   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1256   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1257   if (--grid->grid_refct == 0) {
1258     Cblacs_gridexit(grid->ictxt);
1259     Cblacs_gridexit(grid->ictxrow);
1260     Cblacs_gridexit(grid->ictxcol);
1261     PetscCall(PetscFree(grid));
1262     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1263   }
1264   PetscCall(PetscCommDestroy(&icomm));
1265   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1266   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1267   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1268   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1269   PetscCall(PetscFree(A->data));
1270   PetscFunctionReturn(PETSC_SUCCESS);
1271 }
1272 
1273 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1274 {
1275   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1276   PetscBLASInt   info = 0;
1277   PetscBool      flg;
1278 
1279   PetscFunctionBegin;
1280   PetscCall(PetscLayoutSetUp(A->rmap));
1281   PetscCall(PetscLayoutSetUp(A->cmap));
1282 
1283   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1284   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1285   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1286   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1287   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1288 
1289   /* compute local sizes */
1290   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1291   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1292   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1293   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1294   a->lld  = PetscMax(1, a->locr);
1295 
1296   /* allocate local array */
1297   PetscCall(MatScaLAPACKSetPreallocation(A));
1298 
1299   /* set up ScaLAPACK descriptor */
1300   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1301   PetscCheckScaLapackInfo("descinit", info);
1302   PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304 
1305 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1306 {
1307   PetscInt nstash, reallocs;
1308 
1309   PetscFunctionBegin;
1310   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1311   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1312   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1313   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1314   PetscFunctionReturn(PETSC_SUCCESS);
1315 }
1316 
1317 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1318 {
1319   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1320   PetscMPIInt    n;
1321   PetscInt       i, flg, *row, *col;
1322   PetscScalar   *val;
1323   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1324 
1325   PetscFunctionBegin;
1326   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1327   while (1) {
1328     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1329     if (!flg) break;
1330     for (i = 0; i < n; i++) {
1331       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1332       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1333       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1334       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1335       switch (A->insertmode) {
1336       case INSERT_VALUES:
1337         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1338         break;
1339       case ADD_VALUES:
1340         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1341         break;
1342       default:
1343         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1344       }
1345     }
1346   }
1347   PetscCall(MatStashScatterEnd_Private(&A->stash));
1348   PetscFunctionReturn(PETSC_SUCCESS);
1349 }
1350 
1351 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1352 {
1353   Mat      Adense, As;
1354   MPI_Comm comm;
1355 
1356   PetscFunctionBegin;
1357   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1358   PetscCall(MatCreate(comm, &Adense));
1359   PetscCall(MatSetType(Adense, MATDENSE));
1360   PetscCall(MatLoad(Adense, viewer));
1361   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1362   PetscCall(MatDestroy(&Adense));
1363   PetscCall(MatHeaderReplace(newMat, &As));
1364   PetscFunctionReturn(PETSC_SUCCESS);
1365 }
1366 
1367 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1368                                        0,
1369                                        0,
1370                                        MatMult_ScaLAPACK,
1371                                        /* 4*/ MatMultAdd_ScaLAPACK,
1372                                        MatMultTranspose_ScaLAPACK,
1373                                        MatMultTransposeAdd_ScaLAPACK,
1374                                        MatSolve_ScaLAPACK,
1375                                        MatSolveAdd_ScaLAPACK,
1376                                        0,
1377                                        /*10*/ 0,
1378                                        MatLUFactor_ScaLAPACK,
1379                                        MatCholeskyFactor_ScaLAPACK,
1380                                        0,
1381                                        MatTranspose_ScaLAPACK,
1382                                        /*15*/ MatGetInfo_ScaLAPACK,
1383                                        0,
1384                                        MatGetDiagonal_ScaLAPACK,
1385                                        MatDiagonalScale_ScaLAPACK,
1386                                        MatNorm_ScaLAPACK,
1387                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1388                                        MatAssemblyEnd_ScaLAPACK,
1389                                        MatSetOption_ScaLAPACK,
1390                                        MatZeroEntries_ScaLAPACK,
1391                                        /*24*/ 0,
1392                                        MatLUFactorSymbolic_ScaLAPACK,
1393                                        MatLUFactorNumeric_ScaLAPACK,
1394                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1395                                        MatCholeskyFactorNumeric_ScaLAPACK,
1396                                        /*29*/ MatSetUp_ScaLAPACK,
1397                                        0,
1398                                        0,
1399                                        0,
1400                                        0,
1401                                        /*34*/ MatDuplicate_ScaLAPACK,
1402                                        0,
1403                                        0,
1404                                        0,
1405                                        0,
1406                                        /*39*/ MatAXPY_ScaLAPACK,
1407                                        0,
1408                                        0,
1409                                        0,
1410                                        MatCopy_ScaLAPACK,
1411                                        /*44*/ 0,
1412                                        MatScale_ScaLAPACK,
1413                                        MatShift_ScaLAPACK,
1414                                        0,
1415                                        0,
1416                                        /*49*/ 0,
1417                                        0,
1418                                        0,
1419                                        0,
1420                                        0,
1421                                        /*54*/ 0,
1422                                        0,
1423                                        0,
1424                                        0,
1425                                        0,
1426                                        /*59*/ 0,
1427                                        MatDestroy_ScaLAPACK,
1428                                        MatView_ScaLAPACK,
1429                                        0,
1430                                        0,
1431                                        /*64*/ 0,
1432                                        0,
1433                                        0,
1434                                        0,
1435                                        0,
1436                                        /*69*/ 0,
1437                                        0,
1438                                        MatConvert_ScaLAPACK_Dense,
1439                                        0,
1440                                        0,
1441                                        /*74*/ 0,
1442                                        0,
1443                                        0,
1444                                        0,
1445                                        0,
1446                                        /*79*/ 0,
1447                                        0,
1448                                        0,
1449                                        0,
1450                                        MatLoad_ScaLAPACK,
1451                                        /*84*/ 0,
1452                                        0,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                        /*89*/ 0,
1457                                        0,
1458                                        MatMatMultNumeric_ScaLAPACK,
1459                                        0,
1460                                        0,
1461                                        /*94*/ 0,
1462                                        0,
1463                                        0,
1464                                        MatMatTransposeMultNumeric_ScaLAPACK,
1465                                        0,
1466                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1467                                        0,
1468                                        0,
1469                                        MatConjugate_ScaLAPACK,
1470                                        0,
1471                                        /*104*/ 0,
1472                                        0,
1473                                        0,
1474                                        0,
1475                                        0,
1476                                        /*109*/ MatMatSolve_ScaLAPACK,
1477                                        0,
1478                                        0,
1479                                        0,
1480                                        MatMissingDiagonal_ScaLAPACK,
1481                                        /*114*/ 0,
1482                                        0,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                        /*119*/ 0,
1487                                        MatHermitianTranspose_ScaLAPACK,
1488                                        MatMultHermitianTranspose_ScaLAPACK,
1489                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1490                                        0,
1491                                        /*124*/ 0,
1492                                        0,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                        /*129*/ 0,
1497                                        0,
1498                                        0,
1499                                        MatTransposeMatMultNumeric_ScaLAPACK,
1500                                        0,
1501                                        /*134*/ 0,
1502                                        0,
1503                                        0,
1504                                        0,
1505                                        0,
1506                                        0,
1507                                        /*140*/ 0,
1508                                        0,
1509                                        0,
1510                                        0,
1511                                        0,
1512                                        /*145*/ 0,
1513                                        0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                        /*150*/ 0,
1518                                        0,
1519                                        0};
1520 
1521 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1522 {
1523   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1524   PetscInt           size = stash->size, nsends;
1525   PetscInt           count, *sindices, **rindices, i, j, l;
1526   PetscScalar      **rvalues, *svalues;
1527   MPI_Comm           comm = stash->comm;
1528   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1529   PetscMPIInt       *sizes, *nlengths, nreceives;
1530   PetscInt          *sp_idx, *sp_idy;
1531   PetscScalar       *sp_val;
1532   PetscMatStashSpace space, space_next;
1533   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1534   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1535 
1536   PetscFunctionBegin;
1537   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1538     InsertMode addv;
1539     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1540     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1541     mat->insertmode = addv; /* in case this processor had no cache */
1542   }
1543 
1544   bs2 = stash->bs * stash->bs;
1545 
1546   /*  first count number of contributors to each processor */
1547   PetscCall(PetscCalloc1(size, &nlengths));
1548   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1549 
1550   i = j = 0;
1551   space = stash->space_head;
1552   while (space) {
1553     space_next = space->next;
1554     for (l = 0; l < space->local_used; l++) {
1555       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1556       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1557       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1558       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1559       nlengths[j]++;
1560       owner[i] = j;
1561       i++;
1562     }
1563     space = space_next;
1564   }
1565 
1566   /* Now check what procs get messages - and compute nsends. */
1567   PetscCall(PetscCalloc1(size, &sizes));
1568   for (i = 0, nsends = 0; i < size; i++) {
1569     if (nlengths[i]) {
1570       sizes[i] = 1;
1571       nsends++;
1572     }
1573   }
1574 
1575   {
1576     PetscMPIInt *onodes, *olengths;
1577     /* Determine the number of messages to expect, their lengths, from from-ids */
1578     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1579     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1580     /* since clubbing row,col - lengths are multiplied by 2 */
1581     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1582     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1583     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1584     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1585     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1586     PetscCall(PetscFree(onodes));
1587     PetscCall(PetscFree(olengths));
1588   }
1589 
1590   /* do sends:
1591       1) starts[i] gives the starting index in svalues for stuff going to
1592          the ith processor
1593   */
1594   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1595   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1596   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1597   /* use 2 sends the first with all_a, the next with all_i and all_j */
1598   startv[0] = 0;
1599   starti[0] = 0;
1600   for (i = 1; i < size; i++) {
1601     startv[i] = startv[i - 1] + nlengths[i - 1];
1602     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1603   }
1604 
1605   i     = 0;
1606   space = stash->space_head;
1607   while (space) {
1608     space_next = space->next;
1609     sp_idx     = space->idx;
1610     sp_idy     = space->idy;
1611     sp_val     = space->val;
1612     for (l = 0; l < space->local_used; l++) {
1613       j = owner[i];
1614       if (bs2 == 1) {
1615         svalues[startv[j]] = sp_val[l];
1616       } else {
1617         PetscInt     k;
1618         PetscScalar *buf1, *buf2;
1619         buf1 = svalues + bs2 * startv[j];
1620         buf2 = space->val + bs2 * l;
1621         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1622       }
1623       sindices[starti[j]]               = sp_idx[l];
1624       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1625       startv[j]++;
1626       starti[j]++;
1627       i++;
1628     }
1629     space = space_next;
1630   }
1631   startv[0] = 0;
1632   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1633 
1634   for (i = 0, count = 0; i < size; i++) {
1635     if (sizes[i]) {
1636       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1637       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1638     }
1639   }
1640 #if defined(PETSC_USE_INFO)
1641   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1642   for (i = 0; i < size; i++) {
1643     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1644   }
1645 #endif
1646   PetscCall(PetscFree(nlengths));
1647   PetscCall(PetscFree(owner));
1648   PetscCall(PetscFree2(startv, starti));
1649   PetscCall(PetscFree(sizes));
1650 
1651   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1652   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1653 
1654   for (i = 0; i < nreceives; i++) {
1655     recv_waits[2 * i]     = recv_waits1[i];
1656     recv_waits[2 * i + 1] = recv_waits2[i];
1657   }
1658   stash->recv_waits = recv_waits;
1659 
1660   PetscCall(PetscFree(recv_waits1));
1661   PetscCall(PetscFree(recv_waits2));
1662 
1663   stash->svalues         = svalues;
1664   stash->sindices        = sindices;
1665   stash->rvalues         = rvalues;
1666   stash->rindices        = rindices;
1667   stash->send_waits      = send_waits;
1668   stash->nsends          = nsends;
1669   stash->nrecvs          = nreceives;
1670   stash->reproduce_count = 0;
1671   PetscFunctionReturn(PETSC_SUCCESS);
1672 }
1673 
1674 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1675 {
1676   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1677 
1678   PetscFunctionBegin;
1679   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1680   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1681   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1682   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1683   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1684   PetscFunctionReturn(PETSC_SUCCESS);
1685 }
1686 
1687 /*@
1688   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1689   the `MATSCALAPACK` matrix
1690 
1691   Logically Collective
1692 
1693   Input Parameters:
1694 + A  - a `MATSCALAPACK` matrix
1695 . mb - the row block size
1696 - nb - the column block size
1697 
1698   Level: intermediate
1699 
1700   Note:
1701   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1702 
1703 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1704 @*/
1705 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1706 {
1707   PetscFunctionBegin;
1708   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1709   PetscValidLogicalCollectiveInt(A, mb, 2);
1710   PetscValidLogicalCollectiveInt(A, nb, 3);
1711   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1712   PetscFunctionReturn(PETSC_SUCCESS);
1713 }
1714 
1715 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1716 {
1717   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1718 
1719   PetscFunctionBegin;
1720   if (mb) *mb = a->mb;
1721   if (nb) *nb = a->nb;
1722   PetscFunctionReturn(PETSC_SUCCESS);
1723 }
1724 
1725 /*@
1726   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1727   the `MATSCALAPACK` matrix
1728 
1729   Not Collective
1730 
1731   Input Parameter:
1732 . A - a `MATSCALAPACK` matrix
1733 
1734   Output Parameters:
1735 + mb - the row block size
1736 - nb - the column block size
1737 
1738   Level: intermediate
1739 
1740   Note:
1741   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1742 
1743 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1744 @*/
1745 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1746 {
1747   PetscFunctionBegin;
1748   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1749   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1754 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1755 
1756 /*MC
1757    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1758 
1759    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1760 
1761    Options Database Keys:
1762 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1763 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1764 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1765 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1766 
1767    Level: intermediate
1768 
1769   Note:
1770    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1771    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1772    the given rank.
1773 
1774 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1775 M*/
1776 
1777 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1778 {
1779   Mat_ScaLAPACK      *a;
1780   PetscBool           flg, flg1;
1781   Mat_ScaLAPACK_Grid *grid;
1782   MPI_Comm            icomm;
1783   PetscBLASInt        nprow, npcol, myrow, mycol;
1784   PetscInt            optv1, k = 2, array[2] = {0, 0};
1785   PetscMPIInt         size;
1786 
1787   PetscFunctionBegin;
1788   A->ops[0]     = MatOps_Values;
1789   A->insertmode = NOT_SET_VALUES;
1790 
1791   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1792   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1793   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1794   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1795   A->stash.ScatterDestroy = NULL;
1796 
1797   PetscCall(PetscNew(&a));
1798   A->data = (void *)a;
1799 
1800   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1801   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1802     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1803     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1804     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1805   }
1806   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1807   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1808   if (!flg) {
1809     PetscCall(PetscNew(&grid));
1810 
1811     PetscCallMPI(MPI_Comm_size(icomm, &size));
1812     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1813 
1814     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1815     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1816     if (flg1) {
1817       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1818       grid->nprow = optv1;
1819     }
1820     PetscOptionsEnd();
1821 
1822     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1823     grid->npcol = size / grid->nprow;
1824     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1825     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1826     grid->ictxt = Csys2blacs_handle(icomm);
1827     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1828     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1829     grid->grid_refct = 1;
1830     grid->nprow      = nprow;
1831     grid->npcol      = npcol;
1832     grid->myrow      = myrow;
1833     grid->mycol      = mycol;
1834     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1835     grid->ictxrow = Csys2blacs_handle(icomm);
1836     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1837     grid->ictxcol = Csys2blacs_handle(icomm);
1838     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1839     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1840 
1841   } else grid->grid_refct++;
1842   PetscCall(PetscCommDestroy(&icomm));
1843   a->grid = grid;
1844   a->mb   = DEFAULT_BLOCKSIZE;
1845   a->nb   = DEFAULT_BLOCKSIZE;
1846 
1847   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1848   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1849   if (flg) {
1850     a->mb = array[0];
1851     a->nb = (k > 1) ? array[1] : a->mb;
1852   }
1853   PetscOptionsEnd();
1854 
1855   a->roworiented = PETSC_TRUE;
1856   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1857   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1858   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1859   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1860   PetscFunctionReturn(PETSC_SUCCESS);
1861 }
1862 
1863 /*@C
1864   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1865   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1866 
1867   Collective
1868 
1869   Input Parameters:
1870 + comm - MPI communicator
1871 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1872 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1873 . M    - number of global rows
1874 . N    - number of global columns
1875 . rsrc - coordinate of process that owns the first row of the distributed matrix
1876 - csrc - coordinate of process that owns the first column of the distributed matrix
1877 
1878   Output Parameter:
1879 . A - the matrix
1880 
1881   Options Database Key:
1882 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1883 
1884   Level: intermediate
1885 
1886   Notes:
1887   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1888 
1889   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1890   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1891   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1892 
1893   Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1894   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1895   significance and are thus ignored. The block sizes refer to the values
1896   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1897 
1898 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1899 @*/
1900 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1901 {
1902   Mat_ScaLAPACK *a;
1903   PetscInt       m, n;
1904 
1905   PetscFunctionBegin;
1906   PetscCall(MatCreate(comm, A));
1907   PetscCall(MatSetType(*A, MATSCALAPACK));
1908   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1909   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1910   m = PETSC_DECIDE;
1911   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1912   n = PETSC_DECIDE;
1913   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1914   PetscCall(MatSetSizes(*A, m, n, M, N));
1915   a = (Mat_ScaLAPACK *)(*A)->data;
1916   PetscCall(PetscBLASIntCast(M, &a->M));
1917   PetscCall(PetscBLASIntCast(N, &a->N));
1918   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1919   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1920   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1921   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1922   PetscCall(MatSetUp(*A));
1923   PetscFunctionReturn(PETSC_SUCCESS);
1924 }
1925