/* xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928) */
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/*
  Release the MPI keyval used to attach the Mat_ScaLAPACK_Grid attribute
  to communicators (see the comment above Petsc_ScaLAPACK_keyval).
  NOTE(review): presumably registered to run at PetscFinalize() time by
  the code that creates the keyval — confirm at the registration site.
*/
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) {
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}
28 
29 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) {
30   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
31   PetscBool         iascii;
32   PetscViewerFormat format;
33   Mat               Adense;
34 
35   PetscFunctionBegin;
36   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
37   if (iascii) {
38     PetscCall(PetscViewerGetFormat(viewer, &format));
39     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
40       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
41       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
42       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
44       PetscFunctionReturn(0);
45     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
46       PetscFunctionReturn(0);
47     }
48   }
49   /* convert to dense format and call MatView() */
50   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
51   PetscCall(MatView(Adense, viewer));
52   PetscCall(MatDestroy(&Adense));
53   PetscFunctionReturn(0);
54 }
55 
56 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) {
57   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
58   PetscLogDouble isend[2], irecv[2];
59 
60   PetscFunctionBegin;
61   info->block_size = 1.0;
62 
63   isend[0] = a->lld * a->locc;  /* locally allocated */
64   isend[1] = a->locr * a->locc; /* used submatrix */
65   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
66     info->nz_allocated = isend[0];
67     info->nz_used      = isend[1];
68   } else if (flag == MAT_GLOBAL_MAX) {
69     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
70     info->nz_allocated = irecv[0];
71     info->nz_used      = irecv[1];
72   } else if (flag == MAT_GLOBAL_SUM) {
73     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
74     info->nz_allocated = irecv[0];
75     info->nz_used      = irecv[1];
76   }
77 
78   info->nz_unneeded       = 0;
79   info->assemblies        = A->num_ass;
80   info->mallocs           = 0;
81   info->memory            = ((PetscObject)A)->mem;
82   info->fill_ratio_given  = 0;
83   info->fill_ratio_needed = 0;
84   info->factor_mallocs    = 0;
85   PetscFunctionReturn(0);
86 }
87 
88 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) {
89   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
90 
91   PetscFunctionBegin;
92   switch (op) {
93   case MAT_NEW_NONZERO_LOCATIONS:
94   case MAT_NEW_NONZERO_LOCATION_ERR:
95   case MAT_NEW_NONZERO_ALLOCATION_ERR:
96   case MAT_SYMMETRIC:
97   case MAT_SORTED_FULL:
98   case MAT_HERMITIAN: break;
99   case MAT_ROW_ORIENTED: a->roworiented = flg; break;
100   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
101   }
102   PetscFunctionReturn(0);
103 }
104 
/*
  Insert or add a logically dense block of values into the matrix.

  Each (row,col) global index pair is translated to local coordinates
  with ScaLAPACK's infog2l; entries owned by this process are written
  directly into the local block-cyclic array, while off-process entries
  are queued in the matrix stash for communication at assembly time
  (an error if MAT_NO_OFF_PROC_ENTRIES was set).  The layout of vals
  (row- or column-major) follows a->roworiented.
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue; /* negative indices are silently skipped */
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); /* ScaLAPACK uses 1-based indices */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
      /* map (global row, global col) -> (local row, local col) and owning process (rsrc,csrc) */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { /* entry is owned locally */
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; break;
          }
        }
      } else { /* off-process entry: stash for MatAssemblyBegin/End */
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
141 
/*
  Shared kernel for MatMult/MatMultTranspose and their Add variants:
  computes y = op(A)*x + beta*y, where op(A) is A^T when 'transpose'
  is true (alpha is fixed at 1.0).

  x and y point to the raw arrays of PETSc vectors with the usual 1d
  (per-process contiguous) distribution.  They are redistributed to the
  matrix's 2d block-cyclic layout with pxgemr2d, the product is computed
  with PBLAS pxgemv, and y is redistributed back.  When beta == 0 the
  initial redistribution of y is skipped since its contents are ignored.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when beta != 0) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when beta != 0) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(0);
}
227 
228 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) {
229   const PetscScalar *xarray;
230   PetscScalar       *yarray;
231 
232   PetscFunctionBegin;
233   PetscCall(VecGetArrayRead(x, &xarray));
234   PetscCall(VecGetArray(y, &yarray));
235   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
236   PetscCall(VecRestoreArrayRead(x, &xarray));
237   PetscCall(VecRestoreArray(y, &yarray));
238   PetscFunctionReturn(0);
239 }
240 
241 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) {
242   const PetscScalar *xarray;
243   PetscScalar       *yarray;
244 
245   PetscFunctionBegin;
246   PetscCall(VecGetArrayRead(x, &xarray));
247   PetscCall(VecGetArray(y, &yarray));
248   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
249   PetscCall(VecRestoreArrayRead(x, &xarray));
250   PetscCall(VecRestoreArray(y, &yarray));
251   PetscFunctionReturn(0);
252 }
253 
254 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
255   const PetscScalar *xarray;
256   PetscScalar       *zarray;
257 
258   PetscFunctionBegin;
259   if (y != z) PetscCall(VecCopy(y, z));
260   PetscCall(VecGetArrayRead(x, &xarray));
261   PetscCall(VecGetArray(z, &zarray));
262   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
263   PetscCall(VecRestoreArrayRead(x, &xarray));
264   PetscCall(VecRestoreArray(z, &zarray));
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) {
269   const PetscScalar *xarray;
270   PetscScalar       *zarray;
271 
272   PetscFunctionBegin;
273   if (y != z) PetscCall(VecCopy(y, z));
274   PetscCall(VecGetArrayRead(x, &xarray));
275   PetscCall(VecGetArray(z, &zarray));
276   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
277   PetscCall(VecRestoreArrayRead(x, &xarray));
278   PetscCall(VecRestoreArray(z, &zarray));
279   PetscFunctionReturn(0);
280 }
281 
/*
  Numeric phase of C = A*B for MATSCALAPACK operands: a single PBLAS
  pxgemm call ("N","N") with alpha = 1, beta = 0.  All three matrices
  must already have their ScaLAPACK descriptors set up (the symbolic
  phase creates C).
*/
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
  Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
  PetscScalar    sone = 1.0, zero = 0.0;
  PetscBLASInt   one = 1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
294 
/*
  Symbolic phase of C = A*B: size C as (A rows) x (B cols), give it the
  MATSCALAPACK type, and register the numeric-phase routine.  fill is
  ignored (dense storage).
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
303 
/*
  Numeric phase of C = A*B^T for MATSCALAPACK operands: a single PBLAS
  pxgemm call with op(B) = "T" (plain transpose, no conjugation),
  alpha = 1, beta = 0.
*/
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
  Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
  PetscScalar    sone = 1.0, zero = 0.0;
  PetscBLASInt   one = 1;

  PetscFunctionBegin;
  PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
316 
317 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) {
318   PetscFunctionBegin;
319   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
320   PetscCall(MatSetType(C, MATSCALAPACK));
321   PetscCall(MatSetUp(C));
322   PetscFunctionReturn(0);
323 }
324 
325 /* --------------------------------------- */
/* Configure a MatProduct of type AB: install the ScaLAPACK symbolic
   routine and the generic AB product-symbolic driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) {
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
332 
/* Configure a MatProduct of type ABt: install the ScaLAPACK symbolic
   routine and the generic ABt product-symbolic driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) {
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
339 
/*
  Dispatch MatProductSetFromOptions for ScaLAPACK-ScaLAPACK products.
  Only AB (C = A*B) and ABt (C = A*B^T) are supported; any other
  product type is an error.
*/
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) {
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); break;
  case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); break;
  default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
351 /* --------------------------------------- */
352 
/*
  Extract the diagonal of A into vector D.

  The diagonal entries are gathered one at a time (elget/elset) into a
  temporary ScaLAPACK vector with 2d block-cyclic distribution, then
  redistributed with pxgemr2d to D's PETSc 1d layout.  When M <= N the
  diagonal is handled as a column of length M distributed over process
  rows ("row version"); otherwise as a row of length N distributed over
  process columns ("column version").
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *darray, *d2d, v;
  const PetscInt *ranges;
  PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D, &darray));

  if (A->rmap->N <= A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector (zeroed: elset only touches locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal (ScaLAPACK indices are 1-based) */
    for (j = 1; j <= a->M; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector (zeroed: elset only touches locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal (ScaLAPACK indices are 1-based) */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
423 
/*
  Diagonal scaling A = diag(L)*A*diag(R).  Either vector may be NULL.

  For R (column scaling) the vector is redistributed to a row of a 2d
  matrix that lives on process row 0, broadcast down each process
  column with the BLACS gebs2d/gebr2d pair, and then applied column-wise
  to the local array.  For L (row scaling) the symmetric procedure is
  used: redistribute to a column on process column 0, broadcast along
  process rows, and apply row-wise.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) {
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, everyone else receives */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling: column j of the local array is scaled by d2d[j] */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, everyone else receives */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling: row i of the local array is scaled by d2d[i] */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
497 
/* Dense storage: every diagonal entry is explicitly stored, so the
   diagonal is never "missing".  The output index d is left untouched. */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
503 
/* X = a*X: scale the entire allocated local array (lld*locc, i.e.
   including any leading-dimension padding rows) with BLAS xscal. */
static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) {
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   n, one = 1;

  PetscFunctionBegin;
  n = x->lld * x->locc;
  PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
  PetscFunctionReturn(0);
}
513 
/* X = X + alpha*I: walk the min(M,N) diagonal entries (1-based
   ScaLAPACK indices), fetching and re-storing each with elget/elset
   so the owning process updates its local copy. */
static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) {
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   i, n;
  PetscScalar    v;

  PetscFunctionBegin;
  n = PetscMin(x->M, x->N);
  for (i = 1; i <= n; i++) {
    PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
    v += alpha;
    PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
  }
  PetscFunctionReturn(0);
}
528 
/*
  Y = alpha*X + Y via ScaLAPACK's matrix-add routine (beta = 1 keeps
  the existing Y contribution).  Both matrices must have the same
  block-cyclic distribution, which is verified first; the structure
  argument str is irrelevant for dense storage.
*/
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
  Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
  Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
  PetscBLASInt   one  = 1;
  PetscScalar    beta = 1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y, 1, X, 3);
  PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}
541 
/* B = A: raw copy of the local array (lld*locc entries); str is
   irrelevant for dense storage.  NOTE(review): assumes B has the same
   distribution (lld, locc) as A — not checked here; verify callers. */
static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}
551 
/*
  Create a new MATSCALAPACK matrix with the same sizes and distribution
  parameters (blocking factors and source process) as A, optionally
  copying the local values (MAT_COPY_VALUES).  The distribution fields
  are set on the new Mat_ScaLAPACK before MatSetUp so the local arrays
  are allocated consistently; the result is marked assembled.
*/
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) {
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatCreate(comm, &Bs));
  PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(Bs, MATSCALAPACK));
  b       = (Mat_ScaLAPACK *)Bs->data;
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
575 
/*
  B = A^T via PBLAS pxtran.  Only MAT_INITIAL_MATRIX is supported; the
  result is created with swapped dimensions and swapped blocking/source
  parameters.  In complex builds the PBLAStran_ routine conjugates as
  well (see the undo loop below), so the conjugation is reverted to
  yield a plain transpose.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
601 
602 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) {
603   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
604   PetscInt       i, j;
605 
606   PetscFunctionBegin;
607   for (i = 0; i < a->locr; i++)
608     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
609   PetscFunctionReturn(0);
610 }
611 
/*
  B = A^H via PBLAS pxtran.  Only MAT_INITIAL_MATRIX is supported; the
  result is created with swapped dimensions and swapped blocking/source
  parameters.  Unlike MatTranspose_ScaLAPACK, no conjugation fixup is
  needed here: in complex builds PBLAStran_ already conjugates, which is
  exactly the Hermitian transpose (and in real builds A^H == A^T).
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
628 
/*
  Solve A*X = B with a previously factored A (LU via pxgetrs using the
  stored pivots, or Cholesky via pxpotrs with the lower factor).

  B is first copied into X, then the X array is redistributed from the
  PETSc 1d layout to a column of a 2d block-cyclic matrix with pxgemr2d,
  the triangular solves run on that copy, and the result is
  redistributed back into X.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine matching the factorization that was performed */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(0);
}
678 
/* Solve A x = b and add y: computes X = A^{-1} B, then X += Y */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) {
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A, B, X));
  PetscCall(VecAXPY(X, 1, Y));
  PetscFunctionReturn(0);
}
685 
686 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) {
687   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
688   PetscBool      flg1, flg2;
689   PetscBLASInt   one = 1, info;
690 
691   PetscFunctionBegin;
692   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
693   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
694   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
695   MatScaLAPACKCheckDistribution(B, 1, X, 2);
696   b = (Mat_ScaLAPACK *)B->data;
697   x = (Mat_ScaLAPACK *)X->data;
698   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
699 
700   switch (A->factortype) {
701   case MAT_FACTOR_LU:
702     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
703     PetscCheckScaLapackInfo("getrs", info);
704     break;
705   case MAT_FACTOR_CHOLESKY:
706     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
707     PetscCheckScaLapackInfo("potrs", info);
708     break;
709   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
710   }
711   PetscFunctionReturn(0);
712 }
713 
714 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) {
715   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
716   PetscBLASInt   one = 1, info;
717 
718   PetscFunctionBegin;
719   if (!a->pivots) {
720     PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
721     PetscCall(PetscLogObjectMemory((PetscObject)A, a->locr * sizeof(PetscBLASInt)));
722   }
723   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
724   PetscCheckScaLapackInfo("getrf", info);
725   A->factortype = MAT_FACTOR_LU;
726   A->assembled  = PETSC_TRUE;
727 
728   PetscCall(PetscFree(A->solvertype));
729   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
730   PetscFunctionReturn(0);
731 }
732 
733 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
734   PetscFunctionBegin;
735   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
736   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
737   PetscFunctionReturn(0);
738 }
739 
/* Symbolic LU factorization: intentionally a no-op for dense ScaLAPACK matrices. */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
745 
746 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) {
747   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
748   PetscBLASInt   one = 1, info;
749 
750   PetscFunctionBegin;
751   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
752   PetscCheckScaLapackInfo("potrf", info);
753   A->factortype = MAT_FACTOR_CHOLESKY;
754   A->assembled  = PETSC_TRUE;
755 
756   PetscCall(PetscFree(A->solvertype));
757   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
758   PetscFunctionReturn(0);
759 }
760 
761 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
762   PetscFunctionBegin;
763   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
764   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
765   PetscFunctionReturn(0);
766 }
767 
/* Symbolic Cholesky factorization: intentionally a no-op for dense ScaLAPACK matrices. */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
773 
/* Report the solver package that produced this factor (always MATSOLVERSCALAPACK). */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) {
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
779 
780 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) {
781   Mat            B;
782   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
783 
784   PetscFunctionBegin;
785   /* Create the factorization matrix */
786   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
787   B->trivialsymbolic = PETSC_TRUE;
788   B->factortype      = ftype;
789   PetscCall(PetscFree(B->solvertype));
790   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
791 
792   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
793   *F = B;
794   PetscFunctionReturn(0);
795 }
796 
/* Register the ScaLAPACK solver package for LU and Cholesky factorizations of MATSCALAPACK. */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) {
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
803 
804 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) {
805   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
806   PetscBLASInt   one = 1, lwork = 0;
807   const char    *ntype;
808   PetscScalar   *work = NULL, dummy;
809 
810   PetscFunctionBegin;
811   switch (type) {
812   case NORM_1:
813     ntype = "1";
814     lwork = PetscMax(a->locr, a->locc);
815     break;
816   case NORM_FROBENIUS:
817     ntype = "F";
818     work  = &dummy;
819     break;
820   case NORM_INFINITY:
821     ntype = "I";
822     lwork = PetscMax(a->locr, a->locc);
823     break;
824   default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
825   }
826   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
827   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
828   if (lwork) PetscCall(PetscFree(work));
829   PetscFunctionReturn(0);
830 }
831 
/* Zero the local array (lld x locc entries, including leading-dimension padding). */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
  PetscFunctionReturn(0);
}
839 
840 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) {
841   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
842   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
843 
844   PetscFunctionBegin;
845   if (rows) {
846     n     = a->locr;
847     nb    = a->mb;
848     isrc  = a->rsrc;
849     nproc = a->grid->nprow;
850     iproc = a->grid->myrow;
851     PetscCall(PetscMalloc1(n, &idx));
852     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
853     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
854   }
855   if (cols) {
856     n     = a->locc;
857     nb    = a->nb;
858     isrc  = a->csrc;
859     nproc = a->grid->npcol;
860     iproc = a->grid->mycol;
861     PetscCall(PetscMalloc1(n, &idx));
862     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
863     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
864   }
865   PetscFunctionReturn(0);
866 }
867 
/* Convert a MATSCALAPACK matrix (2d block-cyclic layout) to MATDENSE (1d block
   row layout) using ScaLAPACK's p*gemr2d redistribution routine.
   With MAT_REUSE_MATRIX, the existing *B may have a row distribution different
   from the equal-split 1d layout assumed by the descriptor below; in that case
   an auxiliary dense matrix is filled first and its rows are transferred to *B
   via MatSetValues(). */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
961 
962 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) {
963   const PetscInt *ranges;
964   PetscMPIInt     size;
965   PetscInt        i, n;
966 
967   PetscFunctionBegin;
968   *correct = PETSC_TRUE;
969   PetscCallMPI(MPI_Comm_size(map->comm, &size));
970   if (size > 1) {
971     PetscCall(PetscLayoutGetRanges(map, &ranges));
972     n = ranges[1] - ranges[0];
973     for (i = 1; i < size; i++)
974       if (ranges[i + 1] - ranges[i] != n) break;
975     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
976   }
977   PetscFunctionReturn(0);
978 }
979 
/* Convert a MATDENSE matrix to MATSCALAPACK (2d block-cyclic layout).
   If the input layout is ScaLAPACK-compatible (equal local sizes, see
   MatScaLAPACKCheckLayout), the data is redistributed directly with p*gemr2d;
   otherwise entries are inserted one local block at a time via MatSetValues(),
   which requires the dense array to have no leading-dimension padding. */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    /* all data landed locally, so assembly needs no communication */
    Bmpi->nooffprocentries = PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    /* the dense array is column-major, so insert with column orientation */
    b->roworiented = PETSC_FALSE;
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1043 
1044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1045   Mat                mat_scal;
1046   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1047   const PetscInt    *cols;
1048   const PetscScalar *vals;
1049 
1050   PetscFunctionBegin;
1051   if (reuse == MAT_REUSE_MATRIX) {
1052     mat_scal = *newmat;
1053     PetscCall(MatZeroEntries(mat_scal));
1054   } else {
1055     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1056     m = PETSC_DECIDE;
1057     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1058     n = PETSC_DECIDE;
1059     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1060     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1061     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1062     PetscCall(MatSetUp(mat_scal));
1063   }
1064   for (row = rstart; row < rend; row++) {
1065     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1066     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1067     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1068   }
1069   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1070   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1071 
1072   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1073   else *newmat = mat_scal;
1074   PetscFunctionReturn(0);
1075 }
1076 
/* Convert a symmetric (SBAIJ) matrix, which stores only the upper triangle,
   to MATSCALAPACK, which stores the full matrix. Each stored upper-triangular
   entry is mirrored to the lower triangle (conjugated if A is Hermitian).
   ADD_VALUES is used so the mirrored insertions from different owners combine;
   diagonal entries are inserted only once (the mirror loop skips them). */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue;
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1117 
1118 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) {
1119   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1120   PetscInt       sz = 0;
1121 
1122   PetscFunctionBegin;
1123   PetscCall(PetscLayoutSetUp(A->rmap));
1124   PetscCall(PetscLayoutSetUp(A->cmap));
1125   if (!a->lld) a->lld = a->locr;
1126 
1127   PetscCall(PetscFree(a->loc));
1128   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1129   PetscCall(PetscCalloc1(sz, &a->loc));
1130   PetscCall(PetscLogObjectMemory((PetscObject)A, sz * sizeof(PetscScalar)));
1131 
1132   A->preallocated = PETSC_TRUE;
1133   PetscFunctionReturn(0);
1134 }
1135 
/* Destroy a MATSCALAPACK matrix: free local storage, drop one reference to the
   shared BLACS grid (tearing the grid down when the last matrix goes away),
   and remove the composed functions. */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* the grid lives as an attribute on PETSc's inner duplicate of the communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    /* last user of this grid: release all three BLACS contexts and the attribute */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
1163 
/* Set up a MATSCALAPACK matrix: validate the layouts, compute the local sizes
   of the 2d block-cyclic distribution, allocate local storage, and initialize
   the ScaLAPACK array descriptor. */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  /* numroc gives this process's share of the block-cyclic distribution */
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr); /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(0);
}
1194 
/* Begin assembly: start scattering stashed off-process entries to their owners.
   Skipped entirely when the caller guaranteed there are no off-process entries. */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) {
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}
1205 
/* End assembly: receive stashed entries from other processes and store each one
   into the local block-cyclic array, using infog2l to map global (row,col)
   indices to local indices. */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    /* drain one message of stashed (row,col,val) triples at a time */
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break;
    for (i = 0; i < n; i++) {
      /* infog2l uses 1-based global indices */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      /* store in the column-major local array (1-based local indices from infog2l) */
      switch (A->insertmode) {
      case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; break;
      case ADD_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; break;
      default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}
1233 
1234 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) {
1235   Mat      Adense, As;
1236   MPI_Comm comm;
1237 
1238   PetscFunctionBegin;
1239   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1240   PetscCall(MatCreate(comm, &Adense));
1241   PetscCall(MatSetType(Adense, MATDENSE));
1242   PetscCall(MatLoad(Adense, viewer));
1243   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1244   PetscCall(MatDestroy(&Adense));
1245   PetscCall(MatHeaderReplace(newMat, &As));
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 /* -------------------------------------------------------------------*/
1250 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1251                                        0,
1252                                        0,
1253                                        MatMult_ScaLAPACK,
1254                                        /* 4*/ MatMultAdd_ScaLAPACK,
1255                                        MatMultTranspose_ScaLAPACK,
1256                                        MatMultTransposeAdd_ScaLAPACK,
1257                                        MatSolve_ScaLAPACK,
1258                                        MatSolveAdd_ScaLAPACK,
1259                                        0,
1260                                        /*10*/ 0,
1261                                        MatLUFactor_ScaLAPACK,
1262                                        MatCholeskyFactor_ScaLAPACK,
1263                                        0,
1264                                        MatTranspose_ScaLAPACK,
1265                                        /*15*/ MatGetInfo_ScaLAPACK,
1266                                        0,
1267                                        MatGetDiagonal_ScaLAPACK,
1268                                        MatDiagonalScale_ScaLAPACK,
1269                                        MatNorm_ScaLAPACK,
1270                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1271                                        MatAssemblyEnd_ScaLAPACK,
1272                                        MatSetOption_ScaLAPACK,
1273                                        MatZeroEntries_ScaLAPACK,
1274                                        /*24*/ 0,
1275                                        MatLUFactorSymbolic_ScaLAPACK,
1276                                        MatLUFactorNumeric_ScaLAPACK,
1277                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1278                                        MatCholeskyFactorNumeric_ScaLAPACK,
1279                                        /*29*/ MatSetUp_ScaLAPACK,
1280                                        0,
1281                                        0,
1282                                        0,
1283                                        0,
1284                                        /*34*/ MatDuplicate_ScaLAPACK,
1285                                        0,
1286                                        0,
1287                                        0,
1288                                        0,
1289                                        /*39*/ MatAXPY_ScaLAPACK,
1290                                        0,
1291                                        0,
1292                                        0,
1293                                        MatCopy_ScaLAPACK,
1294                                        /*44*/ 0,
1295                                        MatScale_ScaLAPACK,
1296                                        MatShift_ScaLAPACK,
1297                                        0,
1298                                        0,
1299                                        /*49*/ 0,
1300                                        0,
1301                                        0,
1302                                        0,
1303                                        0,
1304                                        /*54*/ 0,
1305                                        0,
1306                                        0,
1307                                        0,
1308                                        0,
1309                                        /*59*/ 0,
1310                                        MatDestroy_ScaLAPACK,
1311                                        MatView_ScaLAPACK,
1312                                        0,
1313                                        0,
1314                                        /*64*/ 0,
1315                                        0,
1316                                        0,
1317                                        0,
1318                                        0,
1319                                        /*69*/ 0,
1320                                        0,
1321                                        MatConvert_ScaLAPACK_Dense,
1322                                        0,
1323                                        0,
1324                                        /*74*/ 0,
1325                                        0,
1326                                        0,
1327                                        0,
1328                                        0,
1329                                        /*79*/ 0,
1330                                        0,
1331                                        0,
1332                                        0,
1333                                        MatLoad_ScaLAPACK,
1334                                        /*84*/ 0,
1335                                        0,
1336                                        0,
1337                                        0,
1338                                        0,
1339                                        /*89*/ 0,
1340                                        0,
1341                                        MatMatMultNumeric_ScaLAPACK,
1342                                        0,
1343                                        0,
1344                                        /*94*/ 0,
1345                                        0,
1346                                        0,
1347                                        MatMatTransposeMultNumeric_ScaLAPACK,
1348                                        0,
1349                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1350                                        0,
1351                                        0,
1352                                        MatConjugate_ScaLAPACK,
1353                                        0,
1354                                        /*104*/ 0,
1355                                        0,
1356                                        0,
1357                                        0,
1358                                        0,
1359                                        /*109*/ MatMatSolve_ScaLAPACK,
1360                                        0,
1361                                        0,
1362                                        0,
1363                                        MatMissingDiagonal_ScaLAPACK,
1364                                        /*114*/ 0,
1365                                        0,
1366                                        0,
1367                                        0,
1368                                        0,
1369                                        /*119*/ 0,
1370                                        MatHermitianTranspose_ScaLAPACK,
1371                                        0,
1372                                        0,
1373                                        0,
1374                                        /*124*/ 0,
1375                                        0,
1376                                        0,
1377                                        0,
1378                                        0,
1379                                        /*129*/ 0,
1380                                        0,
1381                                        0,
1382                                        0,
1383                                        0,
1384                                        /*134*/ 0,
1385                                        0,
1386                                        0,
1387                                        0,
1388                                        0,
1389                                        0,
1390                                        /*140*/ 0,
1391                                        0,
1392                                        0,
1393                                        0,
1394                                        0,
1395                                        /*145*/ 0,
1396                                        0,
1397                                        0,
1398                                        0,
1399                                        0,
1400                                        /*150*/ 0};
1401 
/* Begin scattering the entries stashed during MatSetValues() to their owning processes.
   Unlike the generic stash scatter, the destination of each (row,col,value) triple is
   determined by the 2D block-cyclic ScaLAPACK layout (SCALAPACKinfog2l + Cblacs_pnum),
   so the "owners" range array of the generic interface is not used here.
   Two messages are posted per destination: one with the (row,col) index pairs (tag1)
   and one with the values (tag2); the matching receives are pre-posted before sending. */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) {
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    /* BOR of the enum values detects a mix: ADD_VALUES|INSERT_VALUES is not a valid single mode */
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs; /* number of scalars per stashed block */

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  /* pass 1 over the stash: map each global (row,col) to the rank that owns it in the
     block-cyclic layout, recording the owner per entry and the per-rank entry counts */
  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* infog2l takes 1-based global indices, hence the +1 */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      /* translate the (process row, process column) coordinates into a linear rank */
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  /* pass 2 over the stash: pack values and index pairs into the send buffers;
     row indices go at starti[j], column indices nlengths[j] further along */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* the packing loop advanced startv[]; rebuild the offsets before posting the sends */
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  /* NOTE(review): the destination rank 'i' is a PetscInt while MPI expects an int;
     assumes comm size fits in PetscMPIInt — confirm whether a PetscMPIInt cast is wanted */
  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  /* interleave the index and value receive requests pairwise */
  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* hand ownership of all buffers and requests to the stash; they are released in ScatterEnd */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1553 
1554 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) {
1555   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1556 
1557   PetscFunctionBegin;
1558   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1559   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1560   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1561   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1562   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 /*@
1567    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1568    the `MATSCALAPACK` matrix
1569 
1570    Logically Collective on A
1571 
1572    Input Parameters:
1573 +  A  - a `MATSCALAPACK` matrix
1574 .  mb - the row block size
1575 -  nb - the column block size
1576 
1577    Level: intermediate
1578 
1579 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1580 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* block sizes must match across ranks since they define the parallel layout */
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  /* PetscTryMethod: silently a no-op when A is not of type MATSCALAPACK */
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(0);
}
1589 
1590 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) {
1591   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1592 
1593   PetscFunctionBegin;
1594   if (mb) *mb = a->mb;
1595   if (nb) *nb = a->nb;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 /*@
1600    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1601    the `MATSCALAPACK` matrix
1602 
1603    Not collective
1604 
1605    Input Parameter:
1606 .  A  - a `MATSCALAPACK` matrix
1607 
1608    Output Parameters:
1609 +  mb - the row block size
1610 -  nb - the column block size
1611 
1612    Level: intermediate
1613 
1614 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1615 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* PetscUseMethod: raises an error if A is not of type MATSCALAPACK */
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(0);
}
1622 
1623 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1624 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1625 
1626 /*MC
1627    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1628 
1629    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1630 
1631    Options Database Keys:
1632 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
1633 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1634 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1635 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1636 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.
1641 
1642    Level: beginner
1643 
1644 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1645 M*/
1646 
/* Constructor for MATSCALAPACK: installs the function table, the ScaLAPACK-specific
   stash scatter, and the BLACS process grid. The grid is shared by all MATSCALAPACK
   matrices on the same communicator via MPI attribute caching (Petsc_ScaLAPACK_keyval). */
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK      *a;
  PetscBool           flg, flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm            icomm;
  PetscBLASInt        nprow, npcol, myrow, mycol;
  PetscInt            optv1, k = 2, array[2] = {0, 0};
  PetscMPIInt         size;

  PetscFunctionBegin;
  PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  /* off-process MatSetValues() entries are routed by the block-cyclic layout,
     so the stash uses the custom ScatterBegin defined in this file */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNewLog(A, &a));
  A->data = (void *)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* first MATSCALAPACK ever created: register the keyval (freed at PetscFinalize) and the citation */
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
  }
  /* use PETSc's inner duplicate of the communicator so the attribute is shared consistently */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (!flg) {
    /* no grid cached on this communicator yet: create one */
    PetscCall(PetscNewLog(A, &grid));

    PetscCallMPI(MPI_Comm_size(icomm, &size));
    /* start from the most square grid possible; +0.001 guards against sqrt rounding just below an integer */
    grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);

    PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
    if (flg1) {
      PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
      grid->nprow = optv1;
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size / grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
    PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
    /* read back this rank's coordinates in the freshly created grid */
    Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow    = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
    /* cache the grid on the communicator for reuse by later MATSCALAPACK matrices */
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));

  } else grid->grid_refct++; /* reuse the cached grid; refcount balanced in MatDestroy */
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
  if (flg) {
    /* a single value sets both block sizes; two values set row and column block sizes */
    a->mb = array[0];
    a->nb = (k > 1) ? array[1] : a->mb;
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
  PetscFunctionReturn(0);
}
1731 
1732 /*@C
1733    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1734    (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1735 
1736    Collective
1737 
1738    Input Parameters:
1739 +  comm - MPI communicator
1740 .  mb   - row block size (or `PETSC_DECIDE` to have it set)
1741 .  nb   - column block size (or `PETSC_DECIDE` to have it set)
1742 .  M    - number of global rows
1743 .  N    - number of global columns
1744 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1745 -  csrc - coordinate of process that owns the first column of the distributed matrix
1746 
1747    Output Parameter:
1748 .  A - the matrix
1749 
1750    Options Database Key:
1751 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1752 
1753    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1754    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1755    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1756 
1757    Note:
1758    If `PETSC_DECIDE` is used for the block sizes, then an appropriate value
1759    is chosen.
1760 
1761    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1763    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1764    significance and are thus ignored. The block sizes refer to the values
1765    used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1766 
1767    Level: intermediate
1768 
1769 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1770 @*/
1771 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) {
1772   Mat_ScaLAPACK *a;
1773   PetscInt       m, n;
1774 
1775   PetscFunctionBegin;
1776   PetscCall(MatCreate(comm, A));
1777   PetscCall(MatSetType(*A, MATSCALAPACK));
1778   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1779   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1780   m = PETSC_DECIDE;
1781   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1782   n = PETSC_DECIDE;
1783   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1784   PetscCall(MatSetSizes(*A, m, n, M, N));
1785   a = (Mat_ScaLAPACK *)(*A)->data;
1786   PetscCall(PetscBLASIntCast(M, &a->M));
1787   PetscCall(PetscBLASIntCast(N, &a->N));
1788   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1789   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1790   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1791   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1792   PetscCall(MatSetUp(*A));
1793   PetscFunctionReturn(0);
1794 }
1795