xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 43cdf1ebbcbfa05bee08e48007ef1bae3f20f4e9)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(0);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(0);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(0);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(0);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(0);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     xlld = PetscMax(1, A->rmap->n);
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     xlld = PetscMax(1, lszx);
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200 
201     /* redistribute y from a row of a 2d matrix */
202     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
203 
204   } else { /* non-transpose */
205 
206     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
207     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
208     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
209     xlld = 1;
210     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
211     PetscCheckScaLapackInfo("descinit", info);
212     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
213     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
214     ylld = PetscMax(1, A->rmap->n);
215     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
216     PetscCheckScaLapackInfo("descinit", info);
217 
218     /* allocate 2d vectors */
219     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
220     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
221     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
222     ylld = PetscMax(1, lszy);
223 
224     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
225     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
226     PetscCheckScaLapackInfo("descinit", info);
227     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
228     PetscCheckScaLapackInfo("descinit", info);
229 
230     /* redistribute x as a row of a 2d matrix */
231     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
232 
233     /* redistribute y as a column of a 2d matrix */
234     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
235 
236     /* call PBLAS subroutine */
237     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
238 
239     /* redistribute y from a column of a 2d matrix */
240     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
241   }
242   PetscCall(PetscFree2(x2d, y2d));
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
247 {
248   const PetscScalar *xarray;
249   PetscScalar       *yarray;
250 
251   PetscFunctionBegin;
252   PetscCall(VecGetArrayRead(x, &xarray));
253   PetscCall(VecGetArray(y, &yarray));
254   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
255   PetscCall(VecRestoreArrayRead(x, &xarray));
256   PetscCall(VecRestoreArray(y, &yarray));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
261 {
262   const PetscScalar *xarray;
263   PetscScalar       *yarray;
264 
265   PetscFunctionBegin;
266   PetscCall(VecGetArrayRead(x, &xarray));
267   PetscCall(VecGetArray(y, &yarray));
268   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
269   PetscCall(VecRestoreArrayRead(x, &xarray));
270   PetscCall(VecRestoreArray(y, &yarray));
271   PetscFunctionReturn(0);
272 }
273 
274 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
275 {
276   const PetscScalar *xarray;
277   PetscScalar       *zarray;
278 
279   PetscFunctionBegin;
280   if (y != z) PetscCall(VecCopy(y, z));
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArray(z, &zarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArray(z, &zarray));
286   PetscFunctionReturn(0);
287 }
288 
289 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(0);
302 }
303 
304 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
305 {
306   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
307   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
308   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
309   PetscScalar    sone = 1.0, zero = 0.0;
310   PetscBLASInt   one = 1;
311 
312   PetscFunctionBegin;
313   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
314   C->assembled = PETSC_TRUE;
315   PetscFunctionReturn(0);
316 }
317 
318 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
319 {
320   PetscFunctionBegin;
321   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
322   PetscCall(MatSetType(C, MATSCALAPACK));
323   PetscCall(MatSetUp(C));
324   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
325   PetscFunctionReturn(0);
326 }
327 
328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
329 {
330   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
331   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
332   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
333   PetscScalar    sone = 1.0, zero = 0.0;
334   PetscBLASInt   one = 1;
335 
336   PetscFunctionBegin;
337   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
338   C->assembled = PETSC_TRUE;
339   PetscFunctionReturn(0);
340 }
341 
342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
343 {
344   PetscFunctionBegin;
345   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
346   PetscCall(MatSetType(C, MATSCALAPACK));
347   PetscCall(MatSetUp(C));
348   PetscFunctionReturn(0);
349 }
350 
351 /* --------------------------------------- */
352 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
353 {
354   PetscFunctionBegin;
355   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
356   C->ops->productsymbolic = MatProductSymbolic_AB;
357   PetscFunctionReturn(0);
358 }
359 
360 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
361 {
362   PetscFunctionBegin;
363   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
364   C->ops->productsymbolic          = MatProductSymbolic_ABt;
365   PetscFunctionReturn(0);
366 }
367 
368 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
369 {
370   Mat_Product *product = C->product;
371 
372   PetscFunctionBegin;
373   switch (product->type) {
374   case MATPRODUCT_AB:
375     PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
376     break;
377   case MATPRODUCT_ABt:
378     PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
379     break;
380   default:
381     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
382   }
383   PetscFunctionReturn(0);
384 }
385 /* --------------------------------------- */
386 
387 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
388 {
389   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
390   PetscScalar    *darray, *d2d, v;
391   const PetscInt *ranges;
392   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
393 
394   PetscFunctionBegin;
395   PetscCall(VecGetArray(D, &darray));
396 
397   if (A->rmap->N <= A->cmap->N) { /* row version */
398 
399     /* create ScaLAPACK descriptor for vector (1d block distribution) */
400     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
401     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
402     dlld = PetscMax(1, A->rmap->n);
403     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
404     PetscCheckScaLapackInfo("descinit", info);
405 
406     /* allocate 2d vector */
407     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
408     PetscCall(PetscCalloc1(lszd, &d2d));
409     dlld = PetscMax(1, lszd);
410 
411     /* create ScaLAPACK descriptor for vector (2d block distribution) */
412     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
413     PetscCheckScaLapackInfo("descinit", info);
414 
415     /* collect diagonal */
416     for (j = 1; j <= a->M; j++) {
417       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
418       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
419     }
420 
421     /* redistribute d from a column of a 2d matrix */
422     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
423     PetscCall(PetscFree(d2d));
424 
425   } else { /* column version */
426 
427     /* create ScaLAPACK descriptor for vector (1d block distribution) */
428     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
429     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
430     dlld = 1;
431     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
432     PetscCheckScaLapackInfo("descinit", info);
433 
434     /* allocate 2d vector */
435     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
436     PetscCall(PetscCalloc1(lszd, &d2d));
437 
438     /* create ScaLAPACK descriptor for vector (2d block distribution) */
439     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
440     PetscCheckScaLapackInfo("descinit", info);
441 
442     /* collect diagonal */
443     for (j = 1; j <= a->N; j++) {
444       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
445       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
446     }
447 
448     /* redistribute d from a row of a 2d matrix */
449     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
450     PetscCall(PetscFree(d2d));
451   }
452 
453   PetscCall(VecRestoreArray(D, &darray));
454   PetscCall(VecAssemblyBegin(D));
455   PetscCall(VecAssemblyEnd(D));
456   PetscFunctionReturn(0);
457 }
458 
459 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
460 {
461   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
462   const PetscScalar *d;
463   const PetscInt    *ranges;
464   PetscScalar       *d2d;
465   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
466 
467   PetscFunctionBegin;
468   if (R) {
469     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* redistribute d to a row of a 2d matrix */
486     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
487 
488     /* broadcast along process columns */
489     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
490     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
491 
492     /* local scaling */
493     for (j = 0; j < a->locc; j++)
494       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
495 
496     PetscCall(PetscFree(d2d));
497     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
498   }
499   if (L) {
500     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
501     /* create ScaLAPACK descriptor for vector (1d block distribution) */
502     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
503     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
504     dlld = PetscMax(1, A->rmap->n);
505     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
506     PetscCheckScaLapackInfo("descinit", info);
507 
508     /* allocate 2d vector */
509     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
510     PetscCall(PetscCalloc1(lszd, &d2d));
511     dlld = PetscMax(1, lszd);
512 
513     /* create ScaLAPACK descriptor for vector (2d block distribution) */
514     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
515     PetscCheckScaLapackInfo("descinit", info);
516 
517     /* redistribute d to a column of a 2d matrix */
518     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
519 
520     /* broadcast along process rows */
521     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
522     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
523 
524     /* local scaling */
525     for (i = 0; i < a->locr; i++)
526       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
527 
528     PetscCall(PetscFree(d2d));
529     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
535 {
536   PetscFunctionBegin;
537   *missing = PETSC_FALSE;
538   PetscFunctionReturn(0);
539 }
540 
541 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
542 {
543   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
544   PetscBLASInt   n, one = 1;
545 
546   PetscFunctionBegin;
547   n = x->lld * x->locc;
548   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
549   PetscFunctionReturn(0);
550 }
551 
552 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
553 {
554   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
555   PetscBLASInt   i, n;
556   PetscScalar    v;
557 
558   PetscFunctionBegin;
559   n = PetscMin(x->M, x->N);
560   for (i = 1; i <= n; i++) {
561     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
562     v += alpha;
563     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
569 {
570   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
571   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
572   PetscBLASInt   one  = 1;
573   PetscScalar    beta = 1.0;
574 
575   PetscFunctionBegin;
576   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
577   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
578   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
579   PetscFunctionReturn(0);
580 }
581 
582 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
583 {
584   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
585   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
586 
587   PetscFunctionBegin;
588   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
589   PetscCall(PetscObjectStateIncrease((PetscObject)B));
590   PetscFunctionReturn(0);
591 }
592 
593 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
594 {
595   Mat            Bs;
596   MPI_Comm       comm;
597   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
598 
599   PetscFunctionBegin;
600   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
601   PetscCall(MatCreate(comm, &Bs));
602   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
603   PetscCall(MatSetType(Bs, MATSCALAPACK));
604   b       = (Mat_ScaLAPACK *)Bs->data;
605   b->M    = a->M;
606   b->N    = a->N;
607   b->mb   = a->mb;
608   b->nb   = a->nb;
609   b->rsrc = a->rsrc;
610   b->csrc = a->csrc;
611   PetscCall(MatSetUp(Bs));
612   *B = Bs;
613   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
614   Bs->assembled = PETSC_TRUE;
615   PetscFunctionReturn(0);
616 }
617 
618 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
619 {
620   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
621   Mat            Bs   = *B;
622   PetscBLASInt   one  = 1;
623   PetscScalar    sone = 1.0, zero = 0.0;
624 #if defined(PETSC_USE_COMPLEX)
625   PetscInt i, j;
626 #endif
627 
628   PetscFunctionBegin;
629   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
630   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
631   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
632   *B = Bs;
633   b  = (Mat_ScaLAPACK *)Bs->data;
634   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
635 #if defined(PETSC_USE_COMPLEX)
636   /* undo conjugation */
637   for (i = 0; i < b->locr; i++)
638     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
639 #endif
640   Bs->assembled = PETSC_TRUE;
641   PetscFunctionReturn(0);
642 }
643 
644 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
645 {
646   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
647   PetscInt       i, j;
648 
649   PetscFunctionBegin;
650   for (i = 0; i < a->locr; i++)
651     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
652   PetscFunctionReturn(0);
653 }
654 
655 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
656 {
657   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
658   Mat            Bs   = *B;
659   PetscBLASInt   one  = 1;
660   PetscScalar    sone = 1.0, zero = 0.0;
661 
662   PetscFunctionBegin;
663   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
664   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
665   *B = Bs;
666   b  = (Mat_ScaLAPACK *)Bs->data;
667   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
668   Bs->assembled = PETSC_TRUE;
669   PetscFunctionReturn(0);
670 }
671 
672 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
673 {
674   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
675   PetscScalar    *x, *x2d;
676   const PetscInt *ranges;
677   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
678 
679   PetscFunctionBegin;
680   PetscCall(VecCopy(B, X));
681   PetscCall(VecGetArray(X, &x));
682 
683   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
684   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
685   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
686   xlld = PetscMax(1, A->rmap->n);
687   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
688   PetscCheckScaLapackInfo("descinit", info);
689 
690   /* allocate 2d vector */
691   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
692   PetscCall(PetscMalloc1(lszx, &x2d));
693   xlld = PetscMax(1, lszx);
694 
695   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
696   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
697   PetscCheckScaLapackInfo("descinit", info);
698 
699   /* redistribute x as a column of a 2d matrix */
700   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
701 
702   /* call ScaLAPACK subroutine */
703   switch (A->factortype) {
704   case MAT_FACTOR_LU:
705     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
706     PetscCheckScaLapackInfo("getrs", info);
707     break;
708   case MAT_FACTOR_CHOLESKY:
709     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
710     PetscCheckScaLapackInfo("potrs", info);
711     break;
712   default:
713     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
714   }
715 
716   /* redistribute x from a column of a 2d matrix */
717   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
718 
719   PetscCall(PetscFree(x2d));
720   PetscCall(VecRestoreArray(X, &x));
721   PetscFunctionReturn(0);
722 }
723 
724 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
725 {
726   PetscFunctionBegin;
727   PetscCall(MatSolve_ScaLAPACK(A, B, X));
728   PetscCall(VecAXPY(X, 1, Y));
729   PetscFunctionReturn(0);
730 }
731 
732 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
733 {
734   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
735   PetscBool      flg1, flg2;
736   PetscBLASInt   one = 1, info;
737 
738   PetscFunctionBegin;
739   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
740   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
741   PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
742   MatScaLAPACKCheckDistribution(B, 1, X, 2);
743   b = (Mat_ScaLAPACK *)B->data;
744   x = (Mat_ScaLAPACK *)X->data;
745   PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
746 
747   switch (A->factortype) {
748   case MAT_FACTOR_LU:
749     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
750     PetscCheckScaLapackInfo("getrs", info);
751     break;
752   case MAT_FACTOR_CHOLESKY:
753     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
754     PetscCheckScaLapackInfo("potrs", info);
755     break;
756   default:
757     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
763 {
764   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
765   PetscBLASInt   one = 1, info;
766 
767   PetscFunctionBegin;
768   if (!a->pivots) { PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); }
769   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
770   PetscCheckScaLapackInfo("getrf", info);
771   A->factortype = MAT_FACTOR_LU;
772   A->assembled  = PETSC_TRUE;
773 
774   PetscCall(PetscFree(A->solvertype));
775   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
776   PetscFunctionReturn(0);
777 }
778 
779 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
780 {
781   PetscFunctionBegin;
782   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
783   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
784   PetscFunctionReturn(0);
785 }
786 
787 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
788 {
789   PetscFunctionBegin;
790   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
791   PetscFunctionReturn(0);
792 }
793 
794 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
795 {
796   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
797   PetscBLASInt   one = 1, info;
798 
799   PetscFunctionBegin;
800   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
801   PetscCheckScaLapackInfo("potrf", info);
802   A->factortype = MAT_FACTOR_CHOLESKY;
803   A->assembled  = PETSC_TRUE;
804 
805   PetscCall(PetscFree(A->solvertype));
806   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
807   PetscFunctionReturn(0);
808 }
809 
810 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
811 {
812   PetscFunctionBegin;
813   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
814   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
815   PetscFunctionReturn(0);
816 }
817 
818 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
819 {
820   PetscFunctionBegin;
821   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
822   PetscFunctionReturn(0);
823 }
824 
825 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
826 {
827   PetscFunctionBegin;
828   *type = MATSOLVERSCALAPACK;
829   PetscFunctionReturn(0);
830 }
831 
832 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
833 {
834   Mat            B;
835   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
836 
837   PetscFunctionBegin;
838   /* Create the factorization matrix */
839   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
840   B->trivialsymbolic = PETSC_TRUE;
841   B->factortype      = ftype;
842   PetscCall(PetscFree(B->solvertype));
843   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
844 
845   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
846   *F = B;
847   PetscFunctionReturn(0);
848 }
849 
850 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
851 {
852   PetscFunctionBegin;
853   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
854   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
855   PetscFunctionReturn(0);
856 }
857 
858 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
859 {
860   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
861   PetscBLASInt   one = 1, lwork = 0;
862   const char    *ntype;
863   PetscScalar   *work = NULL, dummy;
864 
865   PetscFunctionBegin;
866   switch (type) {
867   case NORM_1:
868     ntype = "1";
869     lwork = PetscMax(a->locr, a->locc);
870     break;
871   case NORM_FROBENIUS:
872     ntype = "F";
873     work  = &dummy;
874     break;
875   case NORM_INFINITY:
876     ntype = "I";
877     lwork = PetscMax(a->locr, a->locc);
878     break;
879   default:
880     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
881   }
882   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
883   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
884   if (lwork) PetscCall(PetscFree(work));
885   PetscFunctionReturn(0);
886 }
887 
888 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
889 {
890   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
891 
892   PetscFunctionBegin;
893   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
894   PetscFunctionReturn(0);
895 }
896 
897 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
898 {
899   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
900   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
901 
902   PetscFunctionBegin;
903   if (rows) {
904     n     = a->locr;
905     nb    = a->mb;
906     isrc  = a->rsrc;
907     nproc = a->grid->nprow;
908     iproc = a->grid->myrow;
909     PetscCall(PetscMalloc1(n, &idx));
910     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
911     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
912   }
913   if (cols) {
914     n     = a->locc;
915     nb    = a->nb;
916     isrc  = a->csrc;
917     nproc = a->grid->npcol;
918     iproc = a->grid->mycol;
919     PetscCall(PetscMalloc1(n, &idx));
920     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
921     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
922   }
923   PetscFunctionReturn(0);
924 }
925 
926 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
927 {
928   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
929   Mat                Bmpi;
930   MPI_Comm           comm;
931   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
932   const PetscInt    *ranges, *branges, *cwork;
933   const PetscScalar *vwork;
934   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
935   PetscScalar       *barray;
936   PetscBool          differ = PETSC_FALSE;
937   PetscMPIInt        size;
938 
939   PetscFunctionBegin;
940   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
941   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
942 
943   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
944     PetscCallMPI(MPI_Comm_size(comm, &size));
945     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
946     for (i = 0; i < size; i++)
947       if (ranges[i + 1] != branges[i + 1]) {
948         differ = PETSC_TRUE;
949         break;
950       }
951   }
952 
953   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
954     PetscCall(MatCreate(comm, &Bmpi));
955     m = PETSC_DECIDE;
956     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
957     n = PETSC_DECIDE;
958     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
959     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
960     PetscCall(MatSetType(Bmpi, MATDENSE));
961     PetscCall(MatSetUp(Bmpi));
962 
963     /* create ScaLAPACK descriptor for B (1d block distribution) */
964     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
965     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
966     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
967     PetscCheckScaLapackInfo("descinit", info);
968 
969     /* redistribute matrix */
970     PetscCall(MatDenseGetArray(Bmpi, &barray));
971     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
972     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
973     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
974     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
975 
976     /* transfer rows of auxiliary matrix to the final matrix B */
977     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
978     for (i = rstart; i < rend; i++) {
979       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
980       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
981       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
982     }
983     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
984     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
985     PetscCall(MatDestroy(&Bmpi));
986 
987   } else { /* normal cases */
988 
989     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
990     else {
991       PetscCall(MatCreate(comm, &Bmpi));
992       m = PETSC_DECIDE;
993       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
994       n = PETSC_DECIDE;
995       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
996       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
997       PetscCall(MatSetType(Bmpi, MATDENSE));
998       PetscCall(MatSetUp(Bmpi));
999     }
1000 
1001     /* create ScaLAPACK descriptor for B (1d block distribution) */
1002     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1003     lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
1004     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1005     PetscCheckScaLapackInfo("descinit", info);
1006 
1007     /* redistribute matrix */
1008     PetscCall(MatDenseGetArray(Bmpi, &barray));
1009     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1010     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1011 
1012     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1013     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1014     if (reuse == MAT_INPLACE_MATRIX) {
1015       PetscCall(MatHeaderReplace(A, &Bmpi));
1016     } else *B = Bmpi;
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1022 {
1023   const PetscInt *ranges;
1024   PetscMPIInt     size;
1025   PetscInt        i, n;
1026 
1027   PetscFunctionBegin;
1028   *correct = PETSC_TRUE;
1029   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1030   if (size > 1) {
1031     PetscCall(PetscLayoutGetRanges(map, &ranges));
1032     n = ranges[1] - ranges[0];
1033     for (i = 1; i < size; i++)
1034       if (ranges[i + 1] - ranges[i] != n) break;
1035     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1036   }
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1041 {
1042   Mat_ScaLAPACK  *b;
1043   Mat             Bmpi;
1044   MPI_Comm        comm;
1045   PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
1046   const PetscInt *ranges, *rows, *cols;
1047   PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
1048   PetscScalar    *aarray;
1049   IS              ir, ic;
1050   PetscInt        lda;
1051   PetscBool       flg;
1052 
1053   PetscFunctionBegin;
1054   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1055 
1056   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1057   else {
1058     PetscCall(MatCreate(comm, &Bmpi));
1059     m = PETSC_DECIDE;
1060     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1061     n = PETSC_DECIDE;
1062     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1063     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1064     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1065     PetscCall(MatSetUp(Bmpi));
1066   }
1067   b = (Mat_ScaLAPACK *)Bmpi->data;
1068 
1069   PetscCall(MatDenseGetLDA(A, &lda));
1070   PetscCall(MatDenseGetArray(A, &aarray));
1071   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1072   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1073   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1074     /* create ScaLAPACK descriptor for A (1d block distribution) */
1075     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1076     PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1077     lld = PetscMax(lda, 1);                       /* local leading dimension */
1078     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1079     PetscCheckScaLapackInfo("descinit", info);
1080 
1081     /* redistribute matrix */
1082     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1083     Bmpi->nooffprocentries = PETSC_TRUE;
1084   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1085     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1086     b->roworiented = PETSC_FALSE;
1087     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1088     PetscCall(ISGetIndices(ir, &rows));
1089     PetscCall(ISGetIndices(ic, &cols));
1090     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1091     PetscCall(ISRestoreIndices(ir, &rows));
1092     PetscCall(ISRestoreIndices(ic, &cols));
1093     PetscCall(ISDestroy(&ic));
1094     PetscCall(ISDestroy(&ir));
1095   }
1096   PetscCall(MatDenseRestoreArray(A, &aarray));
1097   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1098   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1099   if (reuse == MAT_INPLACE_MATRIX) {
1100     PetscCall(MatHeaderReplace(A, &Bmpi));
1101   } else *B = Bmpi;
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1106 {
1107   Mat                mat_scal;
1108   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1109   const PetscInt    *cols;
1110   const PetscScalar *vals;
1111 
1112   PetscFunctionBegin;
1113   if (reuse == MAT_REUSE_MATRIX) {
1114     mat_scal = *newmat;
1115     PetscCall(MatZeroEntries(mat_scal));
1116   } else {
1117     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1118     m = PETSC_DECIDE;
1119     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1120     n = PETSC_DECIDE;
1121     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1122     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1123     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1124     PetscCall(MatSetUp(mat_scal));
1125   }
1126   for (row = rstart; row < rend; row++) {
1127     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1128     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1129     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1130   }
1131   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1132   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1133 
1134   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1135   else *newmat = mat_scal;
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1140 {
1141   Mat                mat_scal;
1142   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1143   const PetscInt    *cols;
1144   const PetscScalar *vals;
1145   PetscScalar        v;
1146 
1147   PetscFunctionBegin;
1148   if (reuse == MAT_REUSE_MATRIX) {
1149     mat_scal = *newmat;
1150     PetscCall(MatZeroEntries(mat_scal));
1151   } else {
1152     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1153     m = PETSC_DECIDE;
1154     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1155     n = PETSC_DECIDE;
1156     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1157     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1158     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1159     PetscCall(MatSetUp(mat_scal));
1160   }
1161   PetscCall(MatGetRowUpperTriangular(A));
1162   for (row = rstart; row < rend; row++) {
1163     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1164     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1165     for (j = 0; j < ncols; j++) { /* lower triangular part */
1166       if (cols[j] == row) continue;
1167       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1168       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1169     }
1170     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1171   }
1172   PetscCall(MatRestoreRowUpperTriangular(A));
1173   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1174   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1175 
1176   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1177   else *newmat = mat_scal;
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1182 {
1183   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1184   PetscInt       sz = 0;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(PetscLayoutSetUp(A->rmap));
1188   PetscCall(PetscLayoutSetUp(A->cmap));
1189   if (!a->lld) a->lld = a->locr;
1190 
1191   PetscCall(PetscFree(a->loc));
1192   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1193   PetscCall(PetscCalloc1(sz, &a->loc));
1194 
1195   A->preallocated = PETSC_TRUE;
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1200 {
1201   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1202   Mat_ScaLAPACK_Grid *grid;
1203   PetscBool           flg;
1204   MPI_Comm            icomm;
1205 
1206   PetscFunctionBegin;
1207   PetscCall(MatStashDestroy_Private(&A->stash));
1208   PetscCall(PetscFree(a->loc));
1209   PetscCall(PetscFree(a->pivots));
1210   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1211   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1212   if (--grid->grid_refct == 0) {
1213     Cblacs_gridexit(grid->ictxt);
1214     Cblacs_gridexit(grid->ictxrow);
1215     Cblacs_gridexit(grid->ictxcol);
1216     PetscCall(PetscFree(grid));
1217     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1218   }
1219   PetscCall(PetscCommDestroy(&icomm));
1220   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1221   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1222   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1223   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1224   PetscCall(PetscFree(A->data));
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1229 {
1230   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1231   PetscBLASInt   info = 0;
1232   PetscBool      flg;
1233 
1234   PetscFunctionBegin;
1235   PetscCall(PetscLayoutSetUp(A->rmap));
1236   PetscCall(PetscLayoutSetUp(A->cmap));
1237 
1238   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1239   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1240   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1241   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1242   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1243 
1244   /* compute local sizes */
1245   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1246   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1247   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1248   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1249   a->lld  = PetscMax(1, a->locr);
1250 
1251   /* allocate local array */
1252   PetscCall(MatScaLAPACKSetPreallocation(A));
1253 
1254   /* set up ScaLAPACK descriptor */
1255   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1256   PetscCheckScaLapackInfo("descinit", info);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1261 {
1262   PetscInt nstash, reallocs;
1263 
1264   PetscFunctionBegin;
1265   if (A->nooffprocentries) PetscFunctionReturn(0);
1266   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1267   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1268   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1273 {
1274   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1275   PetscMPIInt    n;
1276   PetscInt       i, flg, *row, *col;
1277   PetscScalar   *val;
1278   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1279 
1280   PetscFunctionBegin;
1281   if (A->nooffprocentries) PetscFunctionReturn(0);
1282   while (1) {
1283     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1284     if (!flg) break;
1285     for (i = 0; i < n; i++) {
1286       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1287       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1288       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1289       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1290       switch (A->insertmode) {
1291       case INSERT_VALUES:
1292         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1293         break;
1294       case ADD_VALUES:
1295         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1296         break;
1297       default:
1298         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1299       }
1300     }
1301   }
1302   PetscCall(MatStashScatterEnd_Private(&A->stash));
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1307 {
1308   Mat      Adense, As;
1309   MPI_Comm comm;
1310 
1311   PetscFunctionBegin;
1312   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1313   PetscCall(MatCreate(comm, &Adense));
1314   PetscCall(MatSetType(Adense, MATDENSE));
1315   PetscCall(MatLoad(Adense, viewer));
1316   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1317   PetscCall(MatDestroy(&Adense));
1318   PetscCall(MatHeaderReplace(newMat, &As));
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 /* -------------------------------------------------------------------*/
1323 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1324                                        0,
1325                                        0,
1326                                        MatMult_ScaLAPACK,
1327                                        /* 4*/ MatMultAdd_ScaLAPACK,
1328                                        MatMultTranspose_ScaLAPACK,
1329                                        MatMultTransposeAdd_ScaLAPACK,
1330                                        MatSolve_ScaLAPACK,
1331                                        MatSolveAdd_ScaLAPACK,
1332                                        0,
1333                                        /*10*/ 0,
1334                                        MatLUFactor_ScaLAPACK,
1335                                        MatCholeskyFactor_ScaLAPACK,
1336                                        0,
1337                                        MatTranspose_ScaLAPACK,
1338                                        /*15*/ MatGetInfo_ScaLAPACK,
1339                                        0,
1340                                        MatGetDiagonal_ScaLAPACK,
1341                                        MatDiagonalScale_ScaLAPACK,
1342                                        MatNorm_ScaLAPACK,
1343                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1344                                        MatAssemblyEnd_ScaLAPACK,
1345                                        MatSetOption_ScaLAPACK,
1346                                        MatZeroEntries_ScaLAPACK,
1347                                        /*24*/ 0,
1348                                        MatLUFactorSymbolic_ScaLAPACK,
1349                                        MatLUFactorNumeric_ScaLAPACK,
1350                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1351                                        MatCholeskyFactorNumeric_ScaLAPACK,
1352                                        /*29*/ MatSetUp_ScaLAPACK,
1353                                        0,
1354                                        0,
1355                                        0,
1356                                        0,
1357                                        /*34*/ MatDuplicate_ScaLAPACK,
1358                                        0,
1359                                        0,
1360                                        0,
1361                                        0,
1362                                        /*39*/ MatAXPY_ScaLAPACK,
1363                                        0,
1364                                        0,
1365                                        0,
1366                                        MatCopy_ScaLAPACK,
1367                                        /*44*/ 0,
1368                                        MatScale_ScaLAPACK,
1369                                        MatShift_ScaLAPACK,
1370                                        0,
1371                                        0,
1372                                        /*49*/ 0,
1373                                        0,
1374                                        0,
1375                                        0,
1376                                        0,
1377                                        /*54*/ 0,
1378                                        0,
1379                                        0,
1380                                        0,
1381                                        0,
1382                                        /*59*/ 0,
1383                                        MatDestroy_ScaLAPACK,
1384                                        MatView_ScaLAPACK,
1385                                        0,
1386                                        0,
1387                                        /*64*/ 0,
1388                                        0,
1389                                        0,
1390                                        0,
1391                                        0,
1392                                        /*69*/ 0,
1393                                        0,
1394                                        MatConvert_ScaLAPACK_Dense,
1395                                        0,
1396                                        0,
1397                                        /*74*/ 0,
1398                                        0,
1399                                        0,
1400                                        0,
1401                                        0,
1402                                        /*79*/ 0,
1403                                        0,
1404                                        0,
1405                                        0,
1406                                        MatLoad_ScaLAPACK,
1407                                        /*84*/ 0,
1408                                        0,
1409                                        0,
1410                                        0,
1411                                        0,
1412                                        /*89*/ 0,
1413                                        0,
1414                                        MatMatMultNumeric_ScaLAPACK,
1415                                        0,
1416                                        0,
1417                                        /*94*/ 0,
1418                                        0,
1419                                        0,
1420                                        MatMatTransposeMultNumeric_ScaLAPACK,
1421                                        0,
1422                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1423                                        0,
1424                                        0,
1425                                        MatConjugate_ScaLAPACK,
1426                                        0,
1427                                        /*104*/ 0,
1428                                        0,
1429                                        0,
1430                                        0,
1431                                        0,
1432                                        /*109*/ MatMatSolve_ScaLAPACK,
1433                                        0,
1434                                        0,
1435                                        0,
1436                                        MatMissingDiagonal_ScaLAPACK,
1437                                        /*114*/ 0,
1438                                        0,
1439                                        0,
1440                                        0,
1441                                        0,
1442                                        /*119*/ 0,
1443                                        MatHermitianTranspose_ScaLAPACK,
1444                                        0,
1445                                        0,
1446                                        0,
1447                                        /*124*/ 0,
1448                                        0,
1449                                        0,
1450                                        0,
1451                                        0,
1452                                        /*129*/ 0,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                        0,
1457                                        /*134*/ 0,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                        /*140*/ 0,
1464                                        0,
1465                                        0,
1466                                        0,
1467                                        0,
1468                                        /*145*/ 0,
1469                                        0,
1470                                        0,
1471                                        0,
1472                                        0,
1473                                        /*150*/ 0,
1474                                        0};
1475 
1476 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1477 {
1478   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1479   PetscInt           size = stash->size, nsends;
1480   PetscInt           count, *sindices, **rindices, i, j, l;
1481   PetscScalar      **rvalues, *svalues;
1482   MPI_Comm           comm = stash->comm;
1483   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1484   PetscMPIInt       *sizes, *nlengths, nreceives;
1485   PetscInt          *sp_idx, *sp_idy;
1486   PetscScalar       *sp_val;
1487   PetscMatStashSpace space, space_next;
1488   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1489   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1490 
1491   PetscFunctionBegin;
1492   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1493     InsertMode addv;
1494     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1495     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1496     mat->insertmode = addv; /* in case this processor had no cache */
1497   }
1498 
1499   bs2 = stash->bs * stash->bs;
1500 
1501   /*  first count number of contributors to each processor */
1502   PetscCall(PetscCalloc1(size, &nlengths));
1503   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1504 
1505   i = j = 0;
1506   space = stash->space_head;
1507   while (space) {
1508     space_next = space->next;
1509     for (l = 0; l < space->local_used; l++) {
1510       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1511       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1512       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1513       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1514       nlengths[j]++;
1515       owner[i] = j;
1516       i++;
1517     }
1518     space = space_next;
1519   }
1520 
1521   /* Now check what procs get messages - and compute nsends. */
1522   PetscCall(PetscCalloc1(size, &sizes));
1523   for (i = 0, nsends = 0; i < size; i++) {
1524     if (nlengths[i]) {
1525       sizes[i] = 1;
1526       nsends++;
1527     }
1528   }
1529 
1530   {
1531     PetscMPIInt *onodes, *olengths;
1532     /* Determine the number of messages to expect, their lengths, from from-ids */
1533     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1534     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1535     /* since clubbing row,col - lengths are multiplied by 2 */
1536     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1537     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1538     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1539     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1540     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1541     PetscCall(PetscFree(onodes));
1542     PetscCall(PetscFree(olengths));
1543   }
1544 
1545   /* do sends:
1546       1) starts[i] gives the starting index in svalues for stuff going to
1547          the ith processor
1548   */
1549   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1550   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1551   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1552   /* use 2 sends the first with all_a, the next with all_i and all_j */
1553   startv[0] = 0;
1554   starti[0] = 0;
1555   for (i = 1; i < size; i++) {
1556     startv[i] = startv[i - 1] + nlengths[i - 1];
1557     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1558   }
1559 
1560   i     = 0;
1561   space = stash->space_head;
1562   while (space) {
1563     space_next = space->next;
1564     sp_idx     = space->idx;
1565     sp_idy     = space->idy;
1566     sp_val     = space->val;
1567     for (l = 0; l < space->local_used; l++) {
1568       j = owner[i];
1569       if (bs2 == 1) {
1570         svalues[startv[j]] = sp_val[l];
1571       } else {
1572         PetscInt     k;
1573         PetscScalar *buf1, *buf2;
1574         buf1 = svalues + bs2 * startv[j];
1575         buf2 = space->val + bs2 * l;
1576         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1577       }
1578       sindices[starti[j]]               = sp_idx[l];
1579       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1580       startv[j]++;
1581       starti[j]++;
1582       i++;
1583     }
1584     space = space_next;
1585   }
1586   startv[0] = 0;
1587   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1588 
1589   for (i = 0, count = 0; i < size; i++) {
1590     if (sizes[i]) {
1591       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1592       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1593     }
1594   }
1595 #if defined(PETSC_USE_INFO)
1596   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1597   for (i = 0; i < size; i++) {
1598     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1599   }
1600 #endif
1601   PetscCall(PetscFree(nlengths));
1602   PetscCall(PetscFree(owner));
1603   PetscCall(PetscFree2(startv, starti));
1604   PetscCall(PetscFree(sizes));
1605 
1606   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1607   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1608 
1609   for (i = 0; i < nreceives; i++) {
1610     recv_waits[2 * i]     = recv_waits1[i];
1611     recv_waits[2 * i + 1] = recv_waits2[i];
1612   }
1613   stash->recv_waits = recv_waits;
1614 
1615   PetscCall(PetscFree(recv_waits1));
1616   PetscCall(PetscFree(recv_waits2));
1617 
1618   stash->svalues         = svalues;
1619   stash->sindices        = sindices;
1620   stash->rvalues         = rvalues;
1621   stash->rindices        = rindices;
1622   stash->send_waits      = send_waits;
1623   stash->nsends          = nsends;
1624   stash->nrecvs          = nreceives;
1625   stash->reproduce_count = 0;
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1630 {
1631   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1632 
1633   PetscFunctionBegin;
1634   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1635   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1636   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1637   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1638   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1639   PetscFunctionReturn(0);
1640 }
1641 
1642 /*@
1643    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1644    the `MATSCALAPACK` matrix
1645 
1646    Logically Collective
1647 
1648    Input Parameters:
1649 +  A  - a `MATSCALAPACK` matrix
1650 .  mb - the row block size
1651 -  nb - the column block size
1652 
1653    Level: intermediate
1654 
1655 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1656 @*/
1657 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1658 {
1659   PetscFunctionBegin;
1660   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1661   PetscValidLogicalCollectiveInt(A, mb, 2);
1662   PetscValidLogicalCollectiveInt(A, nb, 3);
1663   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1668 {
1669   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1670 
1671   PetscFunctionBegin;
1672   if (mb) *mb = a->mb;
1673   if (nb) *nb = a->nb;
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 /*@
1678    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1679    the `MATSCALAPACK` matrix
1680 
1681    Not collective
1682 
1683    Input Parameter:
1684 .  A  - a `MATSCALAPACK` matrix
1685 
1686    Output Parameters:
1687 +  mb - the row block size
1688 -  nb - the column block size
1689 
1690    Level: intermediate
1691 
1692 .seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1693 @*/
1694 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1695 {
1696   PetscFunctionBegin;
1697   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1698   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1703 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1704 
1705 /*MC
1706    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1707 
1708    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1709 
1710    Options Database Keys:
1711 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
1712 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1713 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1714 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1715 
1716   Note:
1717    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1718    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1719    the given rank.
1720 
1721    Level: beginner
1722 
1723 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1724 M*/
1725 
1726 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1727 {
1728   Mat_ScaLAPACK      *a;
1729   PetscBool           flg, flg1;
1730   Mat_ScaLAPACK_Grid *grid;
1731   MPI_Comm            icomm;
1732   PetscBLASInt        nprow, npcol, myrow, mycol;
1733   PetscInt            optv1, k = 2, array[2] = {0, 0};
1734   PetscMPIInt         size;
1735 
1736   PetscFunctionBegin;
1737   PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
1738   A->insertmode = NOT_SET_VALUES;
1739 
1740   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1741   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1742   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1743   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1744   A->stash.ScatterDestroy = NULL;
1745 
1746   PetscCall(PetscNew(&a));
1747   A->data = (void *)a;
1748 
1749   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1750   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1751     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1752     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1753     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1754   }
1755   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1756   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1757   if (!flg) {
1758     PetscCall(PetscNew(&grid));
1759 
1760     PetscCallMPI(MPI_Comm_size(icomm, &size));
1761     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1762 
1763     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1764     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1765     if (flg1) {
1766       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1767       grid->nprow = optv1;
1768     }
1769     PetscOptionsEnd();
1770 
1771     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1772     grid->npcol = size / grid->nprow;
1773     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1774     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1775     grid->ictxt = Csys2blacs_handle(icomm);
1776     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1777     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1778     grid->grid_refct = 1;
1779     grid->nprow      = nprow;
1780     grid->npcol      = npcol;
1781     grid->myrow      = myrow;
1782     grid->mycol      = mycol;
1783     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1784     grid->ictxrow = Csys2blacs_handle(icomm);
1785     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1786     grid->ictxcol = Csys2blacs_handle(icomm);
1787     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1788     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1789 
1790   } else grid->grid_refct++;
1791   PetscCall(PetscCommDestroy(&icomm));
1792   a->grid = grid;
1793   a->mb   = DEFAULT_BLOCKSIZE;
1794   a->nb   = DEFAULT_BLOCKSIZE;
1795 
1796   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1797   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1798   if (flg) {
1799     a->mb = array[0];
1800     a->nb = (k > 1) ? array[1] : a->mb;
1801   }
1802   PetscOptionsEnd();
1803 
1804   a->roworiented = PETSC_TRUE;
1805   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1806   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1807   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1808   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 /*@C
1813    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1814    (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1815 
1816    Collective
1817 
1818    Input Parameters:
1819 +  comm - MPI communicator
1820 .  mb   - row block size (or `PETSC_DECIDE` to have it set)
1821 .  nb   - column block size (or `PETSC_DECIDE` to have it set)
1822 .  M    - number of global rows
1823 .  N    - number of global columns
1824 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1825 -  csrc - coordinate of process that owns the first column of the distributed matrix
1826 
1827    Output Parameter:
1828 .  A - the matrix
1829 
1830    Options Database Key:
1831 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1832 
1833    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1834    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1835    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1836 
1837    Note:
1838    If `PETSC_DECIDE` is used for the block sizes, then an appropriate value
1839    is chosen.
1840 
1841    Storage Information:
1842    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1843    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1844    significance and are thus ignored. The block sizes refer to the values
1845    used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1846 
1847    Level: intermediate
1848 
1849 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1850 @*/
1851 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1852 {
1853   Mat_ScaLAPACK *a;
1854   PetscInt       m, n;
1855 
1856   PetscFunctionBegin;
1857   PetscCall(MatCreate(comm, A));
1858   PetscCall(MatSetType(*A, MATSCALAPACK));
1859   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1860   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1861   m = PETSC_DECIDE;
1862   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1863   n = PETSC_DECIDE;
1864   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1865   PetscCall(MatSetSizes(*A, m, n, M, N));
1866   a = (Mat_ScaLAPACK *)(*A)->data;
1867   PetscCall(PetscBLASIntCast(M, &a->M));
1868   PetscCall(PetscBLASIntCast(N, &a->N));
1869   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1870   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1871   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1872   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1873   PetscCall(MatSetUp(*A));
1874   PetscFunctionReturn(0);
1875 }
1876