xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
/* BibTeX citation text for the ScaLAPACK Users' Guide; ScaLAPACKCite below
   tracks whether it has already been registered (presumably via
   PetscCitationsRegister(), the usual PETSc pattern — registration site is not
   visible in this chunk) */
const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
                                       "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
                                       "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
                                       "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
                                       "       TITLE = {Sca{LAPACK} Users' Guide},\n"
                                       "       PUBLISHER = {SIAM},\n"
                                       "       ADDRESS = {Philadelphia, PA},\n"
                                       "       YEAR = 1997\n"
                                       "}\n";
static PetscBool ScaLAPACKCite       = PETSC_FALSE;

/* default mb/nb block size for the 2d block-cyclic distribution; use site is
   outside this chunk — TODO confirm it is the fallback when the user sets none */
#define DEFAULT_BLOCKSIZE 64

/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/*
  Releases the MPI keyval used to attach the process-grid attribute to
  communicators. Presumably registered as a PETSc finalization callback —
  registration site is not visible in this chunk.
*/
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(PETSC_SUCCESS);
}
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
/*
  MatSetValues() for MATSCALAPACK: maps each global (row,col) pair to the
  owning process and local indices with infog2l; locally-owned entries are
  written directly into the column-major local array, off-process entries are
  stashed for communication during assembly. Negative row/column indices are
  skipped (standard PETSc convention for ignored entries).
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); /* +1: ScaLAPACK indices are 1-based */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
      /* global (gridx,gcidx) -> local (lridx,lcidx) plus owner coordinates (rsrc,csrc) */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
        /* entry is owned locally; -1 converts back to 0-based, lld is the column stride */
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
            break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
            break;
          }
        }
      } else {
        /* off-process entry: stash it for MatAssemblyBegin/End communication */
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
158 
/*
  Shared kernel for the MatMult variants: computes y = A*x + beta*y
  (transpose == PETSC_FALSE) or y = A^T*x + beta*y (transpose == PETSC_TRUE).

  The PETSc vectors arrive 1d block-distributed (block size = size of rank 0's
  range, taken from ranges[1]); each is redistributed with p?gemr2d onto the 2d
  process grid as a single column/row of a 2d matrix, pBLAS gemv is applied,
  and y is redistributed back. The y redistribution on input is skipped when
  beta == 0 since gemv then ignores the initial contents of y.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(PETSC_SUCCESS);
}
245 
246 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
247 {
248   const PetscScalar *xarray;
249   PetscScalar       *yarray;
250 
251   PetscFunctionBegin;
252   PetscCall(VecGetArrayRead(x, &xarray));
253   PetscCall(VecGetArray(y, &yarray));
254   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
255   PetscCall(VecRestoreArrayRead(x, &xarray));
256   PetscCall(VecRestoreArray(y, &yarray));
257   PetscFunctionReturn(PETSC_SUCCESS);
258 }
259 
260 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
261 {
262   const PetscScalar *xarray;
263   PetscScalar       *yarray;
264 
265   PetscFunctionBegin;
266   PetscCall(VecGetArrayRead(x, &xarray));
267   PetscCall(VecGetArray(y, &yarray));
268   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
269   PetscCall(VecRestoreArrayRead(x, &xarray));
270   PetscCall(VecRestoreArray(y, &yarray));
271   PetscFunctionReturn(PETSC_SUCCESS);
272 }
273 
274 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
275 {
276   const PetscScalar *xarray;
277   PetscScalar       *zarray;
278 
279   PetscFunctionBegin;
280   if (y != z) PetscCall(VecCopy(y, z));
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArray(z, &zarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArray(z, &zarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
305 {
306   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
307   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
308   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
309   PetscScalar    sone = 1.0, zero = 0.0;
310   PetscBLASInt   one = 1;
311 
312   PetscFunctionBegin;
313   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
314   C->assembled = PETSC_TRUE;
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
/*
  Symbolic phase of C = A*B: sizes C from A's rows and B's columns, gives it
  the ScaLAPACK type, allocates it, and records the numeric routine for the
  product machinery to dispatch to. fill is unused (dense result).
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}
327 
328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
329 {
330   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
331   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
332   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
333   PetscScalar    sone = 1.0, zero = 0.0;
334   PetscBLASInt   one = 1;
335 
336   PetscFunctionBegin;
337   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
338   C->assembled = PETSC_TRUE;
339   PetscFunctionReturn(PETSC_SUCCESS);
340 }
341 
342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
343 {
344   PetscFunctionBegin;
345   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
346   PetscCall(MatSetType(C, MATSCALAPACK));
347   PetscCall(MatSetUp(C));
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
/* Wires up the AB (C = A*B) product path for two ScaLAPACK matrices */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
358 
/* Wires up the ABt (C = A*B^T) product path for two ScaLAPACK matrices */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
366 
367 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
368 {
369   Mat_Product *product = C->product;
370 
371   PetscFunctionBegin;
372   switch (product->type) {
373   case MATPRODUCT_AB:
374     PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
375     break;
376   case MATPRODUCT_ABt:
377     PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
378     break;
379   default:
380     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
381   }
382   PetscFunctionReturn(PETSC_SUCCESS);
383 }
384 
/*
  Extracts the diagonal of A into vector D. The diagonal is gathered
  element-by-element (elget/elset) into a temporary 2d-distributed
  column/row vector and then redistributed to D's 1d layout with p?gemr2d.
  The shorter dimension decides the orientation: row version when M <= N,
  column version otherwise.
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *darray, *d2d, v;
  const PetscInt *ranges;
  PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D, &darray));

  if (A->rmap->N <= A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal (1-based global indices) */
    for (j = 1; j <= a->M; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal (1-based global indices) */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(PETSC_SUCCESS);
}
456 
/*
  Computes A = diag(L)*A*diag(R). Either factor may be NULL (skipped).
  Each scaling vector is redistributed onto the 2d grid with p?gemr2d, then
  broadcast with BLACS (gebs2d on the owning row/column of the grid, gebr2d
  on the others) so every process holds the entries matching its local block,
  and finally applied entrywise to the column-major local array.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns: grid row 0 sends, other rows receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling: column j of the local block is scaled by d2d[j] */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows: grid column 0 sends, other columns receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling: row i of the local block is scaled by d2d[i] */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
531 
/* A dense distributed matrix always stores its diagonal: never missing.
   The index output d is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
538 
539 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
540 {
541   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
542   PetscBLASInt   n, one = 1;
543 
544   PetscFunctionBegin;
545   n = x->lld * x->locc;
546   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
551 {
552   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
553   PetscBLASInt   i, n;
554   PetscScalar    v;
555 
556   PetscFunctionBegin;
557   n = PetscMin(x->M, x->N);
558   for (i = 1; i <= n; i++) {
559     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
560     v += alpha;
561     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
562   }
563   PetscFunctionReturn(PETSC_SUCCESS);
564 }
565 
/*
  Y = alpha*X + Y via ScaLAPACK's distributed matrix add (p?geadd); str is
  ignored (dense matrices). Both matrices must share the same block-cyclic
  distribution, enforced by MatScaLAPACKCheckDistribution.
*/
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
  Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
  PetscBLASInt   one  = 1;
  PetscScalar    beta = 1.0; /* keep existing Y contents */

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y, 1, X, 3);
  PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
579 
580 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
581 {
582   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
583   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
584 
585   PetscFunctionBegin;
586   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
587   PetscCall(PetscObjectStateIncrease((PetscObject)B));
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590 
/*
  Duplicates A, preserving its exact block-cyclic distribution: the sizes,
  block sizes and source coordinates are copied into the new matrix BEFORE
  MatSetUp() so the local layout matches, which makes the raw buffer copy
  for MAT_COPY_VALUES valid.
*/
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatCreate(comm, &Bs));
  PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(Bs, MATSCALAPACK));
  b       = (Mat_ScaLAPACK *)Bs->data;
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
615 
/*
  B = A^T. Only MAT_INITIAL_MATRIX is supported; the result swaps block
  sizes and source coordinates along with the dimensions. PBLAStran performs
  the distributed transpose; in complex builds it presumably maps to the
  conjugate-transpose variant (hence the explicit un-conjugation loop below)
  — TODO confirm against the PBLAStran_ macro definition in the header.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
641 
642 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
643 {
644   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
645   PetscInt       i, j;
646 
647   PetscFunctionBegin;
648   for (i = 0; i < a->locr; i++)
649     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
650   PetscFunctionReturn(PETSC_SUCCESS);
651 }
652 
/*
  B = A^H. Only MAT_INITIAL_MATRIX is supported; the result swaps block
  sizes and source coordinates along with the dimensions. Unlike
  MatTranspose_ScaLAPACK there is no un-conjugation pass, so PBLAStran
  presumably maps to the conjugate-transpose routine in complex builds —
  TODO confirm against the PBLAStran_ macro definition in the header.
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
669 
/*
  Solves A*X = B using the previously computed factors: getrs for an LU
  factorization, potrs (lower) for Cholesky. The right-hand side is copied
  into X, redistributed with p?gemr2d from its 1d layout onto the 2d grid
  as a single column, solved in place, and redistributed back.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X)); /* solve overwrites the RHS, so work on X */
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
721 
722 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
723 {
724   PetscFunctionBegin;
725   PetscCall(MatSolve_ScaLAPACK(A, B, X));
726   PetscCall(VecAXPY(X, 1, Y));
727   PetscFunctionReturn(PETSC_SUCCESS);
728 }
729 
/* Solve A X = B with multiple right-hand sides using the LU or Cholesky
   factors stored in A. B and X must both be MATSCALAPACK with matching
   block-cyclic distributions; B's local array is copied into X and the
   ScaLAPACK solve then overwrites X in place. */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
  PetscBool      flg1, flg2;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B, 1, X, 2);
  b = (Mat_ScaLAPACK *)B->data;
  x = (Mat_ScaLAPACK *)X->data;
  PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); /* X <- B; the solve works in place on X */

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
759 
/* In-place LU factorization with partial pivoting via p?getrf.
   The row/col permutation arguments are ignored: ScaLAPACK selects its own
   pivoting, recorded in a->pivots for later use by MatSolve/MatMatSolve. */
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* pivot array of length locr+mb, as required by p?getrf; allocated lazily and kept for reuse */
  if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
776 
/* Numeric LU factorization: copy A into the factor matrix F and factor F in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
784 
/* Symbolic LU factorization: intentionally a no-op for dense block-cyclic storage */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
791 
/* In-place Cholesky factorization via p?potrf, using the lower triangle ("L").
   The permutation argument is ignored. */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
807 
/* Numeric Cholesky factorization: copy A into the factor matrix F and factor F in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
815 
/* Symbolic Cholesky factorization: intentionally a no-op for dense block-cyclic storage */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
822 
/* Report the solver package used for factorizations of this matrix type */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}
829 
/* Create the factor matrix F for the requested factorization type.
   F mirrors A's block sizes, global dimensions, and process-grid origin;
   the actual factorization happens later in the numeric-factor routines. */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE; /* symbolic phase is a no-op for dense storage */
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
847 
/* Register the ScaLAPACK solver package for LU and Cholesky factorizations of MATSCALAPACK */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(PETSC_SUCCESS);
}
855 
856 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
857 {
858   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
859   PetscBLASInt   one = 1, lwork = 0;
860   const char    *ntype;
861   PetscScalar   *work = NULL, dummy;
862 
863   PetscFunctionBegin;
864   switch (type) {
865   case NORM_1:
866     ntype = "1";
867     lwork = PetscMax(a->locr, a->locc);
868     break;
869   case NORM_FROBENIUS:
870     ntype = "F";
871     work  = &dummy;
872     break;
873   case NORM_INFINITY:
874     ntype = "I";
875     lwork = PetscMax(a->locr, a->locc);
876     break;
877   default:
878     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
879   }
880   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
881   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
882   if (lwork) PetscCall(PetscFree(work));
883   PetscFunctionReturn(PETSC_SUCCESS);
884 }
885 
/* Zero the entire local block-cyclic array (lld x locc entries) */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
894 
895 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
896 {
897   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
898   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
899 
900   PetscFunctionBegin;
901   if (rows) {
902     n     = a->locr;
903     nb    = a->mb;
904     isrc  = a->rsrc;
905     nproc = a->grid->nprow;
906     iproc = a->grid->myrow;
907     PetscCall(PetscMalloc1(n, &idx));
908     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
909     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
910   }
911   if (cols) {
912     n     = a->locc;
913     nb    = a->nb;
914     isrc  = a->csrc;
915     nproc = a->grid->npcol;
916     iproc = a->grid->mycol;
917     PetscCall(PetscMalloc1(n, &idx));
918     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
919     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
920   }
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
/* Convert a MATSCALAPACK matrix to MATDENSE.
   The 2d block-cyclic data is redistributed with p?gemr2d into a 1d
   (row-block) distribution matching a dense PETSc matrix. When reusing an
   existing B whose row distribution differs from A's, the redistribution
   goes through an auxiliary dense matrix and the rows are then transferred
   with MatSetValues(), since p?gemr2d requires a fixed target layout. */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1018 
/* Check whether a PETSc layout is compatible with ScaLAPACK's 1d block
   distribution: every process must own the same number of entries, except
   possibly the last process, which may own fewer. */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    n = ranges[1] - ranges[0]; /* local size on the first process is the reference */
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    /* valid if no mismatch was found, or the only mismatch is a smaller last block */
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1037 
/* Convert a MATDENSE matrix to MATSCALAPACK.
   If A's row and column layouts are ScaLAPACK-compatible (see
   MatScaLAPACKCheckLayout), the data is redistributed directly with
   p?gemr2d; otherwise each local entry is inserted with MatSetValues()
   using the ownership index sets of the target distribution. */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE; /* data is already in place, assembly need not communicate */
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE; /* aarray is stored column-major */
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1102 
1103 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1104 {
1105   Mat                mat_scal;
1106   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1107   const PetscInt    *cols;
1108   const PetscScalar *vals;
1109 
1110   PetscFunctionBegin;
1111   if (reuse == MAT_REUSE_MATRIX) {
1112     mat_scal = *newmat;
1113     PetscCall(MatZeroEntries(mat_scal));
1114   } else {
1115     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1116     m = PETSC_DECIDE;
1117     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1118     n = PETSC_DECIDE;
1119     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1120     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1121     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1122     PetscCall(MatSetUp(mat_scal));
1123   }
1124   for (row = rstart; row < rend; row++) {
1125     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1126     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1127     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1128   }
1129   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1130   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1131 
1132   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1133   else *newmat = mat_scal;
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
/* Convert a symmetric (SBAIJ) matrix, which stores only the upper triangle,
   to a full MATSCALAPACK matrix. Each stored entry (i,j) is added at (i,j)
   and mirrored to (j,i); for Hermitian matrices the mirrored value is
   conjugated. ADD_VALUES is used so the diagonal (skipped in the mirror
   loop) is inserted only once. */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue; /* do not duplicate the diagonal */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1178 
1179 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1180 {
1181   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1182   PetscInt       sz = 0;
1183 
1184   PetscFunctionBegin;
1185   PetscCall(PetscLayoutSetUp(A->rmap));
1186   PetscCall(PetscLayoutSetUp(A->cmap));
1187   if (!a->lld) a->lld = a->locr;
1188 
1189   PetscCall(PetscFree(a->loc));
1190   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1191   PetscCall(PetscCalloc1(sz, &a->loc));
1192 
1193   A->preallocated = PETSC_TRUE;
1194   PetscFunctionReturn(PETSC_SUCCESS);
1195 }
1196 
/* Destroy a MATSCALAPACK matrix: free local storage and pivots, release
   this matrix's reference to the shared BLACS process grid (tearing the
   grid down when the last reference goes away), and remove the composed
   query functions. */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* the grid is stored as an attribute on PETSc's inner duplicate of the communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    /* last matrix on this grid: exit all three BLACS contexts and drop the attribute */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1225 
/* Set up a MATSCALAPACK matrix: validate the parallel layout, compute the
   local block-cyclic sizes with numroc, allocate the local array, and
   initialize the ScaLAPACK array descriptor. */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr); /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1257 
/* Begin assembly: start scattering stashed off-process entries to their
   owners. Skipped entirely when the caller guaranteed there are none. */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
{
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1269 
/* End assembly: drain the stash of off-process entries, convert each
   1-based global (row,col) to local coordinates with p?infog2l, and insert
   or add it into the local block-cyclic array. */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break; /* no more messages */
    for (i = 0; i < n; i++) {
      /* ScaLAPACK indices are 1-based */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
        break;
      case ADD_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1303 
1304 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1305 {
1306   Mat      Adense, As;
1307   MPI_Comm comm;
1308 
1309   PetscFunctionBegin;
1310   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1311   PetscCall(MatCreate(comm, &Adense));
1312   PetscCall(MatSetType(Adense, MATDENSE));
1313   PetscCall(MatLoad(Adense, viewer));
1314   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1315   PetscCall(MatDestroy(&Adense));
1316   PetscCall(MatHeaderReplace(newMat, &As));
1317   PetscFunctionReturn(PETSC_SUCCESS);
1318 }
1319 
1320 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1321                                        0,
1322                                        0,
1323                                        MatMult_ScaLAPACK,
1324                                        /* 4*/ MatMultAdd_ScaLAPACK,
1325                                        MatMultTranspose_ScaLAPACK,
1326                                        MatMultTransposeAdd_ScaLAPACK,
1327                                        MatSolve_ScaLAPACK,
1328                                        MatSolveAdd_ScaLAPACK,
1329                                        0,
1330                                        /*10*/ 0,
1331                                        MatLUFactor_ScaLAPACK,
1332                                        MatCholeskyFactor_ScaLAPACK,
1333                                        0,
1334                                        MatTranspose_ScaLAPACK,
1335                                        /*15*/ MatGetInfo_ScaLAPACK,
1336                                        0,
1337                                        MatGetDiagonal_ScaLAPACK,
1338                                        MatDiagonalScale_ScaLAPACK,
1339                                        MatNorm_ScaLAPACK,
1340                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1341                                        MatAssemblyEnd_ScaLAPACK,
1342                                        MatSetOption_ScaLAPACK,
1343                                        MatZeroEntries_ScaLAPACK,
1344                                        /*24*/ 0,
1345                                        MatLUFactorSymbolic_ScaLAPACK,
1346                                        MatLUFactorNumeric_ScaLAPACK,
1347                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1348                                        MatCholeskyFactorNumeric_ScaLAPACK,
1349                                        /*29*/ MatSetUp_ScaLAPACK,
1350                                        0,
1351                                        0,
1352                                        0,
1353                                        0,
1354                                        /*34*/ MatDuplicate_ScaLAPACK,
1355                                        0,
1356                                        0,
1357                                        0,
1358                                        0,
1359                                        /*39*/ MatAXPY_ScaLAPACK,
1360                                        0,
1361                                        0,
1362                                        0,
1363                                        MatCopy_ScaLAPACK,
1364                                        /*44*/ 0,
1365                                        MatScale_ScaLAPACK,
1366                                        MatShift_ScaLAPACK,
1367                                        0,
1368                                        0,
1369                                        /*49*/ 0,
1370                                        0,
1371                                        0,
1372                                        0,
1373                                        0,
1374                                        /*54*/ 0,
1375                                        0,
1376                                        0,
1377                                        0,
1378                                        0,
1379                                        /*59*/ 0,
1380                                        MatDestroy_ScaLAPACK,
1381                                        MatView_ScaLAPACK,
1382                                        0,
1383                                        0,
1384                                        /*64*/ 0,
1385                                        0,
1386                                        0,
1387                                        0,
1388                                        0,
1389                                        /*69*/ 0,
1390                                        0,
1391                                        MatConvert_ScaLAPACK_Dense,
1392                                        0,
1393                                        0,
1394                                        /*74*/ 0,
1395                                        0,
1396                                        0,
1397                                        0,
1398                                        0,
1399                                        /*79*/ 0,
1400                                        0,
1401                                        0,
1402                                        0,
1403                                        MatLoad_ScaLAPACK,
1404                                        /*84*/ 0,
1405                                        0,
1406                                        0,
1407                                        0,
1408                                        0,
1409                                        /*89*/ 0,
1410                                        0,
1411                                        MatMatMultNumeric_ScaLAPACK,
1412                                        0,
1413                                        0,
1414                                        /*94*/ 0,
1415                                        0,
1416                                        0,
1417                                        MatMatTransposeMultNumeric_ScaLAPACK,
1418                                        0,
1419                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1420                                        0,
1421                                        0,
1422                                        MatConjugate_ScaLAPACK,
1423                                        0,
1424                                        /*104*/ 0,
1425                                        0,
1426                                        0,
1427                                        0,
1428                                        0,
1429                                        /*109*/ MatMatSolve_ScaLAPACK,
1430                                        0,
1431                                        0,
1432                                        0,
1433                                        MatMissingDiagonal_ScaLAPACK,
1434                                        /*114*/ 0,
1435                                        0,
1436                                        0,
1437                                        0,
1438                                        0,
1439                                        /*119*/ 0,
1440                                        MatHermitianTranspose_ScaLAPACK,
1441                                        0,
1442                                        0,
1443                                        0,
1444                                        /*124*/ 0,
1445                                        0,
1446                                        0,
1447                                        0,
1448                                        0,
1449                                        /*129*/ 0,
1450                                        0,
1451                                        0,
1452                                        0,
1453                                        0,
1454                                        /*134*/ 0,
1455                                        0,
1456                                        0,
1457                                        0,
1458                                        0,
1459                                        0,
1460                                        /*140*/ 0,
1461                                        0,
1462                                        0,
1463                                        0,
1464                                        0,
1465                                        /*145*/ 0,
1466                                        0,
1467                                        0,
1468                                        0,
1469                                        0,
1470                                        /*150*/ 0,
1471                                        0};
1472 
/*
  MatStashScatterBegin_ScaLAPACK - Begins communicating stashed matrix entries
  (values set via MatSetValues() for locations owned by other processes) to the
  processes that own them.

  For MATSCALAPACK the owner of entry (i,j) is determined by the 2D block-cyclic
  distribution, so it is computed per entry with infog2l/Cblacs_pnum instead of
  being looked up in a contiguous row-ownership table.

  Input Parameters:
+ mat    - the matrix
. stash  - the stash of cached (row,col,value) triples
- owners - unused here (present only to match the MatStash ScatterBegin callback
           signature); ownership comes from the ScaLAPACK descriptor instead

  Nonblocking receives are posted before the matching sends; the exchange is
  completed later by the stash's ScatterGetMesg/ScatterEnd callbacks
  (MatStashScatterGetMesg_Ref/MatStashScatterEnd_Ref, set in MatCreate_ScaLAPACK).
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* map the stashed global (row,col) -- converted to ScaLAPACK's 1-based
         indexing -- to the grid coordinates (rsrc,csrc) of the owning process,
         then to its rank j within the BLACS context */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and their from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are of size 'bs2' (and the earlier factor of 2 is removed) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  /* pack values and (row,col) indices into the send buffers, grouped by owner;
     owner[] was computed in the first pass over the stash */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* rows in the first half of this process's index segment, columns in the second */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv[] was consumed as a cursor above; rebuild it as the per-process offsets */
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* stash keeps the buffers/requests alive until ScatterEnd frees them */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1625 
1626 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1627 {
1628   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1629 
1630   PetscFunctionBegin;
1631   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1632   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1633   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1634   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1635   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1636   PetscFunctionReturn(PETSC_SUCCESS);
1637 }
1638 
1639 /*@
1640    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1641    the `MATSCALAPACK` matrix
1642 
1643    Logically Collective
1644 
1645    Input Parameters:
1646 +  A  - a `MATSCALAPACK` matrix
1647 .  mb - the row block size
1648 -  nb - the column block size
1649 
1650    Level: intermediate
1651 
1652    Note:
1653    This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1654 
1655 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1656 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* logically collective: in debug builds these verify all ranks pass the same mb/nb */
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  /* dispatch to the implementation composed in MatCreate_ScaLAPACK;
     "Try" means this is a silent no-op for other matrix types */
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1666 
1667 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1668 {
1669   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1670 
1671   PetscFunctionBegin;
1672   if (mb) *mb = a->mb;
1673   if (nb) *nb = a->nb;
1674   PetscFunctionReturn(PETSC_SUCCESS);
1675 }
1676 
1677 /*@
1678    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1679    the `MATSCALAPACK` matrix
1680 
1681    Not Collective
1682 
1683    Input Parameter:
1684 .  A  - a `MATSCALAPACK` matrix
1685 
1686    Output Parameters:
1687 +  mb - the row block size
1688 -  nb - the column block size
1689 
1690    Level: intermediate
1691 
1692    Note:
1693    This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1694 
1695 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1696 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* dispatch to the implementation composed in MatCreate_ScaLAPACK;
     "Use" means this errors if A is not a MATSCALAPACK matrix */
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1704 
1705 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1706 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1707 
1708 /*MC
1709    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1710 
1711    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1712 
1713    Options Database Keys:
1714 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1715 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1716 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1717 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1718 
1719    Level: intermediate
1720 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1723    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1724    the given rank.
1725 
1726 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1727 M*/
1728 
1729 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1730 {
1731   Mat_ScaLAPACK      *a;
1732   PetscBool           flg, flg1;
1733   Mat_ScaLAPACK_Grid *grid;
1734   MPI_Comm            icomm;
1735   PetscBLASInt        nprow, npcol, myrow, mycol;
1736   PetscInt            optv1, k = 2, array[2] = {0, 0};
1737   PetscMPIInt         size;
1738 
1739   PetscFunctionBegin;
1740   A->ops[0]     = MatOps_Values;
1741   A->insertmode = NOT_SET_VALUES;
1742 
1743   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1744   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1745   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1746   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1747   A->stash.ScatterDestroy = NULL;
1748 
1749   PetscCall(PetscNew(&a));
1750   A->data = (void *)a;
1751 
1752   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1753   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1754     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1755     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1756     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1757   }
1758   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1759   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1760   if (!flg) {
1761     PetscCall(PetscNew(&grid));
1762 
1763     PetscCallMPI(MPI_Comm_size(icomm, &size));
1764     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1765 
1766     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1767     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1768     if (flg1) {
1769       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1770       grid->nprow = optv1;
1771     }
1772     PetscOptionsEnd();
1773 
1774     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1775     grid->npcol = size / grid->nprow;
1776     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1777     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1778     grid->ictxt = Csys2blacs_handle(icomm);
1779     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1780     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1781     grid->grid_refct = 1;
1782     grid->nprow      = nprow;
1783     grid->npcol      = npcol;
1784     grid->myrow      = myrow;
1785     grid->mycol      = mycol;
1786     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1787     grid->ictxrow = Csys2blacs_handle(icomm);
1788     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1789     grid->ictxcol = Csys2blacs_handle(icomm);
1790     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1791     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1792 
1793   } else grid->grid_refct++;
1794   PetscCall(PetscCommDestroy(&icomm));
1795   a->grid = grid;
1796   a->mb   = DEFAULT_BLOCKSIZE;
1797   a->nb   = DEFAULT_BLOCKSIZE;
1798 
1799   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1800   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1801   if (flg) {
1802     a->mb = array[0];
1803     a->nb = (k > 1) ? array[1] : a->mb;
1804   }
1805   PetscOptionsEnd();
1806 
1807   a->roworiented = PETSC_TRUE;
1808   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1809   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1810   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1811   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1812   PetscFunctionReturn(PETSC_SUCCESS);
1813 }
1814 
1815 /*@C
1816    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1817    (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1818 
1819    Collective
1820 
1821    Input Parameters:
1822 +  comm - MPI communicator
1823 .  mb   - row block size (or `PETSC_DECIDE` to have it set)
1824 .  nb   - column block size (or `PETSC_DECIDE` to have it set)
1825 .  M    - number of global rows
1826 .  N    - number of global columns
1827 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1828 -  csrc - coordinate of process that owns the first column of the distributed matrix
1829 
1830    Output Parameter:
1831 .  A - the matrix
1832 
1833    Options Database Key:
1834 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1835 
1836    Level: intermediate
1837 
1838    Notes:
1839    If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1840 
1841    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1842    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1843    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1844 
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1846    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1847    significance and are thus ignored. The block sizes refer to the values
1848    used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1849 
1850 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1851 @*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
{
  Mat_ScaLAPACK *a;
  PetscInt       m, n;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetType(*A, MATSCALAPACK));
  /* global dimensions must be explicit: local sizes have no meaning for the
     block-cyclic layout, so they cannot be used to infer M or N */
  PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  a = (Mat_ScaLAPACK *)(*A)->data;
  PetscCall(PetscBLASIntCast(M, &a->M));
  PetscCall(PetscBLASIntCast(N, &a->N));
  /* PETSC_DECIDE: default row block size; a PETSC_DECIDE column block size
     inherits the (already converted) row block size */
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  /* grid coordinates of the process owning the first row/column of the distributed matrix */
  PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
  PetscCall(PetscBLASIntCast(csrc, &a->csrc));
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1877