xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision d8e47b638cf8f604a99e9678e1df24f82d959cd7)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201 
202     /* redistribute y from a row of a 2d matrix */
203     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204 
205   } else { /* non-transpose */
206 
207     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210     xlld = 1;
211     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212     PetscCheckScaLapackInfo("descinit", info);
213     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217     PetscCheckScaLapackInfo("descinit", info);
218 
219     /* allocate 2d vectors */
220     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223     PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
224 
225     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227     PetscCheckScaLapackInfo("descinit", info);
228     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229     PetscCheckScaLapackInfo("descinit", info);
230 
231     /* redistribute x as a row of a 2d matrix */
232     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233 
234     /* redistribute y as a column of a 2d matrix */
235     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236 
237     /* call PBLAS subroutine */
238     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239 
240     /* redistribute y from a column of a 2d matrix */
241     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242   }
243   PetscCall(PetscFree2(x2d, y2d));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATSCALAPACK));
353   PetscCall(MatSetUp(C));
354   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387   PetscScalar    sone = 1.0, zero = 0.0;
388   PetscBLASInt   one = 1;
389 
390   PetscFunctionBegin;
391   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392   C->assembled = PETSC_TRUE;
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400   PetscCall(MatSetType(C, MATSCALAPACK));
401   PetscCall(MatSetUp(C));
402   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408   Mat_Product *product = C->product;
409 
410   PetscFunctionBegin;
411   switch (product->type) {
412   case MATPRODUCT_AB:
413     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414     C->ops->productsymbolic = MatProductSymbolic_AB;
415     break;
416   case MATPRODUCT_AtB:
417     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419     break;
420   case MATPRODUCT_ABt:
421     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423     break;
424   default:
425     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433   PetscScalar    *darray, *d2d, v;
434   const PetscInt *ranges;
435   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436 
437   PetscFunctionBegin;
438   PetscCall(VecGetArray(D, &darray));
439 
440   if (A->rmap->N <= A->cmap->N) { /* row version */
441 
442     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447     PetscCheckScaLapackInfo("descinit", info);
448 
449     /* allocate 2d vector */
450     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451     PetscCall(PetscCalloc1(lszd, &d2d));
452     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
453 
454     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456     PetscCheckScaLapackInfo("descinit", info);
457 
458     /* collect diagonal */
459     for (j = 1; j <= a->M; j++) {
460       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462     }
463 
464     /* redistribute d from a column of a 2d matrix */
465     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466     PetscCall(PetscFree(d2d));
467 
468   } else { /* column version */
469 
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* collect diagonal */
486     for (j = 1; j <= a->N; j++) {
487       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489     }
490 
491     /* redistribute d from a row of a 2d matrix */
492     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493     PetscCall(PetscFree(d2d));
494   }
495 
496   PetscCall(VecRestoreArray(D, &darray));
497   PetscCall(VecAssemblyBegin(D));
498   PetscCall(VecAssemblyEnd(D));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505   const PetscScalar *d;
506   const PetscInt    *ranges;
507   PetscScalar       *d2d;
508   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509 
510   PetscFunctionBegin;
511   if (R) {
512     PetscCall(VecGetArrayRead(R, &d));
513     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516     dlld = 1;
517     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518     PetscCheckScaLapackInfo("descinit", info);
519 
520     /* allocate 2d vector */
521     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522     PetscCall(PetscCalloc1(lszd, &d2d));
523 
524     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526     PetscCheckScaLapackInfo("descinit", info);
527 
528     /* redistribute d to a row of a 2d matrix */
529     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530 
531     /* broadcast along process columns */
532     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534 
535     /* local scaling */
536     for (j = 0; j < a->locc; j++)
537       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538 
539     PetscCall(PetscFree(d2d));
540     PetscCall(VecRestoreArrayRead(R, &d));
541   }
542   if (L) {
543     PetscCall(VecGetArrayRead(L, &d));
544     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549     PetscCheckScaLapackInfo("descinit", info);
550 
551     /* allocate 2d vector */
552     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553     PetscCall(PetscCalloc1(lszd, &d2d));
554     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
555 
556     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558     PetscCheckScaLapackInfo("descinit", info);
559 
560     /* redistribute d to a column of a 2d matrix */
561     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562 
563     /* broadcast along process rows */
564     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566 
567     /* local scaling */
568     for (i = 0; i < a->locr; i++)
569       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570 
571     PetscCall(PetscFree(d2d));
572     PetscCall(VecRestoreArrayRead(L, &d));
573   }
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
578 {
579   PetscFunctionBegin;
580   *missing = PETSC_FALSE;
581   PetscFunctionReturn(PETSC_SUCCESS);
582 }
583 
584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
585 {
586   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
587   PetscBLASInt   n, one = 1;
588 
589   PetscFunctionBegin;
590   n = x->lld * x->locc;
591   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
596 {
597   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
598   PetscBLASInt   i, n;
599   PetscScalar    v;
600 
601   PetscFunctionBegin;
602   n = PetscMin(x->M, x->N);
603   for (i = 1; i <= n; i++) {
604     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
605     v += alpha;
606     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
607   }
608   PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 
611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
612 {
613   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
614   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
615   PetscBLASInt   one  = 1;
616   PetscScalar    beta = 1.0;
617 
618   PetscFunctionBegin;
619   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
620   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
621   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
626 {
627   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
628   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
629 
630   PetscFunctionBegin;
631   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
632   PetscCall(PetscObjectStateIncrease((PetscObject)B));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
637 {
638   Mat            Bs;
639   MPI_Comm       comm;
640   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
641 
642   PetscFunctionBegin;
643   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
644   PetscCall(MatCreate(comm, &Bs));
645   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
646   PetscCall(MatSetType(Bs, MATSCALAPACK));
647   b       = (Mat_ScaLAPACK *)Bs->data;
648   b->M    = a->M;
649   b->N    = a->N;
650   b->mb   = a->mb;
651   b->nb   = a->nb;
652   b->rsrc = a->rsrc;
653   b->csrc = a->csrc;
654   PetscCall(MatSetUp(Bs));
655   *B = Bs;
656   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
657   Bs->assembled = PETSC_TRUE;
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
662 {
663   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
664   Mat            Bs   = *B;
665   PetscBLASInt   one  = 1;
666   PetscScalar    sone = 1.0, zero = 0.0;
667 #if defined(PETSC_USE_COMPLEX)
668   PetscInt i, j;
669 #endif
670 
671   PetscFunctionBegin;
672   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
673   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
674   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
675   *B = Bs;
676   b  = (Mat_ScaLAPACK *)Bs->data;
677   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
678 #if defined(PETSC_USE_COMPLEX)
679   /* undo conjugation */
680   for (i = 0; i < b->locr; i++)
681     for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
682 #endif
683   Bs->assembled = PETSC_TRUE;
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
688 {
689   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
690   PetscInt       i, j;
691 
692   PetscFunctionBegin;
693   for (i = 0; i < a->locr; i++)
694     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
699 {
700   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
701   Mat            Bs   = *B;
702   PetscBLASInt   one  = 1;
703   PetscScalar    sone = 1.0, zero = 0.0;
704 
705   PetscFunctionBegin;
706   PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
707   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
708   *B = Bs;
709   b  = (Mat_ScaLAPACK *)Bs->data;
710   PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
711   Bs->assembled = PETSC_TRUE;
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
716 {
717   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
718   PetscScalar    *x, *x2d;
719   const PetscInt *ranges;
720   PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
721 
722   PetscFunctionBegin;
723   PetscCall(VecCopy(B, X));
724   PetscCall(VecGetArray(X, &x));
725 
726   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
727   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
728   PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
729   PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
730   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
731   PetscCheckScaLapackInfo("descinit", info);
732 
733   /* allocate 2d vector */
734   lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
735   PetscCall(PetscMalloc1(lszx, &x2d));
736   PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
737 
738   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
739   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
740   PetscCheckScaLapackInfo("descinit", info);
741 
742   /* redistribute x as a column of a 2d matrix */
743   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
744 
745   /* call ScaLAPACK subroutine */
746   switch (A->factortype) {
747   case MAT_FACTOR_LU:
748     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
749     PetscCheckScaLapackInfo("getrs", info);
750     break;
751   case MAT_FACTOR_CHOLESKY:
752     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
753     PetscCheckScaLapackInfo("potrs", info);
754     break;
755   default:
756     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
757   }
758 
759   /* redistribute x from a column of a 2d matrix */
760   PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
761 
762   PetscCall(PetscFree(x2d));
763   PetscCall(VecRestoreArray(X, &x));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatSolve_ScaLAPACK(A, B, X));
771   PetscCall(VecAXPY(X, 1, Y));
772   PetscFunctionReturn(PETSC_SUCCESS);
773 }
774 
775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
776 {
777   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
778   PetscBool      flg1, flg2;
779   PetscBLASInt   one = 1, info;
780   Mat            C;
781   MatType        type;
782 
783   PetscFunctionBegin;
784   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
785   PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
786   if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
787   if (flg2) {
788     if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
789     else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
790     C = X;
791   } else {
792     PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
793   }
794   x = (Mat_ScaLAPACK *)C->data;
795 
796   switch (A->factortype) {
797   case MAT_FACTOR_LU:
798     PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
799     PetscCheckScaLapackInfo("getrs", info);
800     break;
801   case MAT_FACTOR_CHOLESKY:
802     PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
803     PetscCheckScaLapackInfo("potrs", info);
804     break;
805   default:
806     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
807   }
808   if (!flg2) {
809     PetscCall(MatGetType(X, &type));
810     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
811     PetscCall(MatDestroy(&C));
812   }
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
816 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
817 {
818   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
819   PetscBLASInt   one = 1, info;
820 
821   PetscFunctionBegin;
822   if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
823   PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
824   PetscCheckScaLapackInfo("getrf", info);
825   A->factortype = MAT_FACTOR_LU;
826   A->assembled  = PETSC_TRUE;
827 
828   PetscCall(PetscFree(A->solvertype));
829   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
834 {
835   PetscFunctionBegin;
836   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
837   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
842 {
843   PetscFunctionBegin;
844   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
845   PetscFunctionReturn(PETSC_SUCCESS);
846 }
847 
848 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
849 {
850   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
851   PetscBLASInt   one = 1, info;
852 
853   PetscFunctionBegin;
854   PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
855   PetscCheckScaLapackInfo("potrf", info);
856   A->factortype = MAT_FACTOR_CHOLESKY;
857   A->assembled  = PETSC_TRUE;
858 
859   PetscCall(PetscFree(A->solvertype));
860   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
861   PetscFunctionReturn(PETSC_SUCCESS);
862 }
863 
864 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
865 {
866   PetscFunctionBegin;
867   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
868   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871 
872 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
873 {
874   PetscFunctionBegin;
875   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
876   PetscFunctionReturn(PETSC_SUCCESS);
877 }
878 
879 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
880 {
881   PetscFunctionBegin;
882   *type = MATSOLVERSCALAPACK;
883   PetscFunctionReturn(PETSC_SUCCESS);
884 }
885 
886 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
887 {
888   Mat            B;
889   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
890 
891   PetscFunctionBegin;
892   /* Create the factorization matrix */
893   PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
894   B->trivialsymbolic = PETSC_TRUE;
895   B->factortype      = ftype;
896   PetscCall(PetscFree(B->solvertype));
897   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
898 
899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
900   *F = B;
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
904 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
905 {
906   PetscFunctionBegin;
907   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
908   PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
913 {
914   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
915   PetscBLASInt   one = 1, lwork = 0;
916   const char    *ntype;
917   PetscScalar   *work = NULL, dummy;
918 
919   PetscFunctionBegin;
920   switch (type) {
921   case NORM_1:
922     ntype = "1";
923     lwork = PetscMax(a->locr, a->locc);
924     break;
925   case NORM_FROBENIUS:
926     ntype = "F";
927     work  = &dummy;
928     break;
929   case NORM_INFINITY:
930     ntype = "I";
931     lwork = PetscMax(a->locr, a->locc);
932     break;
933   default:
934     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
935   }
936   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
937   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
938   if (lwork) PetscCall(PetscFree(work));
939   PetscFunctionReturn(PETSC_SUCCESS);
940 }
941 
942 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
943 {
944   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
945 
946   PetscFunctionBegin;
947   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
948   PetscFunctionReturn(PETSC_SUCCESS);
949 }
950 
951 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
952 {
953   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
954   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
955 
956   PetscFunctionBegin;
957   if (rows) {
958     n     = a->locr;
959     nb    = a->mb;
960     isrc  = a->rsrc;
961     nproc = a->grid->nprow;
962     iproc = a->grid->myrow;
963     PetscCall(PetscMalloc1(n, &idx));
964     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
965     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
966   }
967   if (cols) {
968     n     = a->locc;
969     nb    = a->nb;
970     isrc  = a->csrc;
971     nproc = a->grid->npcol;
972     iproc = a->grid->mycol;
973     PetscCall(PetscMalloc1(n, &idx));
974     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
975     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
976   }
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
981 {
982   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
983   Mat                Bmpi;
984   MPI_Comm           comm;
985   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
986   const PetscInt    *ranges, *branges, *cwork;
987   const PetscScalar *vwork;
988   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
989   PetscScalar       *barray;
990   PetscBool          differ = PETSC_FALSE;
991   PetscMPIInt        size;
992 
993   PetscFunctionBegin;
994   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
995   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
996 
997   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
998     PetscCallMPI(MPI_Comm_size(comm, &size));
999     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
1000     for (i = 0; i < size; i++)
1001       if (ranges[i + 1] != branges[i + 1]) {
1002         differ = PETSC_TRUE;
1003         break;
1004       }
1005   }
1006 
1007   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1008     PetscCall(MatCreate(comm, &Bmpi));
1009     m = PETSC_DECIDE;
1010     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1011     n = PETSC_DECIDE;
1012     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1013     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1014     PetscCall(MatSetType(Bmpi, MATDENSE));
1015     PetscCall(MatSetUp(Bmpi));
1016 
1017     /* create ScaLAPACK descriptor for B (1d block distribution) */
1018     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1019     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1020     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1021     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1022     PetscCheckScaLapackInfo("descinit", info);
1023 
1024     /* redistribute matrix */
1025     PetscCall(MatDenseGetArray(Bmpi, &barray));
1026     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1027     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1028     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1029     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1030 
1031     /* transfer rows of auxiliary matrix to the final matrix B */
1032     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1033     for (i = rstart; i < rend; i++) {
1034       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1035       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1036       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1037     }
1038     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1039     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1040     PetscCall(MatDestroy(&Bmpi));
1041 
1042   } else { /* normal cases */
1043 
1044     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1045     else {
1046       PetscCall(MatCreate(comm, &Bmpi));
1047       m = PETSC_DECIDE;
1048       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1049       n = PETSC_DECIDE;
1050       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1051       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1052       PetscCall(MatSetType(Bmpi, MATDENSE));
1053       PetscCall(MatSetUp(Bmpi));
1054     }
1055 
1056     /* create ScaLAPACK descriptor for B (1d block distribution) */
1057     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1058     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1059     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1060     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1061     PetscCheckScaLapackInfo("descinit", info);
1062 
1063     /* redistribute matrix */
1064     PetscCall(MatDenseGetArray(Bmpi, &barray));
1065     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1066     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1067 
1068     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1069     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1070     if (reuse == MAT_INPLACE_MATRIX) {
1071       PetscCall(MatHeaderReplace(A, &Bmpi));
1072     } else *B = Bmpi;
1073   }
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1078 {
1079   const PetscInt *ranges;
1080   PetscMPIInt     size;
1081   PetscInt        i, n;
1082 
1083   PetscFunctionBegin;
1084   *correct = PETSC_TRUE;
1085   PetscCallMPI(MPI_Comm_size(map->comm, &size));
1086   if (size > 1) {
1087     PetscCall(PetscLayoutGetRanges(map, &ranges));
1088     n = ranges[1] - ranges[0];
1089     for (i = 1; i < size; i++)
1090       if (ranges[i + 1] - ranges[i] != n) break;
1091     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1092   }
1093   PetscFunctionReturn(PETSC_SUCCESS);
1094 }
1095 
1096 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1097 {
1098   Mat_ScaLAPACK     *b;
1099   Mat                Bmpi;
1100   MPI_Comm           comm;
1101   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
1102   const PetscInt    *ranges, *rows, *cols;
1103   PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
1104   const PetscScalar *aarray;
1105   IS                 ir, ic;
1106   PetscInt           lda;
1107   PetscBool          flg;
1108 
1109   PetscFunctionBegin;
1110   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1111 
1112   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1113   else {
1114     PetscCall(MatCreate(comm, &Bmpi));
1115     m = PETSC_DECIDE;
1116     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1117     n = PETSC_DECIDE;
1118     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1119     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1120     PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1121     PetscCall(MatSetUp(Bmpi));
1122   }
1123   b = (Mat_ScaLAPACK *)Bmpi->data;
1124 
1125   PetscCall(MatDenseGetLDA(A, &lda));
1126   PetscCall(MatDenseGetArrayRead(A, &aarray));
1127   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1128   if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1129   if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1130     /* create ScaLAPACK descriptor for A (1d block distribution) */
1131     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1132     PetscCall(PetscBLASIntCast(ranges[1], &amb));        /* row block size */
1133     PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1134     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1135     PetscCheckScaLapackInfo("descinit", info);
1136 
1137     /* redistribute matrix */
1138     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1139     Bmpi->nooffprocentries = PETSC_TRUE;
1140   } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1141     PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1142     b->roworiented = PETSC_FALSE;
1143     PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1144     PetscCall(ISGetIndices(ir, &rows));
1145     PetscCall(ISGetIndices(ic, &cols));
1146     PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1147     PetscCall(ISRestoreIndices(ir, &rows));
1148     PetscCall(ISRestoreIndices(ic, &cols));
1149     PetscCall(ISDestroy(&ic));
1150     PetscCall(ISDestroy(&ir));
1151   }
1152   PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1153   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1154   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1155   if (reuse == MAT_INPLACE_MATRIX) {
1156     PetscCall(MatHeaderReplace(A, &Bmpi));
1157   } else *B = Bmpi;
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1162 {
1163   Mat                mat_scal;
1164   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1165   const PetscInt    *cols;
1166   const PetscScalar *vals;
1167 
1168   PetscFunctionBegin;
1169   if (reuse == MAT_REUSE_MATRIX) {
1170     mat_scal = *newmat;
1171     PetscCall(MatZeroEntries(mat_scal));
1172   } else {
1173     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1174     m = PETSC_DECIDE;
1175     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1176     n = PETSC_DECIDE;
1177     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1178     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1179     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1180     PetscCall(MatSetUp(mat_scal));
1181   }
1182   for (row = rstart; row < rend; row++) {
1183     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1184     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1185     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1186   }
1187   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1188   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1189 
1190   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1191   else *newmat = mat_scal;
1192   PetscFunctionReturn(PETSC_SUCCESS);
1193 }
1194 
1195 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1196 {
1197   Mat                mat_scal;
1198   PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1199   const PetscInt    *cols;
1200   const PetscScalar *vals;
1201   PetscScalar        v;
1202 
1203   PetscFunctionBegin;
1204   if (reuse == MAT_REUSE_MATRIX) {
1205     mat_scal = *newmat;
1206     PetscCall(MatZeroEntries(mat_scal));
1207   } else {
1208     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1209     m = PETSC_DECIDE;
1210     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1211     n = PETSC_DECIDE;
1212     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1213     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1214     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1215     PetscCall(MatSetUp(mat_scal));
1216   }
1217   PetscCall(MatGetRowUpperTriangular(A));
1218   for (row = rstart; row < rend; row++) {
1219     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1220     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1221     for (j = 0; j < ncols; j++) { /* lower triangular part */
1222       if (cols[j] == row) continue;
1223       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1224       PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1225     }
1226     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1227   }
1228   PetscCall(MatRestoreRowUpperTriangular(A));
1229   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1230   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1231 
1232   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1233   else *newmat = mat_scal;
1234   PetscFunctionReturn(PETSC_SUCCESS);
1235 }
1236 
1237 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1238 {
1239   Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
1240   PetscInt       sz = 0;
1241 
1242   PetscFunctionBegin;
1243   PetscCall(PetscLayoutSetUp(A->rmap));
1244   PetscCall(PetscLayoutSetUp(A->cmap));
1245   if (!a->lld) a->lld = a->locr;
1246 
1247   PetscCall(PetscFree(a->loc));
1248   PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1249   PetscCall(PetscCalloc1(sz, &a->loc));
1250 
1251   A->preallocated = PETSC_TRUE;
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1256 {
1257   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
1258   Mat_ScaLAPACK_Grid *grid;
1259   PetscBool           flg;
1260   MPI_Comm            icomm;
1261 
1262   PetscFunctionBegin;
1263   PetscCall(MatStashDestroy_Private(&A->stash));
1264   PetscCall(PetscFree(a->loc));
1265   PetscCall(PetscFree(a->pivots));
1266   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1267   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1268   if (--grid->grid_refct == 0) {
1269     Cblacs_gridexit(grid->ictxt);
1270     Cblacs_gridexit(grid->ictxrow);
1271     Cblacs_gridexit(grid->ictxcol);
1272     PetscCall(PetscFree(grid));
1273     PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1274   }
1275   PetscCall(PetscCommDestroy(&icomm));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1277   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1279   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1280   PetscCall(PetscFree(A->data));
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 
1284 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1285 {
1286   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
1287   PetscBLASInt   info = 0;
1288   PetscBool      flg;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(PetscLayoutSetUp(A->rmap));
1292   PetscCall(PetscLayoutSetUp(A->cmap));
1293 
1294   /* check that the layout is as enforced by MatCreateScaLAPACK() */
1295   PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1296   PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1297   PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1298   PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1299 
1300   /* compute local sizes */
1301   PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1302   PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1303   a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1304   a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1305   a->lld  = PetscMax(1, a->locr);
1306 
1307   /* allocate local array */
1308   PetscCall(MatScaLAPACKSetPreallocation(A));
1309 
1310   /* set up ScaLAPACK descriptor */
1311   PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1312   PetscCheckScaLapackInfo("descinit", info);
1313   PetscFunctionReturn(PETSC_SUCCESS);
1314 }
1315 
1316 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1317 {
1318   PetscInt nstash, reallocs;
1319 
1320   PetscFunctionBegin;
1321   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1322   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1323   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1324   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1325   PetscFunctionReturn(PETSC_SUCCESS);
1326 }
1327 
1328 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1329 {
1330   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1331   PetscMPIInt    n;
1332   PetscInt       i, flg, *row, *col;
1333   PetscScalar   *val;
1334   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
1335 
1336   PetscFunctionBegin;
1337   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1338   while (1) {
1339     PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1340     if (!flg) break;
1341     for (i = 0; i < n; i++) {
1342       PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1343       PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1344       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1345       PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1346       switch (A->insertmode) {
1347       case INSERT_VALUES:
1348         a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1349         break;
1350       case ADD_VALUES:
1351         a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1352         break;
1353       default:
1354         SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1355       }
1356     }
1357   }
1358   PetscCall(MatStashScatterEnd_Private(&A->stash));
1359   PetscFunctionReturn(PETSC_SUCCESS);
1360 }
1361 
1362 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1363 {
1364   Mat      Adense, As;
1365   MPI_Comm comm;
1366 
1367   PetscFunctionBegin;
1368   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1369   PetscCall(MatCreate(comm, &Adense));
1370   PetscCall(MatSetType(Adense, MATDENSE));
1371   PetscCall(MatLoad(Adense, viewer));
1372   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1373   PetscCall(MatDestroy(&Adense));
1374   PetscCall(MatHeaderReplace(newMat, &As));
1375   PetscFunctionReturn(PETSC_SUCCESS);
1376 }
1377 
1378 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1379                                        NULL,
1380                                        NULL,
1381                                        MatMult_ScaLAPACK,
1382                                        /* 4*/ MatMultAdd_ScaLAPACK,
1383                                        MatMultTranspose_ScaLAPACK,
1384                                        MatMultTransposeAdd_ScaLAPACK,
1385                                        MatSolve_ScaLAPACK,
1386                                        MatSolveAdd_ScaLAPACK,
1387                                        NULL,
1388                                        /*10*/ NULL,
1389                                        MatLUFactor_ScaLAPACK,
1390                                        MatCholeskyFactor_ScaLAPACK,
1391                                        NULL,
1392                                        MatTranspose_ScaLAPACK,
1393                                        /*15*/ MatGetInfo_ScaLAPACK,
1394                                        NULL,
1395                                        MatGetDiagonal_ScaLAPACK,
1396                                        MatDiagonalScale_ScaLAPACK,
1397                                        MatNorm_ScaLAPACK,
1398                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1399                                        MatAssemblyEnd_ScaLAPACK,
1400                                        MatSetOption_ScaLAPACK,
1401                                        MatZeroEntries_ScaLAPACK,
1402                                        /*24*/ NULL,
1403                                        MatLUFactorSymbolic_ScaLAPACK,
1404                                        MatLUFactorNumeric_ScaLAPACK,
1405                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1406                                        MatCholeskyFactorNumeric_ScaLAPACK,
1407                                        /*29*/ MatSetUp_ScaLAPACK,
1408                                        NULL,
1409                                        NULL,
1410                                        NULL,
1411                                        NULL,
1412                                        /*34*/ MatDuplicate_ScaLAPACK,
1413                                        NULL,
1414                                        NULL,
1415                                        NULL,
1416                                        NULL,
1417                                        /*39*/ MatAXPY_ScaLAPACK,
1418                                        NULL,
1419                                        NULL,
1420                                        NULL,
1421                                        MatCopy_ScaLAPACK,
1422                                        /*44*/ NULL,
1423                                        MatScale_ScaLAPACK,
1424                                        MatShift_ScaLAPACK,
1425                                        NULL,
1426                                        NULL,
1427                                        /*49*/ NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        NULL,
1431                                        NULL,
1432                                        /*54*/ NULL,
1433                                        NULL,
1434                                        NULL,
1435                                        NULL,
1436                                        NULL,
1437                                        /*59*/ NULL,
1438                                        MatDestroy_ScaLAPACK,
1439                                        MatView_ScaLAPACK,
1440                                        NULL,
1441                                        NULL,
1442                                        /*64*/ NULL,
1443                                        NULL,
1444                                        NULL,
1445                                        NULL,
1446                                        NULL,
1447                                        /*69*/ NULL,
1448                                        NULL,
1449                                        MatConvert_ScaLAPACK_Dense,
1450                                        NULL,
1451                                        NULL,
1452                                        /*74*/ NULL,
1453                                        NULL,
1454                                        NULL,
1455                                        NULL,
1456                                        NULL,
1457                                        /*79*/ NULL,
1458                                        NULL,
1459                                        NULL,
1460                                        NULL,
1461                                        MatLoad_ScaLAPACK,
1462                                        /*84*/ NULL,
1463                                        NULL,
1464                                        NULL,
1465                                        NULL,
1466                                        NULL,
1467                                        /*89*/ NULL,
1468                                        NULL,
1469                                        MatMatMultNumeric_ScaLAPACK,
1470                                        NULL,
1471                                        NULL,
1472                                        /*94*/ NULL,
1473                                        NULL,
1474                                        NULL,
1475                                        MatMatTransposeMultNumeric_ScaLAPACK,
1476                                        NULL,
1477                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1478                                        NULL,
1479                                        NULL,
1480                                        MatConjugate_ScaLAPACK,
1481                                        NULL,
1482                                        /*104*/ NULL,
1483                                        NULL,
1484                                        NULL,
1485                                        NULL,
1486                                        NULL,
1487                                        /*109*/ MatMatSolve_ScaLAPACK,
1488                                        NULL,
1489                                        NULL,
1490                                        NULL,
1491                                        MatMissingDiagonal_ScaLAPACK,
1492                                        /*114*/ NULL,
1493                                        NULL,
1494                                        NULL,
1495                                        NULL,
1496                                        NULL,
1497                                        /*119*/ NULL,
1498                                        MatHermitianTranspose_ScaLAPACK,
1499                                        MatMultHermitianTranspose_ScaLAPACK,
1500                                        MatMultHermitianTransposeAdd_ScaLAPACK,
1501                                        NULL,
1502                                        /*124*/ NULL,
1503                                        NULL,
1504                                        NULL,
1505                                        NULL,
1506                                        NULL,
1507                                        /*129*/ NULL,
1508                                        NULL,
1509                                        NULL,
1510                                        MatTransposeMatMultNumeric_ScaLAPACK,
1511                                        NULL,
1512                                        /*134*/ NULL,
1513                                        NULL,
1514                                        NULL,
1515                                        NULL,
1516                                        NULL,
1517                                        NULL,
1518                                        /*140*/ NULL,
1519                                        NULL,
1520                                        NULL,
1521                                        NULL,
1522                                        NULL,
1523                                        /*145*/ NULL,
1524                                        NULL,
1525                                        NULL,
1526                                        NULL,
1527                                        NULL,
1528                                        /*150*/ NULL,
1529                                        NULL,
1530                                        NULL,
1531                                        NULL};
1532 
1533 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1534 {
1535   PetscInt          *owner, *startv, *starti, bs2;
1536   PetscInt           size = stash->size, nsends;
1537   PetscInt          *sindices, **rindices, j, l;
1538   PetscScalar      **rvalues, *svalues;
1539   MPI_Comm           comm = stash->comm;
1540   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1541   PetscMPIInt        tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1542   PetscInt          *sp_idx, *sp_idy;
1543   PetscScalar       *sp_val;
1544   PetscMatStashSpace space, space_next;
1545   PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
1546   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;
1547 
1548   PetscFunctionBegin;
1549   { /* make sure all processors are either in INSERTMODE or ADDMODE */
1550     InsertMode addv;
1551     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1552     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1553     mat->insertmode = addv; /* in case this processor had no cache */
1554   }
1555 
1556   bs2 = stash->bs * stash->bs;
1557 
1558   /*  first count number of contributors to each processor */
1559   PetscCall(PetscCalloc1(size, &nlengths));
1560   PetscCall(PetscMalloc1(stash->n + 1, &owner));
1561 
1562   ii = j = 0;
1563   space  = stash->space_head;
1564   while (space) {
1565     space_next = space->next;
1566     for (l = 0; l < space->local_used; l++) {
1567       PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1568       PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1569       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1570       j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1571       nlengths[j]++;
1572       owner[ii] = j;
1573       ii++;
1574     }
1575     space = space_next;
1576   }
1577 
1578   /* Now check what procs get messages - and compute nsends. */
1579   PetscCall(PetscCalloc1(size, &sizes));
1580   nsends = 0;
1581   for (PetscMPIInt i = 0; i < size; i++) {
1582     if (nlengths[i]) {
1583       sizes[i] = 1;
1584       nsends++;
1585     }
1586   }
1587 
1588   {
1589     PetscMPIInt *onodes, *olengths;
1590 
1591     /* Determine the number of messages to expect, their lengths, from from-ids */
1592     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1593     PetscCall(PetscMPIIntCast(nsends, &insends));
1594     PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1595     /* since clubbing row,col - lengths are multiplied by 2 */
1596     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
1597     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1598     /* values are size 'bs2' lengths (and remove earlier factor 2 */
1599     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
1600     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1601     PetscCall(PetscFree(onodes));
1602     PetscCall(PetscFree(olengths));
1603   }
1604 
1605   /* do sends:
1606       1) starts[i] gives the starting index in svalues for stuff going to
1607          the ith processor
1608   */
1609   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1610   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1611   PetscCall(PetscMalloc2(size, &startv, size, &starti));
1612   /* use 2 sends the first with all_a, the next with all_i and all_j */
1613   startv[0] = 0;
1614   starti[0] = 0;
1615   for (PetscMPIInt i = 1; i < size; i++) {
1616     startv[i] = startv[i - 1] + nlengths[i - 1];
1617     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1618   }
1619 
1620   ii    = 0;
1621   space = stash->space_head;
1622   while (space) {
1623     space_next = space->next;
1624     sp_idx     = space->idx;
1625     sp_idy     = space->idy;
1626     sp_val     = space->val;
1627     for (l = 0; l < space->local_used; l++) {
1628       j = owner[ii];
1629       if (bs2 == 1) {
1630         svalues[startv[j]] = sp_val[l];
1631       } else {
1632         PetscInt     k;
1633         PetscScalar *buf1, *buf2;
1634         buf1 = svalues + bs2 * startv[j];
1635         buf2 = space->val + bs2 * l;
1636         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1637       }
1638       sindices[starti[j]]               = sp_idx[l];
1639       sindices[starti[j] + nlengths[j]] = sp_idy[l];
1640       startv[j]++;
1641       starti[j]++;
1642       ii++;
1643     }
1644     space = space_next;
1645   }
1646   startv[0] = 0;
1647   for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1648 
1649   for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1650     if (sizes[i]) {
1651       PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1652       PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1653     }
1654   }
1655 #if defined(PETSC_USE_INFO)
1656   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1657   for (PetscMPIInt i = 0; i < size; i++) {
1658     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1659   }
1660 #endif
1661   PetscCall(PetscFree(nlengths));
1662   PetscCall(PetscFree(owner));
1663   PetscCall(PetscFree2(startv, starti));
1664   PetscCall(PetscFree(sizes));
1665 
1666   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1667   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1668 
1669   for (PetscMPIInt i = 0; i < nreceives; i++) {
1670     recv_waits[2 * i]     = recv_waits1[i];
1671     recv_waits[2 * i + 1] = recv_waits2[i];
1672   }
1673   stash->recv_waits = recv_waits;
1674 
1675   PetscCall(PetscFree(recv_waits1));
1676   PetscCall(PetscFree(recv_waits2));
1677 
1678   stash->svalues         = svalues;
1679   stash->sindices        = sindices;
1680   stash->rvalues         = rvalues;
1681   stash->rindices        = rindices;
1682   stash->send_waits      = send_waits;
1683   stash->nsends          = (PetscMPIInt)nsends;
1684   stash->nrecvs          = nreceives;
1685   stash->reproduce_count = 0;
1686   PetscFunctionReturn(PETSC_SUCCESS);
1687 }
1688 
1689 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1690 {
1691   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1692 
1693   PetscFunctionBegin;
1694   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1695   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1696   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1697   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1698   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1699   PetscFunctionReturn(PETSC_SUCCESS);
1700 }
1701 
1702 /*@
1703   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1704   the `MATSCALAPACK` matrix
1705 
1706   Logically Collective
1707 
1708   Input Parameters:
1709 + A  - a `MATSCALAPACK` matrix
1710 . mb - the row block size
1711 - nb - the column block size
1712 
1713   Level: intermediate
1714 
1715   Note:
1716   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1717 
1718 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1719 @*/
1720 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1721 {
1722   PetscFunctionBegin;
1723   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1724   PetscValidLogicalCollectiveInt(A, mb, 2);
1725   PetscValidLogicalCollectiveInt(A, nb, 3);
1726   PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1727   PetscFunctionReturn(PETSC_SUCCESS);
1728 }
1729 
1730 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1731 {
1732   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1733 
1734   PetscFunctionBegin;
1735   if (mb) *mb = a->mb;
1736   if (nb) *nb = a->nb;
1737   PetscFunctionReturn(PETSC_SUCCESS);
1738 }
1739 
1740 /*@
1741   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1742   the `MATSCALAPACK` matrix
1743 
1744   Not Collective
1745 
1746   Input Parameter:
1747 . A - a `MATSCALAPACK` matrix
1748 
1749   Output Parameters:
1750 + mb - the row block size
1751 - nb - the column block size
1752 
1753   Level: intermediate
1754 
1755   Note:
1756   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1757 
1758 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1759 @*/
1760 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1761 {
1762   PetscFunctionBegin;
1763   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1764   PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1765   PetscFunctionReturn(PETSC_SUCCESS);
1766 }
1767 
1768 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1769 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1770 
1771 /*MC
1772    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1773 
1774    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1775 
1776    Options Database Keys:
1777 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1778 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1779 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1780 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1781 
1782    Level: intermediate
1783 
1784   Note:
1785    Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1786    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1787    the given rank.
1788 
1789 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1790 M*/
1791 
1792 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1793 {
1794   Mat_ScaLAPACK      *a;
1795   PetscBool           flg, flg1;
1796   Mat_ScaLAPACK_Grid *grid;
1797   MPI_Comm            icomm;
1798   PetscBLASInt        nprow, npcol, myrow, mycol;
1799   PetscInt            optv1, k = 2, array[2] = {0, 0};
1800   PetscMPIInt         size;
1801 
1802   PetscFunctionBegin;
1803   A->ops[0]     = MatOps_Values;
1804   A->insertmode = NOT_SET_VALUES;
1805 
1806   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1807   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1808   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1809   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1810   A->stash.ScatterDestroy = NULL;
1811 
1812   PetscCall(PetscNew(&a));
1813   A->data = (void *)a;
1814 
1815   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1816   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1817     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1818     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1819     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1820   }
1821   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1822   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1823   if (!flg) {
1824     PetscCall(PetscNew(&grid));
1825 
1826     PetscCallMPI(MPI_Comm_size(icomm, &size));
1827     PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));
1828 
1829     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1830     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1831     if (flg1) {
1832       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1833       PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1834     }
1835     PetscOptionsEnd();
1836 
1837     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1838     grid->npcol = size / grid->nprow;
1839     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1840     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1841     grid->ictxt = Csys2blacs_handle(icomm);
1842     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1843     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1844     grid->grid_refct = 1;
1845     grid->nprow      = nprow;
1846     grid->npcol      = npcol;
1847     grid->myrow      = myrow;
1848     grid->mycol      = mycol;
1849     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1850     grid->ictxrow = Csys2blacs_handle(icomm);
1851     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1852     grid->ictxcol = Csys2blacs_handle(icomm);
1853     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1854     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1855 
1856   } else grid->grid_refct++;
1857   PetscCall(PetscCommDestroy(&icomm));
1858   a->grid = grid;
1859   a->mb   = DEFAULT_BLOCKSIZE;
1860   a->nb   = DEFAULT_BLOCKSIZE;
1861 
1862   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1863   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1864   if (flg) {
1865     a->mb = (PetscMPIInt)array[0];
1866     a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1867   }
1868   PetscOptionsEnd();
1869 
1870   a->roworiented = PETSC_TRUE;
1871   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1872   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1873   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1874   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1875   PetscFunctionReturn(PETSC_SUCCESS);
1876 }
1877 
1878 /*@C
1879   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1880   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1881 
1882   Collective
1883 
1884   Input Parameters:
1885 + comm - MPI communicator
1886 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1887 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1888 . M    - number of global rows
1889 . N    - number of global columns
1890 . rsrc - coordinate of process that owns the first row of the distributed matrix
1891 - csrc - coordinate of process that owns the first column of the distributed matrix
1892 
1893   Output Parameter:
1894 . A - the matrix
1895 
1896   Options Database Key:
1897 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1898 
1899   Level: intermediate
1900 
1901   Notes:
1902   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1903 
1904   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1905   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1906   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1907 
1908   Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1909   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1910   significance and are thus ignored. The block sizes refer to the values
1911   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1912 
1913 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1914 @*/
1915 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1916 {
1917   Mat_ScaLAPACK *a;
1918   PetscInt       m, n;
1919 
1920   PetscFunctionBegin;
1921   PetscCall(MatCreate(comm, A));
1922   PetscCall(MatSetType(*A, MATSCALAPACK));
1923   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1924   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1925   m = PETSC_DECIDE;
1926   PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1927   n = PETSC_DECIDE;
1928   PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1929   PetscCall(MatSetSizes(*A, m, n, M, N));
1930   a = (Mat_ScaLAPACK *)(*A)->data;
1931   PetscCall(PetscBLASIntCast(M, &a->M));
1932   PetscCall(PetscBLASIntCast(N, &a->N));
1933   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1934   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1935   PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1936   PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1937   PetscCall(MatSetUp(*A));
1938   PetscFunctionReturn(PETSC_SUCCESS);
1939 }
1940