xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 517f4e3460cd8c7e73c19b8bf533f1efe47f30a7)
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
/* BibTeX entry for the ScaLAPACK Users' Guide; ScaLAPACKCite tracks whether
   it has already been registered.  NOTE(review): the registration call
   (presumably PetscCitationsRegister) is outside this chunk — confirm. */
const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
                                       "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
                                       "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
                                       "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
                                       "       TITLE = {Sca{LAPACK} Users' Guide},\n"
                                       "       PUBLISHER = {SIAM},\n"
                                       "       ADDRESS = {Philadelphia, PA},\n"
                                       "       YEAR = 1997\n"
                                       "}\n";
static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/* Releases the MPI keyval used to attach the Mat_ScaLAPACK_Grid attribute to
   communicators.  NOTE(review): the site that registers this cleanup routine
   is not visible in this chunk. */
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(PETSC_SUCCESS);
}
29 
/* MatView for MATSCALAPACK: for ASCII info formats prints a summary of the
   2d block-cyclic distribution (no entries); otherwise converts the matrix
   to MATDENSE and delegates viewing to the dense copy. */
static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
{
  Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* layout summary only: block sizes, grid shape, source process, local extents */
      PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
      PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
      PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
      PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing extra to report for factored matrices */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
  /* convert to dense format and call MatView() */
  PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
  PetscCall(MatView(Adense, viewer));
  PetscCall(MatDestroy(&Adense));
  PetscFunctionReturn(PETSC_SUCCESS);
}
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
/* MatSetValues for MATSCALAPACK: inserts or adds a logically dense nr x nc
   block of values.  vals is row-major when a->roworiented, column-major
   otherwise.  Each global (row,col) pair is mapped to local indices and an
   owning process with p?infog2l; locally owned entries are written directly
   into the local array, off-process entries are pushed to the stash for
   delivery during assembly.  Negative indices are skipped.  Only
   INSERT_VALUES and ADD_VALUES are supported. */
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); /* ScaLAPACK global indices are 1-based */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
      /* map global (gridx,gcidx) -> local (lridx,lcidx) and owner (rsrc,csrc) */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
        /* locally owned: write straight into the column-major local array (lld leading dimension) */
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
            break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
            break;
          default:
            a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
            break;
          }
        }
      } else {
        /* off-process entry: stash it; MatAssemblyBegin/End will deliver it */
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
158 
/* Shared kernel for MatMult/MatMultTranspose and their Add variants:
     y = A *x + beta*y   (transpose == PETSC_FALSE)
     y = A'*x + beta*y   (transpose == PETSC_TRUE)
   x and y are raw arrays in PETSc's 1d block row distribution.  Each is
   wrapped in a 1d ScaLAPACK descriptor, redistributed to the matrix's 2d
   block-cyclic layout with p?gemr2d, multiplied with p?gemv, and the result
   redistributed back.  When beta == 0 the incoming contents of y are not
   redistributed (they are ignored by gemv). */
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors (local sizes from numroc) */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine: y2d = A'*x2d + beta*y2d */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors (local sizes from numroc) */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine: y2d = A*x2d + beta*y2d */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(PETSC_SUCCESS);
}
245 
246 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
247 {
248   const PetscScalar *xarray;
249   PetscScalar       *yarray;
250 
251   PetscFunctionBegin;
252   PetscCall(VecGetArrayRead(x, &xarray));
253   PetscCall(VecGetArray(y, &yarray));
254   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
255   PetscCall(VecRestoreArrayRead(x, &xarray));
256   PetscCall(VecRestoreArray(y, &yarray));
257   PetscFunctionReturn(PETSC_SUCCESS);
258 }
259 
260 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
261 {
262   const PetscScalar *xarray;
263   PetscScalar       *yarray;
264 
265   PetscFunctionBegin;
266   PetscCall(VecGetArrayRead(x, &xarray));
267   PetscCall(VecGetArray(y, &yarray));
268   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
269   PetscCall(VecRestoreArrayRead(x, &xarray));
270   PetscCall(VecRestoreArray(y, &yarray));
271   PetscFunctionReturn(PETSC_SUCCESS);
272 }
273 
274 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
275 {
276   const PetscScalar *xarray;
277   PetscScalar       *zarray;
278 
279   PetscFunctionBegin;
280   if (y != z) PetscCall(VecCopy(y, z));
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArray(z, &zarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArray(z, &zarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
305 {
306   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
307   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
308   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
309   PetscScalar    sone = 1.0, zero = 0.0;
310   PetscBLASInt   one = 1;
311 
312   PetscFunctionBegin;
313   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
314   C->assembled = PETSC_TRUE;
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
/* Symbolic phase of C = A*B: sets C's sizes and type, allocates it, and
   registers the numeric routine invoked later by MatProductNumeric().
   fill is ignored (dense storage). */
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}
327 
328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
329 {
330   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
331   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
332   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
333   PetscScalar    sone = 1.0, zero = 0.0;
334   PetscBLASInt   one = 1;
335 
336   PetscFunctionBegin;
337   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
338   C->assembled = PETSC_TRUE;
339   PetscFunctionReturn(PETSC_SUCCESS);
340 }
341 
342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
343 {
344   PetscFunctionBegin;
345   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
346   PetscCall(MatSetType(C, MATSCALAPACK));
347   PetscCall(MatSetUp(C));
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
/* Hooks the AB (C = A*B) product path: installs the type-specific symbolic
   routine and the generic MatProductSymbolic_AB driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
358 
/* Hooks the ABt (C = A*B') product path: installs the type-specific symbolic
   routine and the generic MatProductSymbolic_ABt driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
366 
367 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
368 {
369   Mat_Product *product = C->product;
370 
371   PetscFunctionBegin;
372   switch (product->type) {
373   case MATPRODUCT_AB:
374     PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
375     break;
376   case MATPRODUCT_ABt:
377     PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
378     break;
379   default:
380     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
381   }
382   PetscFunctionReturn(PETSC_SUCCESS);
383 }
384 
/* Extracts the diagonal of A into D.  A temporary 2d-distributed vector d2d
   is filled entry by entry with p?elget/p?elset, then redistributed to D's
   1d layout with p?gemr2d.  The row variant (diagonal stored as a column,
   elget scope "R") is used when M <= N, the column variant (row vector,
   scope "C") otherwise. */
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *darray, *d2d, v;
  const PetscInt *ranges;
  PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D, &darray));

  if (A->rmap->N <= A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector (zeroed: elset only writes owned entries) */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal (1-based global indices) */
    for (j = 1; j <= a->M; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector (zeroed: elset only writes owned entries) */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal (1-based global indices) */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(PETSC_SUCCESS);
}
456 
/* A = diag(L) * A * diag(R); either vector may be NULL to skip that side.
   Each scaling vector is redistributed from PETSc's 1d layout onto one
   process row (R) or column (L) of the 2d grid with p?gemr2d, broadcast
   across the grid with BLACS gebs2d/gebr2d so every process holds the
   factors for its local columns/rows, then applied entrywise. */
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    /* right scaling: multiplies column j of A by R[j] */
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, other rows receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling: d2d[j] multiplies local column j */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    /* left scaling: multiplies row i of A by L[i] */
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, other columns receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling: d2d[i] multiplies local row i */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
531 
/* A ScaLAPACK matrix is stored dense, so diagonal entries are always
   structurally present: *missing is always PETSC_FALSE and d (index of the
   first missing entry) is never written. */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
538 
539 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
540 {
541   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
542   PetscBLASInt   n, one = 1;
543 
544   PetscFunctionBegin;
545   n = x->lld * x->locc;
546   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
551 {
552   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
553   PetscBLASInt   i, n;
554   PetscScalar    v;
555 
556   PetscFunctionBegin;
557   n = PetscMin(x->M, x->N);
558   for (i = 1; i <= n; i++) {
559     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
560     v += alpha;
561     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
562   }
563   PetscFunctionReturn(PETSC_SUCCESS);
564 }
565 
566 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
567 {
568   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
569   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
570   PetscBLASInt   one  = 1;
571   PetscScalar    beta = 1.0;
572 
573   PetscFunctionBegin;
574   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
575   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
576   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
577   PetscFunctionReturn(PETSC_SUCCESS);
578 }
579 
580 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
581 {
582   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
583   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
584 
585   PetscFunctionBegin;
586   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
587   PetscCall(PetscObjectStateIncrease((PetscObject)B));
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590 
591 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
592 {
593   Mat            Bs;
594   MPI_Comm       comm;
595   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
596 
597   PetscFunctionBegin;
598   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
599   PetscCall(MatCreate(comm, &Bs));
600   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
601   PetscCall(MatSetType(Bs, MATSCALAPACK));
602   b       = (Mat_ScaLAPACK *)Bs->data;
603   b->M    = a->M;
604   b->N    = a->N;
605   b->mb   = a->mb;
606   b->nb   = a->nb;
607   b->rsrc = a->rsrc;
608   b->csrc = a->csrc;
609   PetscCall(MatSetUp(Bs));
610   *B = Bs;
611   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
612   Bs->assembled = PETSC_TRUE;
613   PetscFunctionReturn(PETSC_SUCCESS);
614 }
615 
/* B = A^T (MAT_INITIAL_MATRIX only).  B is created with swapped dimensions
   and block sizes, and computed with PBLAStran; in complex builds that
   routine conjugates, so the conjugation is undone afterwards to yield a
   plain (non-Hermitian) transpose. */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  /* B = 1.0*A' + 0.0*B */
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
641 
642 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
643 {
644   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
645   PetscInt       i, j;
646 
647   PetscFunctionBegin;
648   for (i = 0; i < a->locr; i++)
649     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
650   PetscFunctionReturn(PETSC_SUCCESS);
651 }
652 
/* B = A^H (MAT_INITIAL_MATRIX only).  Uses the same PBLAStran call as
   MatTranspose_ScaLAPACK but keeps the conjugation it performs in complex
   builds; for real scalars this is simply the transpose. */
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  /* B = 1.0*A' + 0.0*B (conjugating in complex builds) */
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
669 
/* Solves A*X = B using the factors stored in A (LU via p?getrs, Cholesky via
   p?potrs).  B is copied into X; X's array is redistributed from PETSc's 1d
   layout to a column of a 2d block-cyclic matrix, the triangular solves are
   applied there, and the result is redistributed back. */
static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector (local size from numroc) */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine matching the stored factorization */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
721 
722 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
723 {
724   PetscFunctionBegin;
725   PetscCall(MatSolve_ScaLAPACK(A, B, X));
726   PetscCall(VecAXPY(X, 1, Y));
727   PetscFunctionReturn(PETSC_SUCCESS);
728 }
729 
/*
   MatMatSolve for a factored MATSCALAPACK matrix: X = inv(A)*B with multiple
   right-hand sides. Both B and X must be MATSCALAPACK with matching
   distributions; B is copied into X and the ScaLAPACK solve overwrites X in
   place.
*/
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
  PetscBool      flg1, flg2;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B, 1, X, 2);
  b = (Mat_ScaLAPACK *)B->data;
  x = (Mat_ScaLAPACK *)X->data;
  /* copy the local part of B into X; the solvers below overwrite the rhs */
  PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
759 
/*
   In-place LU factorization with partial pivoting via ScaLAPACK p*getrf.
   The ordering index sets (row, col) and factorinfo are ignored for this
   dense factorization. Pivot indices are stored in a->pivots for later use
   by the solve routines.
*/
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* pivot array of size locr+mb entries (p*getrf's ipiv requirement); allocated once, reused on refactorization */
  if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
776 
777 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
778 {
779   PetscFunctionBegin;
780   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
781   PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
782   PetscFunctionReturn(PETSC_SUCCESS);
783 }
784 
/* Symbolic LU factorization is trivial for dense ScaLAPACK matrices */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
791 
/*
   In-place Cholesky factorization of the lower triangle ("L") via ScaLAPACK
   p*potrf. The permutation IS and factorinfo are ignored for this dense
   factorization.
*/
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
807 
808 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
809 {
810   PetscFunctionBegin;
811   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
812   PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
/* Symbolic Cholesky factorization is trivial for dense ScaLAPACK matrices */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
822 
/* Reports the solver package used for this factor matrix (always MATSOLVERSCALAPACK) */
static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}
829 
/*
   Creates the factor matrix F for LU or Cholesky factorization with the
   ScaLAPACK solver, mirroring A's dimensions, block sizes and process grid
   source.
*/
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE; /* the symbolic factorization phase is a no-op */
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
847 
/* Registers the ScaLAPACK solver package for LU and Cholesky factorizations of MATSCALAPACK matrices */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(PETSC_SUCCESS);
}
855 
/*
   Computes the 1-, Frobenius or infinity norm of A via ScaLAPACK p*lange.
   The 1- and infinity-norms require a real workspace (sized here as the max
   of the local row and column counts, which covers both cases); the
   Frobenius norm needs none, so a dummy pointer is passed instead.
*/
static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, lwork = 0;
  const char    *ntype;
  PetscScalar   *work = NULL, dummy;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1:
    ntype = "1";
    lwork = PetscMax(a->locr, a->locc);
    break;
  case NORM_FROBENIUS:
    ntype = "F";
    work  = &dummy; /* not referenced by p*lange for the Frobenius norm */
    break;
  case NORM_INFINITY:
    ntype = "I";
    lwork = PetscMax(a->locr, a->locc);
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
  }
  if (lwork) PetscCall(PetscMalloc1(lwork, &work));
  *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
  if (lwork) PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}
885 
886 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
887 {
888   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
889 
890   PetscFunctionBegin;
891   PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
892   PetscFunctionReturn(PETSC_SUCCESS);
893 }
894 
895 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
896 {
897   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
898   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
899 
900   PetscFunctionBegin;
901   if (rows) {
902     n     = a->locr;
903     nb    = a->mb;
904     isrc  = a->rsrc;
905     nproc = a->grid->nprow;
906     iproc = a->grid->myrow;
907     PetscCall(PetscMalloc1(n, &idx));
908     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
909     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
910   }
911   if (cols) {
912     n     = a->locc;
913     nb    = a->nb;
914     isrc  = a->csrc;
915     nproc = a->grid->npcol;
916     iproc = a->grid->mycol;
917     PetscCall(PetscMalloc1(n, &idx));
918     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
919     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
920   }
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
/*
   Converts a MATSCALAPACK matrix to MATDENSE. The 2d block-cyclic data is
   redistributed with p*gemr2d into a 1d (row-block) distributed dense matrix.
   With MAT_REUSE_MATRIX, if the target's row layout differs from A's, an
   auxiliary dense matrix with A's layout is filled first and its rows are
   then transferred to *B via MatSetValues().
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    PetscCall(MatDenseGetLDA(Bmpi, &ldb));
    lld = PetscMax(ldb, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    PetscCall(MatDenseGetLDA(Bmpi, &ldb));
    lld = PetscMax(ldb, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1020 
/*
   Checks whether a PETSc layout is compatible with a ScaLAPACK 1d block
   distribution: every process must own the same number of indices as the
   first process, except possibly the last one, which may own fewer.
*/
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    n = ranges[1] - ranges[0]; /* local size of the first process */
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    /* either no mismatch was found (i == size), or the only mismatch is a smaller last process */
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1039 
/*
   Converts a MATDENSE matrix to MATSCALAPACK. When the input layout matches
   a ScaLAPACK 1d block distribution, the data is redistributed directly with
   p*gemr2d; otherwise each local entry is routed through MatSetValues()
   using the ownership index sets of the target distribution.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE; /* gemr2d placed all values locally, skip the stash during assembly */
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE; /* the dense array is column-major */
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1104 
1105 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1106 {
1107   Mat                mat_scal;
1108   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1109   const PetscInt    *cols;
1110   const PetscScalar *vals;
1111 
1112   PetscFunctionBegin;
1113   if (reuse == MAT_REUSE_MATRIX) {
1114     mat_scal = *newmat;
1115     PetscCall(MatZeroEntries(mat_scal));
1116   } else {
1117     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1118     m = PETSC_DECIDE;
1119     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1120     n = PETSC_DECIDE;
1121     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1122     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1123     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1124     PetscCall(MatSetUp(mat_scal));
1125   }
1126   for (row = rstart; row < rend; row++) {
1127     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1128     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1129     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1130   }
1131   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1132   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1133 
1134   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1135   else *newmat = mat_scal;
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138 
/*
   Converts a symmetric SBAIJ matrix (which stores only the upper triangle)
   to MATSCALAPACK. Each stored row is inserted, and its off-diagonal entries
   are mirrored into the lower triangle (conjugated if A is Hermitian).
   ADD_VALUES is used so the mirrored insertions combine correctly.
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue; /* do not duplicate the diagonal */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1180 
/*
   Allocates (zero-initialized) local storage of lld*locc entries for the
   block-cyclic matrix, freeing any previous allocation.
*/
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr; /* default leading dimension: number of locally owned rows */

  PetscCall(PetscFree(a->loc));
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); /* overflow-checked product */
  PetscCall(PetscCalloc1(sz, &a->loc));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1198 
/*
   Destroys a MATSCALAPACK matrix: frees local storage and pivots, and drops
   one reference to the BLACS grid attached to the communicator, tearing the
   grid down when this was the last matrix using it.
*/
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* look up the grid stored as an attribute on PETSc's inner communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    /* last user of this grid: release the three BLACS contexts and the attribute */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1227 
/*
   Sets up a MATSCALAPACK matrix: validates the PETSc layouts against the
   requirements of the 1d block distribution, computes local sizes with
   numroc, allocates local storage, and initializes the ScaLAPACK descriptor.
*/
static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr); /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1259 
1260 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1261 {
1262   PetscInt nstash, reallocs;
1263 
1264   PetscFunctionBegin;
1265   if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1266   PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1267   PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1268   PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1269   PetscFunctionReturn(PETSC_SUCCESS);
1270 }
1271 
/*
   Ends assembly: drains the stash of off-process entries, converting each
   global (row,col) pair to local indices with infog2l (ScaLAPACK uses
   1-based indexing) and inserting or adding into the local array.
*/
static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break; /* no more messages */
    for (i = 0; i < n; i++) {
      /* convert to 1-based global indices for ScaLAPACK */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
        break;
      case ADD_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1305 
1306 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1307 {
1308   Mat      Adense, As;
1309   MPI_Comm comm;
1310 
1311   PetscFunctionBegin;
1312   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1313   PetscCall(MatCreate(comm, &Adense));
1314   PetscCall(MatSetType(Adense, MATDENSE));
1315   PetscCall(MatLoad(Adense, viewer));
1316   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1317   PetscCall(MatDestroy(&Adense));
1318   PetscCall(MatHeaderReplace(newMat, &As));
1319   PetscFunctionReturn(PETSC_SUCCESS);
1320 }
1321 
1322 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1323                                        0,
1324                                        0,
1325                                        MatMult_ScaLAPACK,
1326                                        /* 4*/ MatMultAdd_ScaLAPACK,
1327                                        MatMultTranspose_ScaLAPACK,
1328                                        MatMultTransposeAdd_ScaLAPACK,
1329                                        MatSolve_ScaLAPACK,
1330                                        MatSolveAdd_ScaLAPACK,
1331                                        0,
1332                                        /*10*/ 0,
1333                                        MatLUFactor_ScaLAPACK,
1334                                        MatCholeskyFactor_ScaLAPACK,
1335                                        0,
1336                                        MatTranspose_ScaLAPACK,
1337                                        /*15*/ MatGetInfo_ScaLAPACK,
1338                                        0,
1339                                        MatGetDiagonal_ScaLAPACK,
1340                                        MatDiagonalScale_ScaLAPACK,
1341                                        MatNorm_ScaLAPACK,
1342                                        /*20*/ MatAssemblyBegin_ScaLAPACK,
1343                                        MatAssemblyEnd_ScaLAPACK,
1344                                        MatSetOption_ScaLAPACK,
1345                                        MatZeroEntries_ScaLAPACK,
1346                                        /*24*/ 0,
1347                                        MatLUFactorSymbolic_ScaLAPACK,
1348                                        MatLUFactorNumeric_ScaLAPACK,
1349                                        MatCholeskyFactorSymbolic_ScaLAPACK,
1350                                        MatCholeskyFactorNumeric_ScaLAPACK,
1351                                        /*29*/ MatSetUp_ScaLAPACK,
1352                                        0,
1353                                        0,
1354                                        0,
1355                                        0,
1356                                        /*34*/ MatDuplicate_ScaLAPACK,
1357                                        0,
1358                                        0,
1359                                        0,
1360                                        0,
1361                                        /*39*/ MatAXPY_ScaLAPACK,
1362                                        0,
1363                                        0,
1364                                        0,
1365                                        MatCopy_ScaLAPACK,
1366                                        /*44*/ 0,
1367                                        MatScale_ScaLAPACK,
1368                                        MatShift_ScaLAPACK,
1369                                        0,
1370                                        0,
1371                                        /*49*/ 0,
1372                                        0,
1373                                        0,
1374                                        0,
1375                                        0,
1376                                        /*54*/ 0,
1377                                        0,
1378                                        0,
1379                                        0,
1380                                        0,
1381                                        /*59*/ 0,
1382                                        MatDestroy_ScaLAPACK,
1383                                        MatView_ScaLAPACK,
1384                                        0,
1385                                        0,
1386                                        /*64*/ 0,
1387                                        0,
1388                                        0,
1389                                        0,
1390                                        0,
1391                                        /*69*/ 0,
1392                                        0,
1393                                        MatConvert_ScaLAPACK_Dense,
1394                                        0,
1395                                        0,
1396                                        /*74*/ 0,
1397                                        0,
1398                                        0,
1399                                        0,
1400                                        0,
1401                                        /*79*/ 0,
1402                                        0,
1403                                        0,
1404                                        0,
1405                                        MatLoad_ScaLAPACK,
1406                                        /*84*/ 0,
1407                                        0,
1408                                        0,
1409                                        0,
1410                                        0,
1411                                        /*89*/ 0,
1412                                        0,
1413                                        MatMatMultNumeric_ScaLAPACK,
1414                                        0,
1415                                        0,
1416                                        /*94*/ 0,
1417                                        0,
1418                                        0,
1419                                        MatMatTransposeMultNumeric_ScaLAPACK,
1420                                        0,
1421                                        /*99*/ MatProductSetFromOptions_ScaLAPACK,
1422                                        0,
1423                                        0,
1424                                        MatConjugate_ScaLAPACK,
1425                                        0,
1426                                        /*104*/ 0,
1427                                        0,
1428                                        0,
1429                                        0,
1430                                        0,
1431                                        /*109*/ MatMatSolve_ScaLAPACK,
1432                                        0,
1433                                        0,
1434                                        0,
1435                                        MatMissingDiagonal_ScaLAPACK,
1436                                        /*114*/ 0,
1437                                        0,
1438                                        0,
1439                                        0,
1440                                        0,
1441                                        /*119*/ 0,
1442                                        MatHermitianTranspose_ScaLAPACK,
1443                                        0,
1444                                        0,
1445                                        0,
1446                                        /*124*/ 0,
1447                                        0,
1448                                        0,
1449                                        0,
1450                                        0,
1451                                        /*129*/ 0,
1452                                        0,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                        /*134*/ 0,
1457                                        0,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                        0,
1462                                        /*140*/ 0,
1463                                        0,
1464                                        0,
1465                                        0,
1466                                        0,
1467                                        /*145*/ 0,
1468                                        0,
1469                                        0,
1470                                        0,
1471                                        0,
1472                                        /*150*/ 0,
1473                                        0};
1474 
/*
  MatStashScatterBegin_ScaLAPACK - Starts the MPI exchange of stashed (off-process) matrix
  entries so that each value reaches the process that owns it in the 2D block-cyclic layout.

  For every stashed (row,col,value) triple the owning process is obtained from the ScaLAPACK
  index-translation routine infog2l (which yields the owner's process-grid coordinates) plus
  Cblacs_pnum. Entries are packed into per-destination buffers and shipped with two
  nonblocking messages per destination: the (row,col) index pairs under tag1 and the values
  under tag2. Matching nonblocking receives are posted first; completion is handled later by
  the stash's ScatterGetMesg/ScatterEnd callbacks.

  The `owners` argument of the generic stash interface is not used here: ownership follows
  the block-cyclic distribution, not contiguous row ranges.
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    /* BOR of the enum values detects a mix of ADD_VALUES and INSERT_VALUES across ranks */
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* infog2l takes 1-based global indices, hence the +1 */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      /* map the global (row,col) to the owning process-grid coordinates (rsrc,csrc) */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); /* grid coordinates -> process number */
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and the from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are of size 'bs2' (and remove the earlier factor of 2) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  /* pack the stashed entries into the send buffers, destination by destination */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* rows in the first half of each destination's index segment, columns in the second */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv[] was advanced while packing; rebuild it before posting the sends */
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  /* interleave the index and value requests so each pair is adjacent */
  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* hand ownership of all buffers and requests to the stash for the later scatter stages */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1627 
1628 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1629 {
1630   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1631 
1632   PetscFunctionBegin;
1633   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1634   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1635   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1636   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1637   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1638   PetscFunctionReturn(PETSC_SUCCESS);
1639 }
1640 
/*@
  MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
  the `MATSCALAPACK` matrix

  Logically Collective

  Input Parameters:
+ A  - a `MATSCALAPACK` matrix
. mb - the row block size
- nb - the column block size

  Level: intermediate

  Note:
  This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices

.seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  /* dispatch to the composed type-specific implementation; PetscTryMethod is a
     silent no-op when A is not a MATSCALAPACK (the method is not composed) */
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1668 
1669 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1670 {
1671   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1672 
1673   PetscFunctionBegin;
1674   if (mb) *mb = a->mb;
1675   if (nb) *nb = a->nb;
1676   PetscFunctionReturn(PETSC_SUCCESS);
1677 }
1678 
/*@
  MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
  the `MATSCALAPACK` matrix

  Not Collective

  Input Parameter:
. A - a `MATSCALAPACK` matrix

  Output Parameters:
+ mb - the row block size
- nb - the column block size

  Level: intermediate

  Note:
  This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices

.seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* unlike PetscTryMethod, PetscUseMethod raises an error if the method is not
     composed, i.e. if A is not a MATSCALAPACK */
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1706 
1707 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1708 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1709 
1710 /*MC
1711    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1712 
1713    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1714 
1715    Options Database Keys:
1716 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1717 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1718 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1719 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1720 
1721    Level: intermediate
1722 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.
1727 
1728 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1729 M*/
1730 
1731 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1732 {
1733   Mat_ScaLAPACK      *a;
1734   PetscBool           flg, flg1;
1735   Mat_ScaLAPACK_Grid *grid;
1736   MPI_Comm            icomm;
1737   PetscBLASInt        nprow, npcol, myrow, mycol;
1738   PetscInt            optv1, k = 2, array[2] = {0, 0};
1739   PetscMPIInt         size;
1740 
1741   PetscFunctionBegin;
1742   A->ops[0]     = MatOps_Values;
1743   A->insertmode = NOT_SET_VALUES;
1744 
1745   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1746   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1747   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1748   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1749   A->stash.ScatterDestroy = NULL;
1750 
1751   PetscCall(PetscNew(&a));
1752   A->data = (void *)a;
1753 
1754   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1755   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1756     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1757     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1758     PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1759   }
1760   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1761   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1762   if (!flg) {
1763     PetscCall(PetscNew(&grid));
1764 
1765     PetscCallMPI(MPI_Comm_size(icomm, &size));
1766     grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1767 
1768     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1769     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1770     if (flg1) {
1771       PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1772       grid->nprow = optv1;
1773     }
1774     PetscOptionsEnd();
1775 
1776     if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1777     grid->npcol = size / grid->nprow;
1778     PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1779     PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1780     grid->ictxt = Csys2blacs_handle(icomm);
1781     Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1782     Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1783     grid->grid_refct = 1;
1784     grid->nprow      = nprow;
1785     grid->npcol      = npcol;
1786     grid->myrow      = myrow;
1787     grid->mycol      = mycol;
1788     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1789     grid->ictxrow = Csys2blacs_handle(icomm);
1790     Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1791     grid->ictxcol = Csys2blacs_handle(icomm);
1792     Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1793     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1794 
1795   } else grid->grid_refct++;
1796   PetscCall(PetscCommDestroy(&icomm));
1797   a->grid = grid;
1798   a->mb   = DEFAULT_BLOCKSIZE;
1799   a->nb   = DEFAULT_BLOCKSIZE;
1800 
1801   PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1802   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1803   if (flg) {
1804     a->mb = array[0];
1805     a->nb = (k > 1) ? array[1] : a->mb;
1806   }
1807   PetscOptionsEnd();
1808 
1809   a->roworiented = PETSC_TRUE;
1810   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1811   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1812   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1813   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1814   PetscFunctionReturn(PETSC_SUCCESS);
1815 }
1816 
/*@C
  MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
  (2D block cyclic distribution) for a `MATSCALAPACK` matrix

  Collective

  Input Parameters:
+ comm - MPI communicator
. mb   - row block size (or `PETSC_DECIDE` to have it set)
. nb   - column block size (or `PETSC_DECIDE` to have it set)
. M    - number of global rows
. N    - number of global columns
. rsrc - coordinate of process that owns the first row of the distributed matrix
- csrc - coordinate of process that owns the first column of the distributed matrix

  Output Parameter:
. A - the matrix

  Options Database Key:
. -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

  Level: intermediate

  Notes:
  If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen

  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  Storage is completely managed by ScaLAPACK, so this requires PETSc to be
  configured with ScaLAPACK. In particular, PETSc's local sizes lose
  significance and are thus ignored. The block sizes refer to the values
  used for the distributed matrix, not the same meaning as in `MATBAIJ`.

.seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
{
  Mat_ScaLAPACK *a;
  PetscInt       m, n;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetType(*A, MATSCALAPACK));
  /* global dimensions are mandatory; ScaLAPACK derives the local sizes itself */
  PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  a = (Mat_ScaLAPACK *)(*A)->data;
  PetscCall(PetscBLASIntCast(M, &a->M));
  PetscCall(PetscBLASIntCast(N, &a->N));
  /* PETSC_DECIDE resolves to mb = DEFAULT_BLOCKSIZE and nb = mb */
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
  PetscCall(PetscBLASIntCast(csrc, &a->csrc));
  /* allocates the ScaLAPACK-managed storage and descriptor */
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1879