/* xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision e8c0849ab8fe171bed529bea27238c9b402db591) */
1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2 
3 const char       ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4                                        "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5                                        "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6                                        "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7                                        "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8                                        "       PUBLISHER = {SIAM},\n"
9                                        "       ADDRESS = {Philadelphia, PA},\n"
10                                        "       YEAR = 1997\n"
11                                        "}\n";
12 static PetscBool ScaLAPACKCite       = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
Petsc_ScaLAPACK_keyval_free(void)22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
MatView_ScaLAPACK(Mat A,PetscViewer viewer)30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32   Mat_ScaLAPACK    *a = (Mat_ScaLAPACK *)A->data;
33   PetscBool         isascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
39   if (isascii) {
40     PetscCall(PetscViewerGetFormat(viewer, &format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46       PetscFunctionReturn(PETSC_SUCCESS);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(PETSC_SUCCESS);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53   PetscCall(MatView(Adense, viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo * info)58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61   PetscLogDouble isend[2], irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld * a->locc;  /* locally allocated */
67   isend[1] = a->locr * a->locc; /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated = isend[0];
70     info->nz_used      = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73     info->nz_allocated = irecv[0];
74     info->nz_used      = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77     info->nz_allocated = irecv[0];
78     info->nz_used      = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = 0; /* REVIEW ME */
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97   case MAT_NEW_NONZERO_LOCATIONS:
98   case MAT_NEW_NONZERO_LOCATION_ERR:
99   case MAT_NEW_NONZERO_ALLOCATION_ERR:
100   case MAT_SYMMETRIC:
101   case MAT_SORTED_FULL:
102   case MAT_HERMITIAN:
103     break;
104   case MAT_ROW_ORIENTED:
105     a->roworiented = flg;
106     break;
107   default:
108     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109   }
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt * rows,PetscInt nc,const PetscInt * cols,const PetscScalar * vals,InsertMode imode)113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116   PetscInt       i, j;
117   PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
118   PetscBool      roworiented = a->roworiented;
119 
120   PetscFunctionBegin;
121   PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122   for (i = 0; i < nr; i++) {
123     if (rows[i] < 0) continue;
124     PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125     for (j = 0; j < nc; j++) {
126       if (cols[j] < 0) continue;
127       PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128       PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129       if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130         if (roworiented) {
131           switch (imode) {
132           case INSERT_VALUES:
133             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134             break;
135           default:
136             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137             break;
138           }
139         } else {
140           switch (imode) {
141           case INSERT_VALUES:
142             a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143             break;
144           default:
145             a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146             break;
147           }
148         }
149       } else {
150         PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151         A->assembled = PETSC_FALSE;
152         PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153       }
154     }
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscBool hermitian,PetscScalar beta,const PetscScalar * x,PetscScalar * y)159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
162   PetscScalar    *x2d, *y2d, alpha = 1.0;
163   const PetscInt *ranges;
164   PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165 
166   PetscFunctionBegin;
167   if (transpose) {
168     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173     PetscCheckScaLapackInfo("descinit", info);
174     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176     ylld = 1;
177     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178     PetscCheckScaLapackInfo("descinit", info);
179 
180     /* allocate 2d vectors */
181     lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182     lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184     PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
185 
186     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188     PetscCheckScaLapackInfo("descinit", info);
189     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190     PetscCheckScaLapackInfo("descinit", info);
191 
192     /* redistribute x as a column of a 2d matrix */
193     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194 
195     /* redistribute y as a row of a 2d matrix */
196     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197 
198     /* call PBLAS subroutine */
199     if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200     else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201 
202     /* redistribute y from a row of a 2d matrix */
203     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204 
205   } else { /* non-transpose */
206 
207     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210     xlld = 1;
211     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212     PetscCheckScaLapackInfo("descinit", info);
213     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217     PetscCheckScaLapackInfo("descinit", info);
218 
219     /* allocate 2d vectors */
220     lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221     lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222     PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223     PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
224 
225     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227     PetscCheckScaLapackInfo("descinit", info);
228     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229     PetscCheckScaLapackInfo("descinit", info);
230 
231     /* redistribute x as a row of a 2d matrix */
232     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233 
234     /* redistribute y as a column of a 2d matrix */
235     if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236 
237     /* call PBLAS subroutine */
238     PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239 
240     /* redistribute y from a column of a 2d matrix */
241     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242   }
243   PetscCall(PetscFree2(x2d, y2d));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
MatMult_ScaLAPACK(Mat A,Vec x,Vec y)247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249   const PetscScalar *xarray;
250   PetscScalar       *yarray;
251 
252   PetscFunctionBegin;
253   PetscCall(VecGetArrayRead(x, &xarray));
254   PetscCall(VecGetArray(y, &yarray));
255   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256   PetscCall(VecRestoreArrayRead(x, &xarray));
257   PetscCall(VecRestoreArray(y, &yarray));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263   const PetscScalar *xarray;
264   PetscScalar       *yarray;
265 
266   PetscFunctionBegin;
267   PetscCall(VecGetArrayRead(x, &xarray));
268   PetscCall(VecGetArray(y, &yarray));
269   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270   PetscCall(VecRestoreArrayRead(x, &xarray));
271   PetscCall(VecRestoreArray(y, &yarray));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
MatMultHermitianTranspose_ScaLAPACK(Mat A,Vec x,Vec y)275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277   const PetscScalar *xarray;
278   PetscScalar       *yarray;
279 
280   PetscFunctionBegin;
281   PetscCall(VecGetArrayRead(x, &xarray));
282   PetscCall(VecGetArrayWrite(y, &yarray));
283   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284   PetscCall(VecRestoreArrayRead(x, &xarray));
285   PetscCall(VecRestoreArrayWrite(y, &yarray));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291   const PetscScalar *xarray;
292   PetscScalar       *zarray;
293 
294   PetscFunctionBegin;
295   if (y != z) PetscCall(VecCopy(y, z));
296   PetscCall(VecGetArrayRead(x, &xarray));
297   PetscCall(VecGetArray(z, &zarray));
298   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299   PetscCall(VecRestoreArrayRead(x, &xarray));
300   PetscCall(VecRestoreArray(z, &zarray));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306   const PetscScalar *xarray;
307   PetscScalar       *zarray;
308 
309   PetscFunctionBegin;
310   if (y != z) PetscCall(VecCopy(y, z));
311   PetscCall(VecGetArrayRead(x, &xarray));
312   PetscCall(VecGetArray(z, &zarray));
313   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314   PetscCall(VecRestoreArrayRead(x, &xarray));
315   PetscCall(VecRestoreArray(z, &zarray));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
MatMultHermitianTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321   const PetscScalar *xarray;
322   PetscScalar       *zarray;
323 
324   PetscFunctionBegin;
325   if (y != z) PetscCall(VecCopy(y, z));
326   PetscCall(VecGetArrayRead(x, &xarray));
327   PetscCall(VecGetArray(z, &zarray));
328   PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329   PetscCall(VecRestoreArrayRead(x, &xarray));
330   PetscCall(VecRestoreArray(z, &zarray));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
337   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
338   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
339   PetscScalar    sone = 1.0, zero = 0.0;
340   PetscBLASInt   one = 1;
341 
342   PetscFunctionBegin;
343   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATSCALAPACK));
353   PetscCall(MatSetUp(C));
354   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
MatTransposeMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
361   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
362   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
363   PetscScalar    sone = 1.0, zero = 0.0;
364   PetscBLASInt   one = 1;
365 
366   PetscFunctionBegin;
367   PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368   C->assembled = PETSC_TRUE;
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
MatTransposeMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376   PetscCall(MatSetType(C, MATSCALAPACK));
377   PetscCall(MatSetUp(C));
378   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384   Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
385   Mat_ScaLAPACK *b    = (Mat_ScaLAPACK *)B->data;
386   Mat_ScaLAPACK *c    = (Mat_ScaLAPACK *)C->data;
387   PetscScalar    sone = 1.0, zero = 0.0;
388   PetscBLASInt   one = 1;
389 
390   PetscFunctionBegin;
391   PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392   C->assembled = PETSC_TRUE;
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400   PetscCall(MatSetType(C, MATSCALAPACK));
401   PetscCall(MatSetUp(C));
402   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
MatProductSetFromOptions_ScaLAPACK(Mat C)406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408   Mat_Product *product = C->product;
409 
410   PetscFunctionBegin;
411   switch (product->type) {
412   case MATPRODUCT_AB:
413     C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414     C->ops->productsymbolic = MatProductSymbolic_AB;
415     break;
416   case MATPRODUCT_AtB:
417     C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418     C->ops->productsymbolic          = MatProductSymbolic_AtB;
419     break;
420   case MATPRODUCT_ABt:
421     C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422     C->ops->productsymbolic          = MatProductSymbolic_ABt;
423     break;
424   default:
425     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
MatGetDiagonal_ScaLAPACK(Mat A,Vec D)430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
433   PetscScalar    *darray, *d2d, v;
434   const PetscInt *ranges;
435   PetscBLASInt    j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436 
437   PetscFunctionBegin;
438   PetscCall(VecGetArray(D, &darray));
439 
440   if (A->rmap->N <= A->cmap->N) { /* row version */
441 
442     /* create ScaLAPACK descriptor for vector (1d block distribution) */
443     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447     PetscCheckScaLapackInfo("descinit", info);
448 
449     /* allocate 2d vector */
450     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451     PetscCall(PetscCalloc1(lszd, &d2d));
452     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
453 
454     /* create ScaLAPACK descriptor for vector (2d block distribution) */
455     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456     PetscCheckScaLapackInfo("descinit", info);
457 
458     /* collect diagonal */
459     for (j = 1; j <= a->M; j++) {
460       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462     }
463 
464     /* redistribute d from a column of a 2d matrix */
465     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466     PetscCall(PetscFree(d2d));
467 
468   } else { /* column version */
469 
470     /* create ScaLAPACK descriptor for vector (1d block distribution) */
471     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473     dlld = 1;
474     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475     PetscCheckScaLapackInfo("descinit", info);
476 
477     /* allocate 2d vector */
478     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479     PetscCall(PetscCalloc1(lszd, &d2d));
480 
481     /* create ScaLAPACK descriptor for vector (2d block distribution) */
482     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483     PetscCheckScaLapackInfo("descinit", info);
484 
485     /* collect diagonal */
486     for (j = 1; j <= a->N; j++) {
487       PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488       PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489     }
490 
491     /* redistribute d from a row of a 2d matrix */
492     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493     PetscCall(PetscFree(d2d));
494   }
495 
496   PetscCall(VecRestoreArray(D, &darray));
497   PetscCall(VecAssemblyBegin(D));
498   PetscCall(VecAssemblyEnd(D));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
505   const PetscScalar *d;
506   const PetscInt    *ranges;
507   PetscScalar       *d2d;
508   PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509 
510   PetscFunctionBegin;
511   if (R) {
512     PetscCall(VecGetArrayRead(R, &d));
513     /* create ScaLAPACK descriptor for vector (1d block distribution) */
514     PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515     PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516     dlld = 1;
517     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518     PetscCheckScaLapackInfo("descinit", info);
519 
520     /* allocate 2d vector */
521     lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522     PetscCall(PetscCalloc1(lszd, &d2d));
523 
524     /* create ScaLAPACK descriptor for vector (2d block distribution) */
525     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526     PetscCheckScaLapackInfo("descinit", info);
527 
528     /* redistribute d to a row of a 2d matrix */
529     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530 
531     /* broadcast along process columns */
532     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533     else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534 
535     /* local scaling */
536     for (j = 0; j < a->locc; j++)
537       for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538 
539     PetscCall(PetscFree(d2d));
540     PetscCall(VecRestoreArrayRead(R, &d));
541   }
542   if (L) {
543     PetscCall(VecGetArrayRead(L, &d));
544     /* create ScaLAPACK descriptor for vector (1d block distribution) */
545     PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546     PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547     PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549     PetscCheckScaLapackInfo("descinit", info);
550 
551     /* allocate 2d vector */
552     lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553     PetscCall(PetscCalloc1(lszd, &d2d));
554     PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
555 
556     /* create ScaLAPACK descriptor for vector (2d block distribution) */
557     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558     PetscCheckScaLapackInfo("descinit", info);
559 
560     /* redistribute d to a column of a 2d matrix */
561     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562 
563     /* broadcast along process rows */
564     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565     else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566 
567     /* local scaling */
568     for (i = 0; i < a->locr; i++)
569       for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570 
571     PetscCall(PetscFree(d2d));
572     PetscCall(VecRestoreArrayRead(L, &d));
573   }
574   PetscFunctionReturn(PETSC_SUCCESS);
575 }
576 
MatScale_ScaLAPACK(Mat X,PetscScalar a)577 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
578 {
579   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
580   PetscBLASInt   n, one = 1;
581 
582   PetscFunctionBegin;
583   n = x->lld * x->locc;
584   PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
585   PetscFunctionReturn(PETSC_SUCCESS);
586 }
587 
MatShift_ScaLAPACK(Mat X,PetscScalar alpha)588 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
589 {
590   Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
591   PetscBLASInt   i, n;
592   PetscScalar    v;
593 
594   PetscFunctionBegin;
595   n = PetscMin(x->M, x->N);
596   for (i = 1; i <= n; i++) {
597     PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
598     v += alpha;
599     PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
600   }
601   PetscFunctionReturn(PETSC_SUCCESS);
602 }
603 
MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)604 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
605 {
606   Mat_ScaLAPACK *x    = (Mat_ScaLAPACK *)X->data;
607   Mat_ScaLAPACK *y    = (Mat_ScaLAPACK *)Y->data;
608   PetscBLASInt   one  = 1;
609   PetscScalar    beta = 1.0;
610 
611   PetscFunctionBegin;
612   MatScaLAPACKCheckDistribution(Y, 1, X, 3);
613   PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
614   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)618 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
619 {
620   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
621   Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
622 
623   PetscFunctionBegin;
624   PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
625   PetscCall(PetscObjectStateIncrease((PetscObject)B));
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat * B)629 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
630 {
631   Mat            Bs;
632   MPI_Comm       comm;
633   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
634 
635   PetscFunctionBegin;
636   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
637   PetscCall(MatCreate(comm, &Bs));
638   PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
639   PetscCall(MatSetType(Bs, MATSCALAPACK));
640   b       = (Mat_ScaLAPACK *)Bs->data;
641   b->M    = a->M;
642   b->N    = a->N;
643   b->mb   = a->mb;
644   b->nb   = a->nb;
645   b->rsrc = a->rsrc;
646   b->csrc = a->csrc;
647   PetscCall(MatSetUp(Bs));
648   *B = Bs;
649   if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
650   Bs->assembled = PETSC_TRUE;
651   PetscFunctionReturn(PETSC_SUCCESS);
652 }
653 
/* Compute B = A^T as a new MATSCALAPACK matrix.
   Only reuse == MAT_INITIAL_MATRIX is supported; B is created with A's row/column
   block sizes and source processes swapped. */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i, j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  /* B = 1*op(A) + 0*B */
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation: PBLAStran_ presumably maps to the conjugate-transpose
     routine in complex builds, so re-conjugate to obtain the plain transpose */
  for (i = 0; i < b->locr; i++)
    for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
679 
MatConjugate_ScaLAPACK(Mat A)680 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
681 {
682   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
683   PetscInt       i, j;
684 
685   PetscFunctionBegin;
686   for (i = 0; i < a->locr; i++)
687     for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
688   PetscFunctionReturn(PETSC_SUCCESS);
689 }
690 
/* Compute B = A^H as a new MATSCALAPACK matrix.
   Only reuse == MAT_INITIAL_MATRIX is supported. Unlike MatTranspose_ScaLAPACK,
   no conjugation fix-up is applied after PBLAStran_, so in complex builds the
   result is the conjugate transpose (presumably PBLAStran_ conjugates). */
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data, *b;
  Mat            Bs   = *B;
  PetscBLASInt   one  = 1;
  PetscScalar    sone = 1.0, zero = 0.0;

  PetscFunctionBegin;
  PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
  /* B gets A's transposed sizes with swapped block sizes and source processes */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
  *B = Bs;
  b  = (Mat_ScaLAPACK *)Bs->data;
  /* B = 1*op(A) + 0*B */
  PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
707 
/* Solve A*X = B with the LU or Cholesky factor stored in A.
   The 1d block-distributed PETSc vector is first redistributed onto the 2d
   process grid (as a single column), solved there, and redistributed back. */
static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x, *x2d;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  /* the solve is done in place in X, starting from a copy of B */
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine matching the stored factorization */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
759 
/* X = inv(A)*B + Y, implemented as a solve followed by an AXPY */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
{
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A, B, X));
  PetscCall(VecAXPY(X, 1, Y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
767 
/* Solve A*X = B for multiple right-hand sides with the LU or Cholesky factor in A.
   B and X need not be MATSCALAPACK: B is converted (or copied) into a ScaLAPACK
   matrix, the solve is done in place there, and the result is converted back to
   X's original type when necessary. */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
  PetscBool      flg1, flg2;
  PetscBLASInt   one = 1, info;
  Mat            C;
  MatType        type;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
  if (flg2) {
    /* X is ScaLAPACK: bring B's values into X and solve in place */
    if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
    else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
    C = X;
  } else {
    /* X is not ScaLAPACK: solve in a temporary ScaLAPACK copy of B */
    PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
  }
  x = (Mat_ScaLAPACK *)C->data;

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  if (!flg2) {
    /* copy the solution from the temporary back into X, keeping X's type */
    PetscCall(MatGetType(X, &type));
    PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
    PetscCall(MatDestroy(&C));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
808 
/* In-place LU factorization via p?getrf; row and column permutations (row, col)
   are ignored since ScaLAPACK performs its own partial pivoting. */
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* pivot array sized locr + mb, presumably the LOCr(M)+MB requirement of p?getrf */
  if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
825 
/* Numeric LU factorization: copy A into the factor matrix F and factor in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
833 
/* Symbolic LU factorization is a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
840 
/* In-place Cholesky factorization via p?potrf using the lower triangle ("L");
   the permutation argument is ignored. */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}
856 
/* Numeric Cholesky factorization: copy A into F and factor in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
864 
/* Symbolic Cholesky factorization is a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
871 
/* Report MATSOLVERSCALAPACK as the solver backing this factored matrix */
static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}
878 
/* Create the factor matrix F for LU or Cholesky factorization of A.
   F is a MATSCALAPACK matrix with the same distribution as A; the actual
   allocation happens here, so the symbolic factorization routines are no-ops. */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
896 
/* Register the ScaLAPACK solver for LU and Cholesky factorizations of MATSCALAPACK */
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(PETSC_SUCCESS);
}
904 
MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal * nrm)905 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
906 {
907   Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
908   PetscBLASInt   one = 1, lwork = 0;
909   const char    *ntype;
910   PetscScalar   *work = NULL, dummy;
911 
912   PetscFunctionBegin;
913   switch (type) {
914   case NORM_1:
915     ntype = "1";
916     lwork = PetscMax(a->locr, a->locc);
917     break;
918   case NORM_FROBENIUS:
919     ntype = "F";
920     work  = &dummy;
921     break;
922   case NORM_INFINITY:
923     ntype = "I";
924     lwork = PetscMax(a->locr, a->locc);
925     break;
926   default:
927     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
928   }
929   if (lwork) PetscCall(PetscMalloc1(lwork, &work));
930   *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
931   if (lwork) PetscCall(PetscFree(work));
932   PetscFunctionReturn(PETSC_SUCCESS);
933 }
934 
/* Zero the whole local slab (lld x locc), including any lld padding rows */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
943 
MatGetOwnershipIS_ScaLAPACK(Mat A,IS * rows,IS * cols)944 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
945 {
946   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
947   PetscInt       i, n, nb, isrc, nproc, iproc, *idx;
948 
949   PetscFunctionBegin;
950   if (rows) {
951     n     = a->locr;
952     nb    = a->mb;
953     isrc  = a->rsrc;
954     nproc = a->grid->nprow;
955     iproc = a->grid->myrow;
956     PetscCall(PetscMalloc1(n, &idx));
957     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
958     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
959   }
960   if (cols) {
961     n     = a->locc;
962     nb    = a->nb;
963     isrc  = a->csrc;
964     nproc = a->grid->npcol;
965     iproc = a->grid->mycol;
966     PetscCall(PetscMalloc1(n, &idx));
967     for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
968     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
969   }
970   PetscFunctionReturn(PETSC_SUCCESS);
971 }
972 
MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat * B)973 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
974 {
975   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
976   Mat                Bmpi;
977   MPI_Comm           comm;
978   PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
979   const PetscInt    *ranges, *branges, *cwork;
980   const PetscScalar *vwork;
981   PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
982   PetscScalar       *barray;
983   PetscBool          differ = PETSC_FALSE;
984   PetscMPIInt        size;
985 
986   PetscFunctionBegin;
987   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
988   PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
989 
990   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
991     PetscCallMPI(MPI_Comm_size(comm, &size));
992     PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
993     for (i = 0; i < size; i++)
994       if (ranges[i + 1] != branges[i + 1]) {
995         differ = PETSC_TRUE;
996         break;
997       }
998   }
999 
1000   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1001     PetscCall(MatCreate(comm, &Bmpi));
1002     m = PETSC_DECIDE;
1003     PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1004     n = PETSC_DECIDE;
1005     PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1006     PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1007     PetscCall(MatSetType(Bmpi, MATDENSE));
1008     PetscCall(MatSetUp(Bmpi));
1009 
1010     /* create ScaLAPACK descriptor for B (1d block distribution) */
1011     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1012     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1013     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1014     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1015     PetscCheckScaLapackInfo("descinit", info);
1016 
1017     /* redistribute matrix */
1018     PetscCall(MatDenseGetArray(Bmpi, &barray));
1019     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1020     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1021     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1022     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1023 
1024     /* transfer rows of auxiliary matrix to the final matrix B */
1025     PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1026     for (i = rstart; i < rend; i++) {
1027       PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1028       PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1029       PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1030     }
1031     PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1032     PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1033     PetscCall(MatDestroy(&Bmpi));
1034 
1035   } else { /* normal cases */
1036 
1037     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1038     else {
1039       PetscCall(MatCreate(comm, &Bmpi));
1040       m = PETSC_DECIDE;
1041       PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1042       n = PETSC_DECIDE;
1043       PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1044       PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1045       PetscCall(MatSetType(Bmpi, MATDENSE));
1046       PetscCall(MatSetUp(Bmpi));
1047     }
1048 
1049     /* create ScaLAPACK descriptor for B (1d block distribution) */
1050     PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1051     PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1052     PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1053     PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1054     PetscCheckScaLapackInfo("descinit", info);
1055 
1056     /* redistribute matrix */
1057     PetscCall(MatDenseGetArray(Bmpi, &barray));
1058     PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1059     PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1060 
1061     PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1062     PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1063     if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1064     else *B = Bmpi;
1065   }
1066   PetscFunctionReturn(PETSC_SUCCESS);
1067 }
1068 
/* Check whether a PETSc layout is ScaLAPACK-compatible: every process owns the
   same number of entries, except that the last process may own fewer.
   Sets *correct to PETSC_TRUE if so. */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    /* n is the local size on process 0; scan for the first process that differs */
    n = ranges[1] - ranges[0];
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    /* acceptable if none differ, or only the last one differs and is smaller */
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1087 
/* Convert a MATDENSE matrix to MATSCALAPACK.
   If A's row and column layouts are ScaLAPACK-compatible (equal local sizes,
   except possibly the last process), p?gemr2d redistributes the data directly;
   otherwise the entries are transferred with MatSetValues() using the target's
   ownership index sets. */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *b;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt    *ranges, *rows, *cols;
  PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
  const PetscScalar *aarray;
  IS                 ir, ic;
  PetscInt           lda;
  PetscBool          flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    /* create the target with equal local splits, as required by MATSCALAPACK */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArrayRead(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb));        /* row block size */
    PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    /* dense array is column-major, so insert column-oriented */
    b->roworiented = PETSC_FALSE;
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArrayRead(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
  else *B = Bmpi;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1151 
MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1152 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1153 {
1154   Mat                mat_scal;
1155   PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1156   const PetscInt    *cols;
1157   const PetscScalar *vals;
1158 
1159   PetscFunctionBegin;
1160   if (reuse == MAT_REUSE_MATRIX) {
1161     mat_scal = *newmat;
1162     PetscCall(MatZeroEntries(mat_scal));
1163   } else {
1164     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1165     m = PETSC_DECIDE;
1166     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1167     n = PETSC_DECIDE;
1168     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1169     PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1170     PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1171     PetscCall(MatSetUp(mat_scal));
1172   }
1173   for (row = rstart; row < rend; row++) {
1174     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1175     PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1176     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1177   }
1178   PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1179   PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1180 
1181   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1182   else *newmat = mat_scal;
1183   PetscFunctionReturn(PETSC_SUCCESS);
1184 }
1185 
/* Convert an SBAIJ (symmetric, upper-triangle-stored) matrix to MATSCALAPACK.
   Each stored upper-triangular entry is also mirrored to the lower triangle
   (conjugated when A is marked hermitian), so the target holds the full matrix. */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    /* create the target with equal local splits, as required by MATSCALAPACK */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    /* ADD_VALUES is used throughout so the mirrored inserts combine correctly */
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue; /* diagonal entries must not be added twice */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1227 
/* Allocate (zero-initialized) the local lld x locc slab; any previous local
   array is freed. lld defaults to locr if not set. */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  /* overflow-checked product lld*locc */
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
  PetscCall(PetscCalloc1(sz, &a->loc));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1245 
/* Destroy a MATSCALAPACK matrix: free local data and pivots, drop one reference
   on the shared BLACS grid (tearing down the grid contexts and the communicator
   attribute when the count reaches zero), and remove composed functions. */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscMPIInt         iflg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* the grid lives as an attribute on the PETSc inner communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
  if (--grid->grid_refct == 0) {
    /* last matrix using this grid: release all three BLACS contexts */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1274 
/* Set up a MATSCALAPACK matrix: validate the layout, compute the local sizes of
   the 2d block-cyclic distribution with numroc, allocate the local array, and
   initialize the ScaLAPACK array descriptor. */
static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr); /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1306 
/* Start assembly: initiate the stash scatter that ships off-process entries
   inserted via MatSetValues() to their owning processes. No-op when the caller
   guaranteed no off-process entries. */
static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
{
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1318 
/* Finish assembly: drain the stash messages and store each received (row,col,val)
   into the local block-cyclic array, honoring the matrix's insert mode. */
static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break; /* no more messages */
    for (i = 0; i < n; i++) {
      /* infog2l expects 1-based global indices and returns 1-based local ones */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
        break;
      case ADD_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1352 
MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)1353 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1354 {
1355   Mat      Adense, As;
1356   MPI_Comm comm;
1357 
1358   PetscFunctionBegin;
1359   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1360   PetscCall(MatCreate(comm, &Adense));
1361   PetscCall(MatSetType(Adense, MATDENSE));
1362   PetscCall(MatLoad(Adense, viewer));
1363   PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1364   PetscCall(MatDestroy(&Adense));
1365   PetscCall(MatHeaderReplace(newMat, &As));
1366   PetscFunctionReturn(PETSC_SUCCESS);
1367 }
1368 
/* Function table for MATSCALAPACK: positional initializer of struct _MatOps.
   The /\*NN\*\/ markers give the slot index within _MatOps; NULL slots fall back
   to PETSc's default (or are unsupported for this type). Keep entries in slot
   order -- do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       MatMult_ScaLAPACK,
                                       /* 4*/ MatMultAdd_ScaLAPACK,
                                       MatMultTranspose_ScaLAPACK,
                                       MatMultTransposeAdd_ScaLAPACK,
                                       MatSolve_ScaLAPACK,
                                       MatSolveAdd_ScaLAPACK,
                                       NULL,
                                       /*10*/ NULL,
                                       MatLUFactor_ScaLAPACK,
                                       MatCholeskyFactor_ScaLAPACK,
                                       NULL,
                                       MatTranspose_ScaLAPACK,
                                       /*15*/ MatGetInfo_ScaLAPACK,
                                       NULL,
                                       MatGetDiagonal_ScaLAPACK,
                                       MatDiagonalScale_ScaLAPACK,
                                       MatNorm_ScaLAPACK,
                                       /*20*/ MatAssemblyBegin_ScaLAPACK,
                                       MatAssemblyEnd_ScaLAPACK,
                                       MatSetOption_ScaLAPACK,
                                       MatZeroEntries_ScaLAPACK,
                                       /*24*/ NULL,
                                       MatLUFactorSymbolic_ScaLAPACK,
                                       MatLUFactorNumeric_ScaLAPACK,
                                       MatCholeskyFactorSymbolic_ScaLAPACK,
                                       MatCholeskyFactorNumeric_ScaLAPACK,
                                       /*29*/ MatSetUp_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*34*/ MatDuplicate_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCopy_ScaLAPACK,
                                       /*44*/ NULL,
                                       MatScale_ScaLAPACK,
                                       MatShift_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*59*/ NULL,
                                       MatDestroy_ScaLAPACK,
                                       MatView_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ NULL,
                                       MatConvert_ScaLAPACK_Dense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_ScaLAPACK,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*84*/ NULL,
                                       MatMatMultNumeric_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultNumeric_ScaLAPACK,
                                       /*89*/ NULL,
                                       MatProductSetFromOptions_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       MatConjugate_ScaLAPACK,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       MatMatSolve_ScaLAPACK,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       MatHermitianTranspose_ScaLAPACK,
                                       MatMultHermitianTranspose_ScaLAPACK,
                                       MatMultHermitianTransposeAdd_ScaLAPACK,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_ScaLAPACK,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*140*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL};
1513 
/* Begin scattering stashed off-process entries to their owning processes.

   ScaLAPACK-specific replacement for MatStashScatterBegin_Private(): ownership of an
   entry is decided by the 2D block-cyclic distribution (via SCALAPACKinfog2l_/Cblacs_pnum),
   not by contiguous row ranges, so the generic `owners` array is ignored here.

   Posts two nonblocking messages per destination: indices (row,col pairs, tag1) and
   values (tag2). Receives are pre-posted; the matching waits happen later in
   MatStashScatterGetMesg_Ref()/MatStashScatterEnd_Ref(). */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt          *sindices, **rindices, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt        tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    /* BOR of the enum values: mixing ADD_VALUES and INSERT_VALUES yields their bitwise OR, which is detected below */
    PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  ii = j = 0;
  space  = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* map the global (row,col) of each stashed entry to the BLACS process that owns it */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); /* linear rank of owner (rsrc,csrc) in the grid */
      nlengths[j]++;
      owner[ii] = j;
      ii++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  nsends = 0;
  for (PetscMPIInt i = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;

    /* Determine the number of messages to expect, their lengths, from from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscMPIIntCast(nsends, &insends));
    PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2 */
    for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  /* pack values and indices per destination; startv/starti act as running cursors here */
  ii    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[ii];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* rows go in the first half of each destination's index segment, cols in the second */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      ii++;
    }
    space = space_next;
  }
  /* rewind startv to segment beginnings for the sends below */
  startv[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (PetscMPIInt i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (PetscMPIInt i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (PetscMPIInt i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* hand ownership of all buffers and requests to the stash for the later wait/cleanup phases */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = (PetscMPIInt)nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1669 
MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)1670 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1671 {
1672   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1673 
1674   PetscFunctionBegin;
1675   PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1676   PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1677   PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1678   PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1679   PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1680   PetscFunctionReturn(PETSC_SUCCESS);
1681 }
1682 
1683 /*@
1684   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1685   the `MATSCALAPACK` matrix
1686 
1687   Logically Collective
1688 
1689   Input Parameters:
1690 + A  - a `MATSCALAPACK` matrix
1691 . mb - the row block size
1692 - nb - the column block size
1693 
1694   Level: intermediate
1695 
1696   Note:
1697   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1698 
1699 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1700 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* mb/nb must be the same on every rank since they define the global distribution */
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  /* PetscTryMethod: silently a no-op for matrix types that do not implement it */
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1710 
MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt * mb,PetscInt * nb)1711 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1712 {
1713   Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1714 
1715   PetscFunctionBegin;
1716   if (mb) *mb = a->mb;
1717   if (nb) *nb = a->nb;
1718   PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720 
1721 /*@
1722   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1723   the `MATSCALAPACK` matrix
1724 
1725   Not Collective
1726 
1727   Input Parameter:
1728 . A - a `MATSCALAPACK` matrix
1729 
1730   Output Parameters:
1731 + mb - the row block size
1732 - nb - the column block size
1733 
1734   Level: intermediate
1735 
1736   Note:
1737   This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1738 
1739 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1740 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if A's type does not implement the query (i.e. is not MATSCALAPACK) */
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1748 
1749 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1750 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1751 
1752 /*MC
1753    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1754 
1755    Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK
1756 
1757    Options Database Keys:
1758 +  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1759 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1760 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1761 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1762 
1763    Level: intermediate
1764 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1767    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1768    the given rank.
1769 
1770 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1771 M*/
1772 
/* Constructor for MATSCALAPACK: installs the function table, the custom stash
   scatter, and the Mat_ScaLAPACK context, and creates (or reuses) the BLACS
   process grid cached on the communicator via an MPI attribute. */
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscBool           flg;
  PetscMPIInt         iflg;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm            icomm;
  PetscBLASInt        nprow, npcol, myrow, mycol;
  PetscInt            optv1, k = 2, array[2] = {0, 0};
  PetscMPIInt         size;

  PetscFunctionBegin;
  A->ops[0]     = MatOps_Values;
  A->insertmode = NOT_SET_VALUES;

  /* Stash for off-process entries; ScatterBegin is type-specific (2D block-cyclic
     ownership), while the message drain/cleanup use the generic Ref implementations */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNew(&a));
  A->data = (void *)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
  }
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
  if (!iflg) { /* first MATSCALAPACK on this communicator: build the grid */
    PetscCall(PetscNew(&grid));

    PetscCallMPI(MPI_Comm_size(icomm, &size));
    /* default: most-square grid, nprow = floor(sqrt(size)) (+0.001 guards against FP rounding) */
    PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));

    PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg));
    if (flg) {
      PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
      PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size / grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
    PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); /* "R" = row-major process ordering */
    Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));

  } else grid->grid_refct++; /* reuse the cached grid; released in MatDestroy via the refcount */
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
  if (flg) {
    /* one value sets both block sizes; two values set row and column sizes respectively */
    a->mb = (PetscMPIInt)array[0];
    a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1859 
1860 /*@C
1861   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1862   (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1863 
1864   Collective
1865 
1866   Input Parameters:
1867 + comm - MPI communicator
1868 . mb   - row block size (or `PETSC_DECIDE` to have it set)
1869 . nb   - column block size (or `PETSC_DECIDE` to have it set)
1870 . M    - number of global rows
1871 . N    - number of global columns
1872 . rsrc - coordinate of process that owns the first row of the distributed matrix
1873 - csrc - coordinate of process that owns the first column of the distributed matrix
1874 
1875   Output Parameter:
1876 . A - the matrix
1877 
1878   Options Database Key:
1879 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1880 
1881   Level: intermediate
1882 
1883   Notes:
1884   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1885 
1886   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1887   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1888   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1889 
1890   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1891   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1892   significance and are thus ignored. The block sizes refer to the values
1893   used for the distributed matrix, not the same meaning as in `MATBAIJ`.
1894 
1895 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1896 @*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
{
  Mat_ScaLAPACK *a;
  PetscInt       m, n;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetType(*A, MATSCALAPACK));
  /* global dimensions must be given explicitly: the 2D block-cyclic layout cannot infer them from local sizes */
  PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  a = (Mat_ScaLAPACK *)(*A)->data; /* valid only after MatSetType above */
  PetscCall(PetscBLASIntCast(M, &a->M));
  PetscCall(PetscBLASIntCast(N, &a->N));
  /* PETSC_DECIDE: row block size defaults to DEFAULT_BLOCKSIZE, column block size defaults to the row one */
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
  PetscCall(PetscBLASIntCast(csrc, &a->csrc));
  PetscCall(MatSetUp(*A)); /* allocates the local storage from the descriptor parameters set above */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1922