1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2
3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7 " TITLE = {Sca{LAPACK} Users' Guide},\n"
8 " PUBLISHER = {SIAM},\n"
9 " ADDRESS = {Philadelphia, PA},\n"
10 " YEAR = 1997\n"
11 "}\n";
12 static PetscBool ScaLAPACKCite = PETSC_FALSE;
13
14 #define DEFAULT_BLOCKSIZE 64
15
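/*
   DEFAULT_BLOCKSIZE is the block size of the 2d block-cyclic layout used when the
   caller does not request specific block sizes. A minimal creation sketch
   (illustrative only; M and N are placeholder global sizes, and PETSC_DECIDE is
   assumed to select this default):

     Mat      A;
     PetscInt M = 1000, N = 1000;
     PetscCall(MatCreateScaLAPACK(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, 0, 0, &A));

   MatCreateScaLAPACK() is used with explicit block sizes later in this file, e.g. in
   MatTranspose_ScaLAPACK() and MatGetFactor_scalapack_scalapack().
*/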
16 /*
17 Petsc_ScaLAPACK_keyval is the key of an MPI attribute attached to the communicator;
18 the value of the attribute is a Mat_ScaLAPACK_Grid (the shared BLACS process grid)
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24 PetscFunctionBegin;
25 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27 PetscFunctionReturn(PETSC_SUCCESS);
28 }
29
30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31 {
32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
33 PetscBool isascii;
34 PetscViewerFormat format;
35 Mat Adense;
36
37 PetscFunctionBegin;
38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
39 if (isascii) {
40 PetscCall(PetscViewerGetFormat(viewer, &format));
41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46 PetscFunctionReturn(PETSC_SUCCESS);
47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48 PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 }
51 /* convert to dense format and call MatView() */
52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53 PetscCall(MatView(Adense, viewer));
54 PetscCall(MatDestroy(&Adense));
55 PetscFunctionReturn(PETSC_SUCCESS);
56 }
57
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59 {
60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61 PetscLogDouble isend[2], irecv[2];
62
63 PetscFunctionBegin;
64 info->block_size = 1.0;
65
66 isend[0] = a->lld * a->locc; /* locally allocated */
67 isend[1] = a->locr * a->locc; /* used submatrix */
68 if (flag == MAT_LOCAL) {
69 info->nz_allocated = isend[0];
70 info->nz_used = isend[1];
71 } else if (flag == MAT_GLOBAL_MAX) {
72 PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73 info->nz_allocated = irecv[0];
74 info->nz_used = irecv[1];
75 } else if (flag == MAT_GLOBAL_SUM) {
76 PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77 info->nz_allocated = irecv[0];
78 info->nz_used = irecv[1];
79 }
80
81 info->nz_unneeded = 0;
82 info->assemblies = A->num_ass;
83 info->mallocs = 0;
84 info->memory = 0; /* REVIEW ME */
85 info->fill_ratio_given = 0;
86 info->fill_ratio_needed = 0;
87 info->factor_mallocs = 0;
88 PetscFunctionReturn(PETSC_SUCCESS);
89 }
90
91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92 {
93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
94
95 PetscFunctionBegin;
96 switch (op) {
97 case MAT_NEW_NONZERO_LOCATIONS:
98 case MAT_NEW_NONZERO_LOCATION_ERR:
99 case MAT_NEW_NONZERO_ALLOCATION_ERR:
100 case MAT_SYMMETRIC:
101 case MAT_SORTED_FULL:
102 case MAT_HERMITIAN:
103 break;
104 case MAT_ROW_ORIENTED:
105 a->roworiented = flg;
106 break;
107 default:
108 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109 }
110 PetscFunctionReturn(PETSC_SUCCESS);
111 }
112
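/*
   MatSetValues for MATSCALAPACK: each (row,col) pair is converted from global 1-based
   indices to local indices plus owner coordinates with infog2l; entries owned by this
   process are written into the local block-cyclic array, the rest go into the stash
   and are routed during MatAssemblyBegin/End. Caller-side sketch (standard PETSc
   assembly loop; indices and value are illustrative):

     PetscScalar v = 1.0;
     PetscInt    i = 0, j = 0;
     PetscCall(MatSetValues(A, 1, &i, 1, &j, &v, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/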
113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114 {
115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116 PetscInt i, j;
117 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
118 PetscBool roworiented = a->roworiented;
119
120 PetscFunctionBegin;
121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122 for (i = 0; i < nr; i++) {
123 if (rows[i] < 0) continue;
124 PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125 for (j = 0; j < nc; j++) {
126 if (cols[j] < 0) continue;
127 PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129 if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130 if (roworiented) {
131 switch (imode) {
132 case INSERT_VALUES:
133 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134 break;
135 default:
136 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137 break;
138 }
139 } else {
140 switch (imode) {
141 case INSERT_VALUES:
142 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143 break;
144 default:
145 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146 break;
147 }
148 }
149 } else {
150 PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151 A->assembled = PETSC_FALSE;
152 PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153 }
154 }
155 }
156 PetscFunctionReturn(PETSC_SUCCESS);
157 }
158
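/*
   Common kernel for MatMult/MatMultTranspose/MatMultHermitianTranspose (and, with
   beta = 1, their *Add variants). The flow implemented below is:
     1. describe x and y with 1d ScaLAPACK descriptors matching the PETSc vector
        layout (block size = local length of rank 0, contexts ictxrow/ictxcol);
     2. redistribute them into the 2d block-cyclic grid with p*gemr2d;
     3. apply p*gemv with op(A) = N, T or C;
     4. redistribute the result back to the PETSc layout.
*/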
159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160 {
161 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
162 PetscScalar *x2d, *y2d, alpha = 1.0;
163 const PetscInt *ranges;
164 PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
165
166 PetscFunctionBegin;
167 if (transpose) {
168 /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
172 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173 PetscCheckScaLapackInfo("descinit", info);
174 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176 ylld = 1;
177 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178 PetscCheckScaLapackInfo("descinit", info);
179
180 /* allocate 2d vectors */
181 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182 lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184 PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
185
186 /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188 PetscCheckScaLapackInfo("descinit", info);
189 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190 PetscCheckScaLapackInfo("descinit", info);
191
192 /* redistribute x as a column of a 2d matrix */
193 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
194
195 /* redistribute y as a row of a 2d matrix */
196 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
197
198 /* call PBLAS subroutine */
199 if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
200 else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201
202 /* redistribute y from a row of a 2d matrix */
203 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204
205 } else { /* non-transpose */
206
207 /* create ScaLAPACK descriptors for vectors (1d block distribution) */
208 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
209 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
210 xlld = 1;
211 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
212 PetscCheckScaLapackInfo("descinit", info);
213 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
214 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
215 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld));
216 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
217 PetscCheckScaLapackInfo("descinit", info);
218
219 /* allocate 2d vectors */
220 lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
221 lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
222 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
223 PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld));
224
225 /* create ScaLAPACK descriptors for vectors (2d block distribution) */
226 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
227 PetscCheckScaLapackInfo("descinit", info);
228 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
229 PetscCheckScaLapackInfo("descinit", info);
230
231 /* redistribute x as a row of a 2d matrix */
232 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233
234 /* redistribute y as a column of a 2d matrix */
235 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236
237 /* call PBLAS subroutine */
238 PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239
240 /* redistribute y from a column of a 2d matrix */
241 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
242 }
243 PetscCall(PetscFree2(x2d, y2d));
244 PetscFunctionReturn(PETSC_SUCCESS);
245 }
246
247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
248 {
249 const PetscScalar *xarray;
250 PetscScalar *yarray;
251
252 PetscFunctionBegin;
253 PetscCall(VecGetArrayRead(x, &xarray));
254 PetscCall(VecGetArray(y, &yarray));
255 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray));
256 PetscCall(VecRestoreArrayRead(x, &xarray));
257 PetscCall(VecRestoreArray(y, &yarray));
258 PetscFunctionReturn(PETSC_SUCCESS);
259 }
260
261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
262 {
263 const PetscScalar *xarray;
264 PetscScalar *yarray;
265
266 PetscFunctionBegin;
267 PetscCall(VecGetArrayRead(x, &xarray));
268 PetscCall(VecGetArray(y, &yarray));
269 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray));
270 PetscCall(VecRestoreArrayRead(x, &xarray));
271 PetscCall(VecRestoreArray(y, &yarray));
272 PetscFunctionReturn(PETSC_SUCCESS);
273 }
274
275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
276 {
277 const PetscScalar *xarray;
278 PetscScalar *yarray;
279
280 PetscFunctionBegin;
281 PetscCall(VecGetArrayRead(x, &xarray));
282 PetscCall(VecGetArrayWrite(y, &yarray));
283 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray));
284 PetscCall(VecRestoreArrayRead(x, &xarray));
285 PetscCall(VecRestoreArrayWrite(y, &yarray));
286 PetscFunctionReturn(PETSC_SUCCESS);
287 }
288
289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290 {
291 const PetscScalar *xarray;
292 PetscScalar *zarray;
293
294 PetscFunctionBegin;
295 if (y != z) PetscCall(VecCopy(y, z));
296 PetscCall(VecGetArrayRead(x, &xarray));
297 PetscCall(VecGetArray(z, &zarray));
298 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray));
299 PetscCall(VecRestoreArrayRead(x, &xarray));
300 PetscCall(VecRestoreArray(z, &zarray));
301 PetscFunctionReturn(PETSC_SUCCESS);
302 }
303
304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
305 {
306 const PetscScalar *xarray;
307 PetscScalar *zarray;
308
309 PetscFunctionBegin;
310 if (y != z) PetscCall(VecCopy(y, z));
311 PetscCall(VecGetArrayRead(x, &xarray));
312 PetscCall(VecGetArray(z, &zarray));
313 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray));
314 PetscCall(VecRestoreArrayRead(x, &xarray));
315 PetscCall(VecRestoreArray(z, &zarray));
316 PetscFunctionReturn(PETSC_SUCCESS);
317 }
318
319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
320 {
321 const PetscScalar *xarray;
322 PetscScalar *zarray;
323
324 PetscFunctionBegin;
325 if (y != z) PetscCall(VecCopy(y, z));
326 PetscCall(VecGetArrayRead(x, &xarray));
327 PetscCall(VecGetArray(z, &zarray));
328 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray));
329 PetscCall(VecRestoreArrayRead(x, &xarray));
330 PetscCall(VecRestoreArray(z, &zarray));
331 PetscFunctionReturn(PETSC_SUCCESS);
332 }
333
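/*
   Matrix-matrix products are delegated to PBLAS p*gemm through the MatProduct
   mechanism (see MatProductSetFromOptions_ScaLAPACK below); the AB, AtB and ABt
   product types are supported. Caller-side sketch with the generic API (illustrative):

     Mat C;
     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_AB));
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));

   or simply MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C).
*/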
334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
335 {
336 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
337 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
338 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
339 PetscScalar sone = 1.0, zero = 0.0;
340 PetscBLASInt one = 1;
341
342 PetscFunctionBegin;
343 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
344 C->assembled = PETSC_TRUE;
345 PetscFunctionReturn(PETSC_SUCCESS);
346 }
347
348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
349 {
350 PetscFunctionBegin;
351 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
352 PetscCall(MatSetType(C, MATSCALAPACK));
353 PetscCall(MatSetUp(C));
354 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
355 PetscFunctionReturn(PETSC_SUCCESS);
356 }
357
358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
359 {
360 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
361 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
362 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
363 PetscScalar sone = 1.0, zero = 0.0;
364 PetscBLASInt one = 1;
365
366 PetscFunctionBegin;
367 PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
368 C->assembled = PETSC_TRUE;
369 PetscFunctionReturn(PETSC_SUCCESS);
370 }
371
372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
373 {
374 PetscFunctionBegin;
375 PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
376 PetscCall(MatSetType(C, MATSCALAPACK));
377 PetscCall(MatSetUp(C));
378 C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK;
379 PetscFunctionReturn(PETSC_SUCCESS);
380 }
381
382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
383 {
384 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
385 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
386 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
387 PetscScalar sone = 1.0, zero = 0.0;
388 PetscBLASInt one = 1;
389
390 PetscFunctionBegin;
391 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
392 C->assembled = PETSC_TRUE;
393 PetscFunctionReturn(PETSC_SUCCESS);
394 }
395
396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
397 {
398 PetscFunctionBegin;
399 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
400 PetscCall(MatSetType(C, MATSCALAPACK));
401 PetscCall(MatSetUp(C));
402 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK;
403 PetscFunctionReturn(PETSC_SUCCESS);
404 }
405
406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
407 {
408 Mat_Product *product = C->product;
409
410 PetscFunctionBegin;
411 switch (product->type) {
412 case MATPRODUCT_AB:
413 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
414 C->ops->productsymbolic = MatProductSymbolic_AB;
415 break;
416 case MATPRODUCT_AtB:
417 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK;
418 C->ops->productsymbolic = MatProductSymbolic_AtB;
419 break;
420 case MATPRODUCT_ABt:
421 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
422 C->ops->productsymbolic = MatProductSymbolic_ABt;
423 break;
424 default:
425 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
426 }
427 PetscFunctionReturn(PETSC_SUCCESS);
428 }
429
430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
431 {
432 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
433 PetscScalar *darray, *d2d, v;
434 const PetscInt *ranges;
435 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
436
437 PetscFunctionBegin;
438 PetscCall(VecGetArray(D, &darray));
439
440 if (A->rmap->N <= A->cmap->N) { /* row version */
441
442 /* create ScaLAPACK descriptor for vector (1d block distribution) */
443 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
444 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
445 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
446 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
447 PetscCheckScaLapackInfo("descinit", info);
448
449 /* allocate 2d vector */
450 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
451 PetscCall(PetscCalloc1(lszd, &d2d));
452 PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
453
454 /* create ScaLAPACK descriptor for vector (2d block distribution) */
455 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
456 PetscCheckScaLapackInfo("descinit", info);
457
458 /* collect diagonal */
459 for (j = 1; j <= a->M; j++) {
460 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
461 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
462 }
463
464 /* redistribute d from a column of a 2d matrix */
465 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
466 PetscCall(PetscFree(d2d));
467
468 } else { /* column version */
469
470 /* create ScaLAPACK descriptor for vector (1d block distribution) */
471 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
472 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
473 dlld = 1;
474 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
475 PetscCheckScaLapackInfo("descinit", info);
476
477 /* allocate 2d vector */
478 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
479 PetscCall(PetscCalloc1(lszd, &d2d));
480
481 /* create ScaLAPACK descriptor for vector (2d block distribution) */
482 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
483 PetscCheckScaLapackInfo("descinit", info);
484
485 /* collect diagonal */
486 for (j = 1; j <= a->N; j++) {
487 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
488 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
489 }
490
491 /* redistribute d from a row of a 2d matrix */
492 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
493 PetscCall(PetscFree(d2d));
494 }
495
496 PetscCall(VecRestoreArray(D, &darray));
497 PetscCall(VecAssemblyBegin(D));
498 PetscCall(VecAssemblyEnd(D));
499 PetscFunctionReturn(PETSC_SUCCESS);
500 }
501
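/*
   MatDiagonalScale: the right (R) and left (L) scaling vectors are redistributed to a
   single row (resp. column) of the process grid with p*gemr2d, broadcast along the
   grid columns (resp. rows) with the BLACS *gebs2d/*gebr2d pair, and then applied to
   the local array entrywise (scaling columns for R, rows for L).
*/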
502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
503 {
504 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
505 const PetscScalar *d;
506 const PetscInt *ranges;
507 PetscScalar *d2d;
508 PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
509
510 PetscFunctionBegin;
511 if (R) {
512 PetscCall(VecGetArrayRead(R, &d));
513 /* create ScaLAPACK descriptor for vector (1d block distribution) */
514 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
515 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
516 dlld = 1;
517 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
518 PetscCheckScaLapackInfo("descinit", info);
519
520 /* allocate 2d vector */
521 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
522 PetscCall(PetscCalloc1(lszd, &d2d));
523
524 /* create ScaLAPACK descriptor for vector (2d block distribution) */
525 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
526 PetscCheckScaLapackInfo("descinit", info);
527
528 /* redistribute d to a row of a 2d matrix */
529 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
530
531 /* broadcast along process columns */
532 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
533 else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
534
535 /* local scaling */
536 for (j = 0; j < a->locc; j++)
537 for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
538
539 PetscCall(PetscFree(d2d));
540 PetscCall(VecRestoreArrayRead(R, &d));
541 }
542 if (L) {
543 PetscCall(VecGetArrayRead(L, &d));
544 /* create ScaLAPACK descriptor for vector (1d block distribution) */
545 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
546 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
547 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld));
548 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
549 PetscCheckScaLapackInfo("descinit", info);
550
551 /* allocate 2d vector */
552 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
553 PetscCall(PetscCalloc1(lszd, &d2d));
554 PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld));
555
556 /* create ScaLAPACK descriptor for vector (2d block distribution) */
557 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
558 PetscCheckScaLapackInfo("descinit", info);
559
560 /* redistribute d to a column of a 2d matrix */
561 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
562
563 /* broadcast along process rows */
564 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
565 else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
566
567 /* local scaling */
568 for (i = 0; i < a->locr; i++)
569 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
570
571 PetscCall(PetscFree(d2d));
572 PetscCall(VecRestoreArrayRead(L, &d));
573 }
574 PetscFunctionReturn(PETSC_SUCCESS);
575 }
576
577 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
578 {
579 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
580 PetscBLASInt n, one = 1;
581
582 PetscFunctionBegin;
583 n = x->lld * x->locc;
584 PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
585 PetscFunctionReturn(PETSC_SUCCESS);
586 }
587
588 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
589 {
590 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
591 PetscBLASInt i, n;
592 PetscScalar v;
593
594 PetscFunctionBegin;
595 n = PetscMin(x->M, x->N);
596 for (i = 1; i <= n; i++) {
597 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
598 v += alpha;
599 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
600 }
601 PetscFunctionReturn(PETSC_SUCCESS);
602 }
603
604 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
605 {
606 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
607 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data;
608 PetscBLASInt one = 1;
609 PetscScalar beta = 1.0;
610
611 PetscFunctionBegin;
612 MatScaLAPACKCheckDistribution(Y, 1, X, 3);
613 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
614 PetscCall(PetscObjectStateIncrease((PetscObject)Y));
615 PetscFunctionReturn(PETSC_SUCCESS);
616 }
617
618 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
619 {
620 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
621 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
622
623 PetscFunctionBegin;
624 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
625 PetscCall(PetscObjectStateIncrease((PetscObject)B));
626 PetscFunctionReturn(PETSC_SUCCESS);
627 }
628
629 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
630 {
631 Mat Bs;
632 MPI_Comm comm;
633 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
634
635 PetscFunctionBegin;
636 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
637 PetscCall(MatCreate(comm, &Bs));
638 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
639 PetscCall(MatSetType(Bs, MATSCALAPACK));
640 b = (Mat_ScaLAPACK *)Bs->data;
641 b->M = a->M;
642 b->N = a->N;
643 b->mb = a->mb;
644 b->nb = a->nb;
645 b->rsrc = a->rsrc;
646 b->csrc = a->csrc;
647 PetscCall(MatSetUp(Bs));
648 *B = Bs;
649 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
650 Bs->assembled = PETSC_TRUE;
651 PetscFunctionReturn(PETSC_SUCCESS);
652 }
653
654 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
655 {
656 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
657 Mat Bs = *B;
658 PetscBLASInt one = 1;
659 PetscScalar sone = 1.0, zero = 0.0;
660 #if defined(PETSC_USE_COMPLEX)
661 PetscInt i, j;
662 #endif
663
664 PetscFunctionBegin;
665 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
666 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
667 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
668 *B = Bs;
669 b = (Mat_ScaLAPACK *)Bs->data;
670 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
671 #if defined(PETSC_USE_COMPLEX)
672 /* undo conjugation */
673 for (i = 0; i < b->locr; i++)
674 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
675 #endif
676 Bs->assembled = PETSC_TRUE;
677 PetscFunctionReturn(PETSC_SUCCESS);
678 }
679
680 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
681 {
682 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
683 PetscInt i, j;
684
685 PetscFunctionBegin;
686 for (i = 0; i < a->locr; i++)
687 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
688 PetscFunctionReturn(PETSC_SUCCESS);
689 }
690
691 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
692 {
693 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
694 Mat Bs = *B;
695 PetscBLASInt one = 1;
696 PetscScalar sone = 1.0, zero = 0.0;
697
698 PetscFunctionBegin;
699 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
700 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
701 *B = Bs;
702 b = (Mat_ScaLAPACK *)Bs->data;
703 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
704 Bs->assembled = PETSC_TRUE;
705 PetscFunctionReturn(PETSC_SUCCESS);
706 }
707
708 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
709 {
710 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
711 PetscScalar *x, *x2d;
712 const PetscInt *ranges;
713 PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
714
715 PetscFunctionBegin;
716 PetscCall(VecCopy(B, X));
717 PetscCall(VecGetArray(X, &x));
718
719 /* create ScaLAPACK descriptor for a vector (1d block distribution) */
720 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
721 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
722 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld));
723 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
724 PetscCheckScaLapackInfo("descinit", info);
725
726 /* allocate 2d vector */
727 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
728 PetscCall(PetscMalloc1(lszx, &x2d));
729 PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld));
730
731 /* create ScaLAPACK descriptor for a vector (2d block distribution) */
732 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
733 PetscCheckScaLapackInfo("descinit", info);
734
735 /* redistribute x as a column of a 2d matrix */
736 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
737
738 /* call ScaLAPACK subroutine */
739 switch (A->factortype) {
740 case MAT_FACTOR_LU:
741 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
742 PetscCheckScaLapackInfo("getrs", info);
743 break;
744 case MAT_FACTOR_CHOLESKY:
745 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
746 PetscCheckScaLapackInfo("potrs", info);
747 break;
748 default:
749 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
750 }
751
752 /* redistribute x from a column of a 2d matrix */
753 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
754
755 PetscCall(PetscFree(x2d));
756 PetscCall(VecRestoreArray(X, &x));
757 PetscFunctionReturn(PETSC_SUCCESS);
758 }
759
760 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
761 {
762 PetscFunctionBegin;
763 PetscCall(MatSolve_ScaLAPACK(A, B, X));
764 PetscCall(VecAXPY(X, 1, Y));
765 PetscFunctionReturn(PETSC_SUCCESS);
766 }
767
768 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
769 {
770 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x;
771 PetscBool flg1, flg2;
772 PetscBLASInt one = 1, info;
773 Mat C;
774 MatType type;
775
776 PetscFunctionBegin;
777 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
778 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
779 if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3);
780 if (flg2) {
781 if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
782 else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X));
783 C = X;
784 } else {
785 PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C));
786 }
787 x = (Mat_ScaLAPACK *)C->data;
788
789 switch (A->factortype) {
790 case MAT_FACTOR_LU:
791 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
792 PetscCheckScaLapackInfo("getrs", info);
793 break;
794 case MAT_FACTOR_CHOLESKY:
795 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
796 PetscCheckScaLapackInfo("potrs", info);
797 break;
798 default:
799 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
800 }
801 if (!flg2) {
802 PetscCall(MatGetType(X, &type));
803 PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
804 PetscCall(MatDestroy(&C));
805 }
806 PetscFunctionReturn(PETSC_SUCCESS);
807 }
808
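/*
   Factorization kernels. LU calls p*getrf on the local block-cyclic array (the pivot
   array has locr+mb entries, as ScaLAPACK requires); Cholesky factors the lower
   triangle with p*potrf. The symbolic phases are empty because the factor matrix F is
   already created and preallocated by MatGetFactor_scalapack_scalapack(), and the
   numeric phases simply copy A into F and factor in place.
*/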
809 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
810 {
811 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
812 PetscBLASInt one = 1, info;
813
814 PetscFunctionBegin;
815 if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
816 PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
817 PetscCheckScaLapackInfo("getrf", info);
818 A->factortype = MAT_FACTOR_LU;
819 A->assembled = PETSC_TRUE;
820
821 PetscCall(PetscFree(A->solvertype));
822 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
823 PetscFunctionReturn(PETSC_SUCCESS);
824 }
825
826 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
827 {
828 PetscFunctionBegin;
829 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
830 PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
831 PetscFunctionReturn(PETSC_SUCCESS);
832 }
833
834 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
835 {
836 PetscFunctionBegin;
837 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
838 PetscFunctionReturn(PETSC_SUCCESS);
839 }
840
841 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
842 {
843 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
844 PetscBLASInt one = 1, info;
845
846 PetscFunctionBegin;
847 PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
848 PetscCheckScaLapackInfo("potrf", info);
849 A->factortype = MAT_FACTOR_CHOLESKY;
850 A->assembled = PETSC_TRUE;
851
852 PetscCall(PetscFree(A->solvertype));
853 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
854 PetscFunctionReturn(PETSC_SUCCESS);
855 }
856
857 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
858 {
859 PetscFunctionBegin;
860 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
861 PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
862 PetscFunctionReturn(PETSC_SUCCESS);
863 }
864
865 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
866 {
867 PetscFunctionBegin;
868 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
869 PetscFunctionReturn(PETSC_SUCCESS);
870 }
871
872 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
873 {
874 PetscFunctionBegin;
875 *type = MATSOLVERSCALAPACK;
876 PetscFunctionReturn(PETSC_SUCCESS);
877 }
878
879 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
880 {
881 Mat B;
882 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
883
884 PetscFunctionBegin;
885 /* Create the factorization matrix */
886 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
887 B->trivialsymbolic = PETSC_TRUE;
888 B->factortype = ftype;
889 PetscCall(PetscFree(B->solvertype));
890 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
891
892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
893 *F = B;
894 PetscFunctionReturn(PETSC_SUCCESS);
895 }
896
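/*
   Registers MATSOLVERSCALAPACK as an LU and Cholesky solver for MATSCALAPACK matrices.
   Caller-side sketch of the usual route through KSP/PC (illustrative; the same choice
   can be made at run time with -pc_type lu -pc_factor_mat_solver_type scalapack):

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSCALAPACK));
     PetscCall(KSPSolve(ksp, b, x));
*/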
897 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
898 {
899 PetscFunctionBegin;
900 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
901 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
902 PetscFunctionReturn(PETSC_SUCCESS);
903 }
904
905 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
906 {
907 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
908 PetscBLASInt one = 1, lwork = 0;
909 const char *ntype;
910 PetscScalar *work = NULL, dummy;
911
912 PetscFunctionBegin;
913 switch (type) {
914 case NORM_1:
915 ntype = "1";
916 lwork = PetscMax(a->locr, a->locc);
917 break;
918 case NORM_FROBENIUS:
919 ntype = "F";
920 work = &dummy;
921 break;
922 case NORM_INFINITY:
923 ntype = "I";
924 lwork = PetscMax(a->locr, a->locc);
925 break;
926 default:
927 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
928 }
929 if (lwork) PetscCall(PetscMalloc1(lwork, &work));
930 *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
931 if (lwork) PetscCall(PetscFree(work));
932 PetscFunctionReturn(PETSC_SUCCESS);
933 }
934
935 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
936 {
937 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
938
939 PetscFunctionBegin;
940 PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
941 PetscFunctionReturn(PETSC_SUCCESS);
942 }
943
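/*
   MatGetOwnershipIS returns, for each process, the global indices of the rows/columns
   it stores locally in the block-cyclic layout:

     global = nproc*nb*(local/nb) + local%nb + ((nproc + iproc - isrc) % nproc)*nb

   Worked example (assumed values): nb = 2, nproc = 2, isrc = 0, iproc = 1 maps local
   indices 0,1,2,3 to global indices 2,3,6,7, i.e. this process owns the second block
   of every pair of blocks.
*/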
944 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
945 {
946 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
947 PetscInt i, n, nb, isrc, nproc, iproc, *idx;
948
949 PetscFunctionBegin;
950 if (rows) {
951 n = a->locr;
952 nb = a->mb;
953 isrc = a->rsrc;
954 nproc = a->grid->nprow;
955 iproc = a->grid->myrow;
956 PetscCall(PetscMalloc1(n, &idx));
957 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
958 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
959 }
960 if (cols) {
961 n = a->locc;
962 nb = a->nb;
963 isrc = a->csrc;
964 nproc = a->grid->npcol;
965 iproc = a->grid->mycol;
966 PetscCall(PetscMalloc1(n, &idx));
967 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
968 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
969 }
970 PetscFunctionReturn(PETSC_SUCCESS);
971 }
972
973 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
974 {
975 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
976 Mat Bmpi;
977 MPI_Comm comm;
978 PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
979 const PetscInt *ranges, *branges, *cwork;
980 const PetscScalar *vwork;
981 PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info;
982 PetscScalar *barray;
983 PetscBool differ = PETSC_FALSE;
984 PetscMPIInt size;
985
986 PetscFunctionBegin;
987 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
988 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
989
990 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
991 PetscCallMPI(MPI_Comm_size(comm, &size));
992 PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
993 for (i = 0; i < size; i++)
994 if (ranges[i + 1] != branges[i + 1]) {
995 differ = PETSC_TRUE;
996 break;
997 }
998 }
999
1000 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
1001 PetscCall(MatCreate(comm, &Bmpi));
1002 m = PETSC_DECIDE;
1003 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1004 n = PETSC_DECIDE;
1005 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1006 PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1007 PetscCall(MatSetType(Bmpi, MATDENSE));
1008 PetscCall(MatSetUp(Bmpi));
1009
1010 /* create ScaLAPACK descriptor for B (1d block distribution) */
1011 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1012 PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1013 PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1014 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1015 PetscCheckScaLapackInfo("descinit", info);
1016
1017 /* redistribute matrix */
1018 PetscCall(MatDenseGetArray(Bmpi, &barray));
1019 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1020 PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1021 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1022 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1023
1024 /* transfer rows of auxiliary matrix to the final matrix B */
1025 PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
1026 for (i = rstart; i < rend; i++) {
1027 PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
1028 PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
1029 PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
1030 }
1031 PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
1032 PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
1033 PetscCall(MatDestroy(&Bmpi));
1034
1035 } else { /* normal cases */
1036
1037 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1038 else {
1039 PetscCall(MatCreate(comm, &Bmpi));
1040 m = PETSC_DECIDE;
1041 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1042 n = PETSC_DECIDE;
1043 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1044 PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1045 PetscCall(MatSetType(Bmpi, MATDENSE));
1046 PetscCall(MatSetUp(Bmpi));
1047 }
1048
1049 /* create ScaLAPACK descriptor for B (1d block distribution) */
1050 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1051 PetscCall(MatDenseGetLDA(Bmpi, &ldb));
1052 PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */
1053 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1054 PetscCheckScaLapackInfo("descinit", info);
1055
1056 /* redistribute matrix */
1057 PetscCall(MatDenseGetArray(Bmpi, &barray));
1058 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1059 PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1060
1061 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1062 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1063 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1064 else *B = Bmpi;
1065 }
1066 PetscFunctionReturn(PETSC_SUCCESS);
1067 }
1068
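/*
   A PetscLayout is "ScaLAPACK compatible" when every process has the same local size,
   except possibly the last one, which may be smaller. For example, splitting 10 rows
   over 3 processes as {4,4,2} is accepted, while {4,3,3} is not. Incompatible layouts
   are handled in MatConvert_ScaLAPACK_Dense/MatConvert_Dense_ScaLAPACK through an
   auxiliary matrix or MatSetValues() instead of a direct p*gemr2d call.
*/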
1069 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1070 {
1071 const PetscInt *ranges;
1072 PetscMPIInt size;
1073 PetscInt i, n;
1074
1075 PetscFunctionBegin;
1076 *correct = PETSC_TRUE;
1077 PetscCallMPI(MPI_Comm_size(map->comm, &size));
1078 if (size > 1) {
1079 PetscCall(PetscLayoutGetRanges(map, &ranges));
1080 n = ranges[1] - ranges[0];
1081 for (i = 1; i < size; i++)
1082 if (ranges[i + 1] - ranges[i] != n) break;
1083 *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1084 }
1085 PetscFunctionReturn(PETSC_SUCCESS);
1086 }
1087
1088 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1089 {
1090 Mat_ScaLAPACK *b;
1091 Mat Bmpi;
1092 MPI_Comm comm;
1093 PetscInt M = A->rmap->N, N = A->cmap->N, m, n;
1094 const PetscInt *ranges, *rows, *cols;
1095 PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info;
1096 const PetscScalar *aarray;
1097 IS ir, ic;
1098 PetscInt lda;
1099 PetscBool flg;
1100
1101 PetscFunctionBegin;
1102 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1103
1104 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1105 else {
1106 PetscCall(MatCreate(comm, &Bmpi));
1107 m = PETSC_DECIDE;
1108 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1109 n = PETSC_DECIDE;
1110 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1111 PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1112 PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1113 PetscCall(MatSetUp(Bmpi));
1114 }
1115 b = (Mat_ScaLAPACK *)Bmpi->data;
1116
1117 PetscCall(MatDenseGetLDA(A, &lda));
1118 PetscCall(MatDenseGetArrayRead(A, &aarray));
1119 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1120 if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1121 if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1122 /* create ScaLAPACK descriptor for A (1d block distribution) */
1123 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1124 PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1125 PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */
1126 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1127 PetscCheckScaLapackInfo("descinit", info);
1128
1129 /* redistribute matrix */
1130 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1131 Bmpi->nooffprocentries = PETSC_TRUE;
1132 } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1133 PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1134 b->roworiented = PETSC_FALSE;
1135 PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1136 PetscCall(ISGetIndices(ir, &rows));
1137 PetscCall(ISGetIndices(ic, &cols));
1138 PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1139 PetscCall(ISRestoreIndices(ir, &rows));
1140 PetscCall(ISRestoreIndices(ic, &cols));
1141 PetscCall(ISDestroy(&ic));
1142 PetscCall(ISDestroy(&ir));
1143 }
1144 PetscCall(MatDenseRestoreArrayRead(A, &aarray));
1145 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1146 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1147 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &Bmpi));
1148 else *B = Bmpi;
1149 PetscFunctionReturn(PETSC_SUCCESS);
1150 }
1151
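/*
   The AIJ and SBAIJ converters below replay the matrix row by row with MatSetValues(),
   so they work for any source layout (off-process entries go through the stash).
   Caller-side sketch (illustrative):

     Mat As;
     PetscCall(MatConvert(A, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));

   With MAT_INPLACE_MATRIX the converted matrix replaces A itself via MatHeaderReplace().
*/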
1152 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1153 {
1154 Mat mat_scal;
1155 PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1156 const PetscInt *cols;
1157 const PetscScalar *vals;
1158
1159 PetscFunctionBegin;
1160 if (reuse == MAT_REUSE_MATRIX) {
1161 mat_scal = *newmat;
1162 PetscCall(MatZeroEntries(mat_scal));
1163 } else {
1164 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1165 m = PETSC_DECIDE;
1166 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1167 n = PETSC_DECIDE;
1168 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1169 PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1170 PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1171 PetscCall(MatSetUp(mat_scal));
1172 }
1173 for (row = rstart; row < rend; row++) {
1174 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1175 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1176 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1177 }
1178 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1179 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1180
1181 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1182 else *newmat = mat_scal;
1183 PetscFunctionReturn(PETSC_SUCCESS);
1184 }
1185
1186 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1187 {
1188 Mat mat_scal;
1189 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1190 const PetscInt *cols;
1191 const PetscScalar *vals;
1192 PetscScalar v;
1193
1194 PetscFunctionBegin;
1195 if (reuse == MAT_REUSE_MATRIX) {
1196 mat_scal = *newmat;
1197 PetscCall(MatZeroEntries(mat_scal));
1198 } else {
1199 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1200 m = PETSC_DECIDE;
1201 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1202 n = PETSC_DECIDE;
1203 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1204 PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1205 PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1206 PetscCall(MatSetUp(mat_scal));
1207 }
1208 PetscCall(MatGetRowUpperTriangular(A));
1209 for (row = rstart; row < rend; row++) {
1210 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1211 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1212 for (j = 0; j < ncols; j++) { /* lower triangular part */
1213 if (cols[j] == row) continue;
1214 v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1215 PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1216 }
1217 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1218 }
1219 PetscCall(MatRestoreRowUpperTriangular(A));
1220 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1221 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1222
1223 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1224 else *newmat = mat_scal;
1225 PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227
1228 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1229 {
1230 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1231 PetscInt sz = 0;
1232
1233 PetscFunctionBegin;
1234 PetscCall(PetscLayoutSetUp(A->rmap));
1235 PetscCall(PetscLayoutSetUp(A->cmap));
1236 if (!a->lld) a->lld = a->locr;
1237
1238 PetscCall(PetscFree(a->loc));
1239 PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1240 PetscCall(PetscCalloc1(sz, &a->loc));
1241
1242 A->preallocated = PETSC_TRUE;
1243 PetscFunctionReturn(PETSC_SUCCESS);
1244 }
1245
1246 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1247 {
1248 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1249 Mat_ScaLAPACK_Grid *grid;
1250 PetscMPIInt iflg;
1251 MPI_Comm icomm;
1252
1253 PetscFunctionBegin;
1254 PetscCall(MatStashDestroy_Private(&A->stash));
1255 PetscCall(PetscFree(a->loc));
1256 PetscCall(PetscFree(a->pivots));
1257 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1258 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1259 if (--grid->grid_refct == 0) {
1260 Cblacs_gridexit(grid->ictxt);
1261 Cblacs_gridexit(grid->ictxrow);
1262 Cblacs_gridexit(grid->ictxcol);
1263 PetscCall(PetscFree(grid));
1264 PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1265 }
1266 PetscCall(PetscCommDestroy(&icomm));
1267 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1268 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1269 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1270 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1271 PetscCall(PetscFree(A->data));
1272 PetscFunctionReturn(PETSC_SUCCESS);
1273 }
1274
1275 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1276 {
1277 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1278 PetscBLASInt info = 0;
1279 PetscBool flg;
1280
1281 PetscFunctionBegin;
1282 PetscCall(PetscLayoutSetUp(A->rmap));
1283 PetscCall(PetscLayoutSetUp(A->cmap));
1284
1285 /* check that the layout is as enforced by MatCreateScaLAPACK() */
1286 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1287 PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1288 PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1289 PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1290
1291 /* compute local sizes */
1292 PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1293 PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1294 a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1295 a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1296 a->lld = PetscMax(1, a->locr);
1297
1298 /* allocate local array */
1299 PetscCall(MatScaLAPACKSetPreallocation(A));
1300
1301 /* set up ScaLAPACK descriptor */
1302 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1303 PetscCheckScaLapackInfo("descinit", info);
1304 PetscFunctionReturn(PETSC_SUCCESS);
1305 }
1306
1307 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1308 {
1309 PetscInt nstash, reallocs;
1310
1311 PetscFunctionBegin;
1312 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1313 PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1314 PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1315 PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1316 PetscFunctionReturn(PETSC_SUCCESS);
1317 }
1318
1319 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1320 {
1321 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1322 PetscMPIInt n;
1323 PetscInt i, flg, *row, *col;
1324 PetscScalar *val;
1325 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1326
1327 PetscFunctionBegin;
1328 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1329 while (1) {
1330 PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1331 if (!flg) break;
1332 for (i = 0; i < n; i++) {
1333 PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1334 PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1335 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1336 PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1337 switch (A->insertmode) {
1338 case INSERT_VALUES:
1339 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1340 break;
1341 case ADD_VALUES:
1342 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1343 break;
1344 default:
1345 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1346 }
1347 }
1348 }
1349 PetscCall(MatStashScatterEnd_Private(&A->stash));
1350 PetscFunctionReturn(PETSC_SUCCESS);
1351 }
1352
1353 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1354 {
1355 Mat Adense, As;
1356 MPI_Comm comm;
1357
1358 PetscFunctionBegin;
1359 PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1360 PetscCall(MatCreate(comm, &Adense));
1361 PetscCall(MatSetType(Adense, MATDENSE));
1362 PetscCall(MatLoad(Adense, viewer));
1363 PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1364 PetscCall(MatDestroy(&Adense));
1365 PetscCall(MatHeaderReplace(newMat, &As));
1366 PetscFunctionReturn(PETSC_SUCCESS);
1367 }
1368
1369 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1370 NULL,
1371 NULL,
1372 MatMult_ScaLAPACK,
1373 /* 4*/ MatMultAdd_ScaLAPACK,
1374 MatMultTranspose_ScaLAPACK,
1375 MatMultTransposeAdd_ScaLAPACK,
1376 MatSolve_ScaLAPACK,
1377 MatSolveAdd_ScaLAPACK,
1378 NULL,
1379 /*10*/ NULL,
1380 MatLUFactor_ScaLAPACK,
1381 MatCholeskyFactor_ScaLAPACK,
1382 NULL,
1383 MatTranspose_ScaLAPACK,
1384 /*15*/ MatGetInfo_ScaLAPACK,
1385 NULL,
1386 MatGetDiagonal_ScaLAPACK,
1387 MatDiagonalScale_ScaLAPACK,
1388 MatNorm_ScaLAPACK,
1389 /*20*/ MatAssemblyBegin_ScaLAPACK,
1390 MatAssemblyEnd_ScaLAPACK,
1391 MatSetOption_ScaLAPACK,
1392 MatZeroEntries_ScaLAPACK,
1393 /*24*/ NULL,
1394 MatLUFactorSymbolic_ScaLAPACK,
1395 MatLUFactorNumeric_ScaLAPACK,
1396 MatCholeskyFactorSymbolic_ScaLAPACK,
1397 MatCholeskyFactorNumeric_ScaLAPACK,
1398 /*29*/ MatSetUp_ScaLAPACK,
1399 NULL,
1400 NULL,
1401 NULL,
1402 NULL,
1403 /*34*/ MatDuplicate_ScaLAPACK,
1404 NULL,
1405 NULL,
1406 NULL,
1407 NULL,
1408 /*39*/ MatAXPY_ScaLAPACK,
1409 NULL,
1410 NULL,
1411 NULL,
1412 MatCopy_ScaLAPACK,
1413 /*44*/ NULL,
1414 MatScale_ScaLAPACK,
1415 MatShift_ScaLAPACK,
1416 NULL,
1417 NULL,
1418 /*49*/ NULL,
1419 NULL,
1420 NULL,
1421 NULL,
1422 NULL,
1423 /*54*/ NULL,
1424 NULL,
1425 NULL,
1426 NULL,
1427 NULL,
1428 /*59*/ NULL,
1429 MatDestroy_ScaLAPACK,
1430 MatView_ScaLAPACK,
1431 NULL,
1432 NULL,
1433 /*64*/ NULL,
1434 NULL,
1435 NULL,
1436 NULL,
1437 NULL,
1438 /*69*/ NULL,
1439 MatConvert_ScaLAPACK_Dense,
1440 NULL,
1441 NULL,
1442 NULL,
1443 /*74*/ NULL,
1444 NULL,
1445 NULL,
1446 NULL,
1447 MatLoad_ScaLAPACK,
1448 /*79*/ NULL,
1449 NULL,
1450 NULL,
1451 NULL,
1452 NULL,
1453 /*84*/ NULL,
1454 MatMatMultNumeric_ScaLAPACK,
1455 NULL,
1456 NULL,
1457 MatMatTransposeMultNumeric_ScaLAPACK,
1458 /*89*/ NULL,
1459 MatProductSetFromOptions_ScaLAPACK,
1460 NULL,
1461 NULL,
1462 MatConjugate_ScaLAPACK,
1463 /*94*/ NULL,
1464 NULL,
1465 NULL,
1466 NULL,
1467 NULL,
1468 /*99*/ NULL,
1469 MatMatSolve_ScaLAPACK,
1470 NULL,
1471 NULL,
1472 NULL,
1473 /*104*/ NULL,
1474 NULL,
1475 NULL,
1476 NULL,
1477 NULL,
1478 /*109*/ NULL,
1479 MatHermitianTranspose_ScaLAPACK,
1480 MatMultHermitianTranspose_ScaLAPACK,
1481 MatMultHermitianTransposeAdd_ScaLAPACK,
1482 NULL,
1483 /*114*/ NULL,
1484 NULL,
1485 NULL,
1486 NULL,
1487 NULL,
1488 /*119*/ NULL,
1489 NULL,
1490 MatTransposeMatMultNumeric_ScaLAPACK,
1491 NULL,
1492 /*124*/ NULL,
1493 NULL,
1494 NULL,
1495 NULL,
1496 NULL,
1497 /*129*/ NULL,
1498 NULL,
1499 NULL,
1500 NULL,
1501 NULL,
1502 /*134*/ NULL,
1503 NULL,
1504 NULL,
1505 NULL,
1506 NULL,
1507 NULL,
1508 /*140*/ NULL,
1509 NULL,
1510 NULL,
1511 NULL,
1512 NULL};
1513
1514 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1515 {
1516 PetscInt *owner, *startv, *starti, bs2;
1517 PetscInt size = stash->size, nsends;
1518 PetscInt *sindices, **rindices, j, l;
1519 PetscScalar **rvalues, *svalues;
1520 MPI_Comm comm = stash->comm;
1521 MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1522 PetscMPIInt tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii;
1523 PetscInt *sp_idx, *sp_idy;
1524 PetscScalar *sp_val;
1525 PetscMatStashSpace space, space_next;
1526 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1527 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data;
1528
1529 PetscFunctionBegin;
1530 { /* make sure all processors are either in INSERTMODE or ADDMODE */
1531 InsertMode addv;
1532 PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1533 PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
1534 mat->insertmode = addv; /* in case this processor had no cache */
1535 }
1536
1537 bs2 = stash->bs * stash->bs;
1538
1539 /* first count number of contributors to each processor */
1540 PetscCall(PetscCalloc1(size, &nlengths));
1541 PetscCall(PetscMalloc1(stash->n + 1, &owner));
1542
1543 ii = j = 0;
1544 space = stash->space_head;
1545 while (space) {
1546 space_next = space->next;
1547 for (l = 0; l < space->local_used; l++) {
1548 PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1549 PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1550 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1551 j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1552 nlengths[j]++;
1553 owner[ii] = j;
1554 ii++;
1555 }
1556 space = space_next;
1557 }
1558
1559 /* Now check what procs get messages - and compute nsends. */
1560 PetscCall(PetscCalloc1(size, &sizes));
1561 nsends = 0;
1562 for (PetscMPIInt i = 0; i < size; i++) {
1563 if (nlengths[i]) {
1564 sizes[i] = 1;
1565 nsends++;
1566 }
1567 }
1568
1569 {
1570 PetscMPIInt *onodes, *olengths;
1571
1572 /* Determine the number of messages to expect, their lengths, and their from-ids */
1573 PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1574 PetscCall(PetscMPIIntCast(nsends, &insends));
1575 PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths));
1576 /* row and column indices are sent together, so lengths are multiplied by 2 */
1577 for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
1578 PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1579 /* values have length 'bs2' per entry (and remove the earlier factor of 2) */
1580 for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2);
1581 PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1582 PetscCall(PetscFree(onodes));
1583 PetscCall(PetscFree(olengths));
1584 }
1585
1586 /* do sends:
1587 1) starts[i] gives the starting index in svalues for stuff going to
1588 the ith processor
1589 */
1590 PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1591 PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1592 PetscCall(PetscMalloc2(size, &startv, size, &starti));
1593 /* use 2 sends: the first with all_a, the next with all_i and all_j */
1594 startv[0] = 0;
1595 starti[0] = 0;
1596 for (PetscMPIInt i = 1; i < size; i++) {
1597 startv[i] = startv[i - 1] + nlengths[i - 1];
1598 starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1599 }
1600
1601 ii = 0;
1602 space = stash->space_head;
1603 while (space) {
1604 space_next = space->next;
1605 sp_idx = space->idx;
1606 sp_idy = space->idy;
1607 sp_val = space->val;
1608 for (l = 0; l < space->local_used; l++) {
1609 j = owner[ii];
1610 if (bs2 == 1) {
1611 svalues[startv[j]] = sp_val[l];
1612 } else {
1613 PetscInt k;
1614 PetscScalar *buf1, *buf2;
1615 buf1 = svalues + bs2 * startv[j];
1616 buf2 = space->val + bs2 * l;
1617 for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1618 }
1619 sindices[starti[j]] = sp_idx[l];
1620 sindices[starti[j] + nlengths[j]] = sp_idy[l];
1621 startv[j]++;
1622 starti[j]++;
1623 ii++;
1624 }
1625 space = space_next;
1626 }
1627 startv[0] = 0;
1628 for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1629
1630 for (PetscMPIInt i = 0, count = 0; i < size; i++) {
1631 if (sizes[i]) {
1632 PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1633 PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1634 }
1635 }
1636 #if defined(PETSC_USE_INFO)
1637 PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1638 for (PetscMPIInt i = 0; i < size; i++) {
1639 if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1640 }
1641 #endif
1642 PetscCall(PetscFree(nlengths));
1643 PetscCall(PetscFree(owner));
1644 PetscCall(PetscFree2(startv, starti));
1645 PetscCall(PetscFree(sizes));
1646
1647 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1648 PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1649
1650 for (PetscMPIInt i = 0; i < nreceives; i++) {
1651 recv_waits[2 * i] = recv_waits1[i];
1652 recv_waits[2 * i + 1] = recv_waits2[i];
1653 }
1654 stash->recv_waits = recv_waits;
1655
1656 PetscCall(PetscFree(recv_waits1));
1657 PetscCall(PetscFree(recv_waits2));
1658
1659 stash->svalues = svalues;
1660 stash->sindices = sindices;
1661 stash->rvalues = rvalues;
1662 stash->rindices = rindices;
1663 stash->send_waits = send_waits;
1664 stash->nsends = (PetscMPIInt)nsends;
1665 stash->nrecvs = nreceives;
1666 stash->reproduce_count = 0;
1667 PetscFunctionReturn(PETSC_SUCCESS);
1668 }
1669
1670 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1671 {
1672 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1673
1674 PetscFunctionBegin;
1675 PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1676 PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1677 PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1678 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1679 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1680 PetscFunctionReturn(PETSC_SUCCESS);
1681 }
1682
1683 /*@
1684 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1685 the `MATSCALAPACK` matrix
1686
1687 Logically Collective
1688
1689 Input Parameters:
1690 + A - a `MATSCALAPACK` matrix
1691 . mb - the row block size
1692 - nb - the column block size
1693
1694 Level: intermediate
1695
1696 Note:
1697 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1698
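   Example Usage:
   A minimal sketch (the block size 32 is illustrative); the call must come before `MatSetUp()`, since the
   block sizes cannot be changed once the matrix has been preallocated:
.vb
   Mat A;
   PetscCall(MatCreate(comm, &A));
   PetscCall(MatSetType(A, MATSCALAPACK));
   PetscCall(MatScaLAPACKSetBlockSizes(A, 32, 32));
   /* ... then set the sizes (see MatCreateScaLAPACK() for the layout requirements) and call MatSetUp() ... */
.ve
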
1699 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1700 @*/
1701 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1702 {
1703 PetscFunctionBegin;
1704 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1705 PetscValidLogicalCollectiveInt(A, mb, 2);
1706 PetscValidLogicalCollectiveInt(A, nb, 3);
1707 PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1708 PetscFunctionReturn(PETSC_SUCCESS);
1709 }
1710
1711 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1712 {
1713 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1714
1715 PetscFunctionBegin;
1716 if (mb) *mb = a->mb;
1717 if (nb) *nb = a->nb;
1718 PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720
1721 /*@
1722 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1723 the `MATSCALAPACK` matrix
1724
1725 Not Collective
1726
1727 Input Parameter:
1728 . A - a `MATSCALAPACK` matrix
1729
1730 Output Parameters:
1731 + mb - the row block size
1732 - nb - the column block size
1733
1734 Level: intermediate
1735
1736 Note:
1737 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1738
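   Example Usage:
   A minimal sketch querying the block sizes of an existing `MATSCALAPACK` matrix `A`:
.vb
   PetscInt mb, nb;
   PetscCall(MatScaLAPACKGetBlockSizes(A, &mb, &nb));
.ve
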
1739 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1740 @*/
1741 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1742 {
1743 PetscFunctionBegin;
1744 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1745 PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1746 PetscFunctionReturn(PETSC_SUCCESS);
1747 }
1748
1749 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1750 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1751
1752 /*MC
1753 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1754
1755 Use `./configure --download-scalapack` to build PETSc with ScaLAPACK support
1756
1757 Options Database Keys:
1758 + -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1759 . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1760 . -mat_scalapack_grid_height - sets the process grid height used for the 2D block-cyclic distribution of the matrix
1761 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1762
1763 Level: intermediate
1764
1765 Note:
1766 Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1767 range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine which values are stored on
1768 a given rank.
1769
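   For example, a sketch of querying which entries the calling process stores (the names `isrows` and
   `iscols` are illustrative); entries whose row index is in `isrows` and whose column index is in `iscols`
   are stored locally:
.vb
   IS isrows, iscols;
   PetscCall(MatGetOwnershipIS(A, &isrows, &iscols));
   /* ... use the index sets ... */
   PetscCall(ISDestroy(&isrows));
   PetscCall(ISDestroy(&iscols));
.ve
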
1770 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1771 M*/
1772
1773 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1774 {
1775 Mat_ScaLAPACK *a;
1776 PetscBool flg;
1777 PetscMPIInt iflg;
1778 Mat_ScaLAPACK_Grid *grid;
1779 MPI_Comm icomm;
1780 PetscBLASInt nprow, npcol, myrow, mycol;
1781 PetscInt optv1, k = 2, array[2] = {0, 0};
1782 PetscMPIInt size;
1783
1784 PetscFunctionBegin;
1785 A->ops[0] = MatOps_Values;
1786 A->insertmode = NOT_SET_VALUES;
1787
1788 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1789 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1790 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1791 A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1792 A->stash.ScatterDestroy = NULL;
1793
1794 PetscCall(PetscNew(&a));
1795 A->data = (void *)a;
1796
1797 /* Grid needs to be shared between multiple Mats on the same communicator, implemented via attribute caching on the MPI_Comm */
1798 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1799 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL));
1800 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1801 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1802 }
1803 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1804 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, &iflg));
1805 if (!iflg) {
1806 PetscCall(PetscNew(&grid));
1807
1808 PetscCallMPI(MPI_Comm_size(icomm, &size));
1809 PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow));
1810
1811 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1812 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg));
1813 if (flg) {
1814 PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1815 PetscCall(PetscBLASIntCast(optv1, &grid->nprow));
1816 }
1817 PetscOptionsEnd();
1818
1819 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1820 grid->npcol = size / grid->nprow;
1821 PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1822 PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1823 grid->ictxt = Csys2blacs_handle(icomm);
1824 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1825 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1826 grid->grid_refct = 1;
1827 grid->nprow = nprow;
1828 grid->npcol = npcol;
1829 grid->myrow = myrow;
1830 grid->mycol = mycol;
1831 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1832 grid->ictxrow = Csys2blacs_handle(icomm);
1833 Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1834 grid->ictxcol = Csys2blacs_handle(icomm);
1835 Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1836 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1837
1838 } else grid->grid_refct++;
1839 PetscCall(PetscCommDestroy(&icomm));
1840 a->grid = grid;
1841 a->mb = DEFAULT_BLOCKSIZE;
1842 a->nb = DEFAULT_BLOCKSIZE;
1843
1844 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1845 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1846 if (flg) {
1847 a->mb = (PetscMPIInt)array[0];
1848 a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb;
1849 }
1850 PetscOptionsEnd();
1851
1852 a->roworiented = PETSC_TRUE;
1853 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1854 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1855 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1856 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1857 PetscFunctionReturn(PETSC_SUCCESS);
1858 }
1859
1860 /*@C
1861 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1862 (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1863
1864 Collective
1865
1866 Input Parameters:
1867 + comm - MPI communicator
1868 . mb - row block size (or `PETSC_DECIDE` to have it set)
1869 . nb - column block size (or `PETSC_DECIDE` to have it set)
1870 . M - number of global rows
1871 . N - number of global columns
1872 . rsrc - coordinate of process that owns the first row of the distributed matrix
1873 - csrc - coordinate of process that owns the first column of the distributed matrix
1874
1875 Output Parameter:
1876 . A - the matrix
1877
1878 Options Database Key:
1879 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1880
1881 Level: intermediate
1882
1883 Notes:
1884 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1885
1886 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1887 MatXXXXSetPreallocation() paradigm instead of this routine directly.
1888 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1889
1890 Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1891 configured with ScaLAPACK. In particular, PETSc's local sizes lose
1892 significance and are thus ignored. The block sizes refer to the 2D block-cyclic
1893 distribution used for the matrix and do not have the same meaning as the block size of `MATBAIJ`.
1894
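   Example Usage:
   A minimal sketch (the global dimension 100 and the default block sizes requested via `PETSC_DECIDE` are illustrative):
.vb
   Mat A;
   PetscCall(MatCreateScaLAPACK(comm, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 0, 0, &A));
   /* any process may set any entry with MatSetValues(), off-process values are communicated at assembly */
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve
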
1895 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1896 @*/
1897 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1898 {
1899 Mat_ScaLAPACK *a;
1900 PetscInt m, n;
1901
1902 PetscFunctionBegin;
1903 PetscCall(MatCreate(comm, A));
1904 PetscCall(MatSetType(*A, MATSCALAPACK));
1905 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1906 /* rows and columns are NOT distributed according to PetscSplitOwnership */
1907 m = PETSC_DECIDE;
1908 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1909 n = PETSC_DECIDE;
1910 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1911 PetscCall(MatSetSizes(*A, m, n, M, N));
1912 a = (Mat_ScaLAPACK *)(*A)->data;
1913 PetscCall(PetscBLASIntCast(M, &a->M));
1914 PetscCall(PetscBLASIntCast(N, &a->N));
1915 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1916 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1917 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1918 PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1919 PetscCall(MatSetUp(*A));
1920 PetscFunctionReturn(PETSC_SUCCESS);
1921 }
1922