1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(PETSC_SUCCESS); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39 if (iascii) { 40 PetscCall(PetscViewerGetFormat(viewer, &format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 43 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 44 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 45 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 46 PetscFunctionReturn(PETSC_SUCCESS); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 53 PetscCall(MatView(Adense, viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61 PetscLogDouble isend[2], irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld * a->locc; /* locally allocated */ 67 isend[1] = a->locr * a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = 0; /* REVIEW ME */ 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented = flg; 106 break; 107 default: 108 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109 } 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114 { 115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116 PetscInt i, j; 117 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118 PetscBool roworiented = a->roworiented; 119 120 PetscFunctionBegin; 121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122 for (i = 0; i < nr; i++) { 123 if (rows[i] < 0) continue; 124 PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125 for (j = 0; j < nc; j++) { 126 if (cols[j] < 0) continue; 127 PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129 if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130 if (roworiented) { 131 switch (imode) { 132 case INSERT_VALUES: 133 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134 break; 135 default: 136 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137 break; 138 } 139 } else { 140 switch (imode) { 141 case INSERT_VALUES: 142 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143 break; 144 default: 145 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146 break; 147 } 148 } 149 } else { 150 PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151 A->assembled = PETSC_FALSE; 152 PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153 } 154 } 155 } 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160 { 161 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162 PetscScalar *x2d, *y2d, alpha = 1.0; 163 const PetscInt *ranges; 164 PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165 166 PetscFunctionBegin; 167 if (transpose) { 168 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 169 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 170 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171 xlld = PetscMax(1, A->rmap->n); 172 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173 PetscCheckScaLapackInfo("descinit", info); 174 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 175 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176 ylld = 1; 177 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178 PetscCheckScaLapackInfo("descinit", info); 179 180 /* allocate 2d vectors */ 181 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182 lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 183 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184 xlld = PetscMax(1, lszx); 185 186 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188 PetscCheckScaLapackInfo("descinit", info); 189 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190 PetscCheckScaLapackInfo("descinit", info); 191 192 /* redistribute x as a column of a 2d matrix */ 193 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194 195 /* redistribute y as a row of a 2d matrix */ 196 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197 198 /* call PBLAS subroutine */ 199 PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200 201 /* redistribute y from a row of a 2d matrix */ 202 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 203 204 } else { /* non-transpose */ 205 206 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 207 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 208 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 209 xlld = 1; 210 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 211 PetscCheckScaLapackInfo("descinit", info); 212 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 213 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 214 ylld = PetscMax(1, A->rmap->n); 215 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 216 PetscCheckScaLapackInfo("descinit", info); 217 218 /* allocate 2d vectors */ 219 lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 220 lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 221 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 222 ylld = PetscMax(1, lszy); 223 224 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 225 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 226 PetscCheckScaLapackInfo("descinit", info); 227 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 228 PetscCheckScaLapackInfo("descinit", info); 229 230 /* redistribute x as a row of a 2d matrix */ 231 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 232 233 /* redistribute y as a column of a 2d matrix */ 234 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 235 236 /* call PBLAS subroutine */ 237 PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 238 239 /* redistribute y from a column of a 2d matrix */ 240 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 241 } 242 PetscCall(PetscFree2(x2d, y2d)); 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 247 { 248 const PetscScalar *xarray; 249 PetscScalar *yarray; 250 251 PetscFunctionBegin; 252 PetscCall(VecGetArrayRead(x, &xarray)); 253 PetscCall(VecGetArray(y, &yarray)); 254 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray)); 255 PetscCall(VecRestoreArrayRead(x, &xarray)); 256 PetscCall(VecRestoreArray(y, &yarray)); 257 PetscFunctionReturn(PETSC_SUCCESS); 258 } 259 260 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 261 { 262 const PetscScalar *xarray; 263 PetscScalar *yarray; 264 265 PetscFunctionBegin; 266 PetscCall(VecGetArrayRead(x, &xarray)); 267 PetscCall(VecGetArray(y, &yarray)); 268 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray)); 269 PetscCall(VecRestoreArrayRead(x, &xarray)); 270 PetscCall(VecRestoreArray(y, &yarray)); 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 275 { 276 const PetscScalar *xarray; 277 PetscScalar *zarray; 278 279 PetscFunctionBegin; 280 if (y != z) PetscCall(VecCopy(y, z)); 281 PetscCall(VecGetArrayRead(x, &xarray)); 282 PetscCall(VecGetArray(z, &zarray)); 283 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray)); 284 PetscCall(VecRestoreArrayRead(x, &xarray)); 285 PetscCall(VecRestoreArray(z, &zarray)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290 { 291 const PetscScalar *xarray; 292 PetscScalar *zarray; 293 294 PetscFunctionBegin; 295 if (y != z) PetscCall(VecCopy(y, z)); 296 PetscCall(VecGetArrayRead(x, &xarray)); 297 PetscCall(VecGetArray(z, &zarray)); 298 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray)); 299 PetscCall(VecRestoreArrayRead(x, &xarray)); 300 PetscCall(VecRestoreArray(z, &zarray)); 301 PetscFunctionReturn(PETSC_SUCCESS); 302 } 303 304 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 305 { 306 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 307 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 308 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 309 PetscScalar sone = 1.0, zero = 0.0; 310 PetscBLASInt one = 1; 311 312 PetscFunctionBegin; 313 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 314 C->assembled = PETSC_TRUE; 315 PetscFunctionReturn(PETSC_SUCCESS); 316 } 317 318 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 319 { 320 PetscFunctionBegin; 321 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 322 PetscCall(MatSetType(C, MATSCALAPACK)); 323 PetscCall(MatSetUp(C)); 324 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 329 { 330 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 331 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 332 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 333 PetscScalar sone = 1.0, zero = 0.0; 334 PetscBLASInt one = 1; 335 336 PetscFunctionBegin; 337 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 338 C->assembled = PETSC_TRUE; 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 343 { 344 PetscFunctionBegin; 345 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 346 PetscCall(MatSetType(C, MATSCALAPACK)); 347 PetscCall(MatSetUp(C)); 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 352 { 353 PetscFunctionBegin; 354 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 355 C->ops->productsymbolic = MatProductSymbolic_AB; 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 360 { 361 PetscFunctionBegin; 362 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 363 C->ops->productsymbolic = MatProductSymbolic_ABt; 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 368 { 369 Mat_Product *product = C->product; 370 371 PetscFunctionBegin; 372 switch (product->type) { 373 case MATPRODUCT_AB: 374 PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 375 break; 376 case MATPRODUCT_ABt: 377 PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 378 break; 379 default: 380 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 381 } 382 PetscFunctionReturn(PETSC_SUCCESS); 383 } 384 385 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 386 { 387 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 388 PetscScalar *darray, *d2d, v; 389 const PetscInt *ranges; 390 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 391 392 PetscFunctionBegin; 393 PetscCall(VecGetArray(D, &darray)); 394 395 if (A->rmap->N <= A->cmap->N) { /* row version */ 396 397 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 398 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 399 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 400 dlld = PetscMax(1, A->rmap->n); 401 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 402 PetscCheckScaLapackInfo("descinit", info); 403 404 /* allocate 2d vector */ 405 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 406 PetscCall(PetscCalloc1(lszd, &d2d)); 407 dlld = PetscMax(1, lszd); 408 409 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 410 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 411 PetscCheckScaLapackInfo("descinit", info); 412 413 /* collect diagonal */ 414 for (j = 1; j <= a->M; j++) { 415 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 416 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 417 } 418 419 /* redistribute d from a column of a 2d matrix */ 420 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 421 PetscCall(PetscFree(d2d)); 422 423 } else { /* column version */ 424 425 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 426 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 427 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 428 dlld = 1; 429 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 430 PetscCheckScaLapackInfo("descinit", info); 431 432 /* allocate 2d vector */ 433 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 434 PetscCall(PetscCalloc1(lszd, &d2d)); 435 436 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 437 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 438 PetscCheckScaLapackInfo("descinit", info); 439 440 /* collect diagonal */ 441 for (j = 1; j <= a->N; j++) { 442 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 443 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 444 } 445 446 /* redistribute d from a row of a 2d matrix */ 447 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 448 PetscCall(PetscFree(d2d)); 449 } 450 451 PetscCall(VecRestoreArray(D, &darray)); 452 PetscCall(VecAssemblyBegin(D)); 453 PetscCall(VecAssemblyEnd(D)); 454 PetscFunctionReturn(PETSC_SUCCESS); 455 } 456 457 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 458 { 459 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 460 const PetscScalar *d; 461 const PetscInt *ranges; 462 PetscScalar *d2d; 463 PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 464 465 PetscFunctionBegin; 466 if (R) { 467 PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d)); 468 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 469 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 470 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 471 dlld = 1; 472 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 473 PetscCheckScaLapackInfo("descinit", info); 474 475 /* allocate 2d vector */ 476 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 477 PetscCall(PetscCalloc1(lszd, &d2d)); 478 479 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 480 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 481 PetscCheckScaLapackInfo("descinit", info); 482 483 /* redistribute d to a row of a 2d matrix */ 484 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 485 486 /* broadcast along process columns */ 487 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 488 else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 489 490 /* local scaling */ 491 for (j = 0; j < a->locc; j++) 492 for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 493 494 PetscCall(PetscFree(d2d)); 495 PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d)); 496 } 497 if (L) { 498 PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d)); 499 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 500 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 501 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 502 dlld = PetscMax(1, A->rmap->n); 503 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 504 PetscCheckScaLapackInfo("descinit", info); 505 506 /* allocate 2d vector */ 507 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 508 PetscCall(PetscCalloc1(lszd, &d2d)); 509 dlld = PetscMax(1, lszd); 510 511 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 512 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 513 PetscCheckScaLapackInfo("descinit", info); 514 515 /* redistribute d to a column of a 2d matrix */ 516 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 517 518 /* broadcast along process rows */ 519 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 520 else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 521 522 /* local scaling */ 523 for (i = 0; i < a->locr; i++) 524 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 525 526 PetscCall(PetscFree(d2d)); 527 PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d)); 528 } 529 PetscFunctionReturn(PETSC_SUCCESS); 530 } 531 532 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 533 { 534 PetscFunctionBegin; 535 *missing = PETSC_FALSE; 536 PetscFunctionReturn(PETSC_SUCCESS); 537 } 538 539 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 540 { 541 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 542 PetscBLASInt n, one = 1; 543 544 PetscFunctionBegin; 545 n = x->lld * x->locc; 546 PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 551 { 552 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 553 PetscBLASInt i, n; 554 PetscScalar v; 555 556 PetscFunctionBegin; 557 n = PetscMin(x->M, x->N); 558 for (i = 1; i <= n; i++) { 559 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 560 v += alpha; 561 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 562 } 563 PetscFunctionReturn(PETSC_SUCCESS); 564 } 565 566 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 567 { 568 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 569 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 570 PetscBLASInt one = 1; 571 PetscScalar beta = 1.0; 572 573 PetscFunctionBegin; 574 MatScaLAPACKCheckDistribution(Y, 1, X, 3); 575 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 576 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 577 PetscFunctionReturn(PETSC_SUCCESS); 578 } 579 580 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 581 { 582 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 583 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 584 585 PetscFunctionBegin; 586 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 587 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 592 { 593 Mat Bs; 594 MPI_Comm comm; 595 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 596 597 PetscFunctionBegin; 598 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 599 PetscCall(MatCreate(comm, &Bs)); 600 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 601 PetscCall(MatSetType(Bs, MATSCALAPACK)); 602 b = (Mat_ScaLAPACK *)Bs->data; 603 b->M = a->M; 604 b->N = a->N; 605 b->mb = a->mb; 606 b->nb = a->nb; 607 b->rsrc = a->rsrc; 608 b->csrc = a->csrc; 609 PetscCall(MatSetUp(Bs)); 610 *B = Bs; 611 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 612 Bs->assembled = PETSC_TRUE; 613 PetscFunctionReturn(PETSC_SUCCESS); 614 } 615 616 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 617 { 618 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 619 Mat Bs = *B; 620 PetscBLASInt one = 1; 621 PetscScalar sone = 1.0, zero = 0.0; 622 #if defined(PETSC_USE_COMPLEX) 623 PetscInt i, j; 624 #endif 625 626 PetscFunctionBegin; 627 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 628 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 629 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 630 *B = Bs; 631 b = (Mat_ScaLAPACK *)Bs->data; 632 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 633 #if defined(PETSC_USE_COMPLEX) 634 /* undo conjugation */ 635 for (i = 0; i < b->locr; i++) 636 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 637 #endif 638 Bs->assembled = PETSC_TRUE; 639 PetscFunctionReturn(PETSC_SUCCESS); 640 } 641 642 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 643 { 644 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 645 PetscInt i, j; 646 647 PetscFunctionBegin; 648 for (i = 0; i < a->locr; i++) 649 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 650 PetscFunctionReturn(PETSC_SUCCESS); 651 } 652 653 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 654 { 655 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 656 Mat Bs = *B; 657 PetscBLASInt one = 1; 658 PetscScalar sone = 1.0, zero = 0.0; 659 660 PetscFunctionBegin; 661 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 662 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 663 *B = Bs; 664 b = (Mat_ScaLAPACK *)Bs->data; 665 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 666 Bs->assembled = PETSC_TRUE; 667 PetscFunctionReturn(PETSC_SUCCESS); 668 } 669 670 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 671 { 672 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 673 PetscScalar *x, *x2d; 674 const PetscInt *ranges; 675 PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 676 677 PetscFunctionBegin; 678 PetscCall(VecCopy(B, X)); 679 PetscCall(VecGetArray(X, &x)); 680 681 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 682 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 683 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 684 xlld = PetscMax(1, A->rmap->n); 685 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 686 PetscCheckScaLapackInfo("descinit", info); 687 688 /* allocate 2d vector */ 689 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 690 PetscCall(PetscMalloc1(lszx, &x2d)); 691 xlld = PetscMax(1, lszx); 692 693 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 694 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 695 PetscCheckScaLapackInfo("descinit", info); 696 697 /* redistribute x as a column of a 2d matrix */ 698 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 699 700 /* call ScaLAPACK subroutine */ 701 switch (A->factortype) { 702 case MAT_FACTOR_LU: 703 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 704 PetscCheckScaLapackInfo("getrs", info); 705 break; 706 case MAT_FACTOR_CHOLESKY: 707 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 708 PetscCheckScaLapackInfo("potrs", info); 709 break; 710 default: 711 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 712 } 713 714 /* redistribute x from a column of a 2d matrix */ 715 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 716 717 PetscCall(PetscFree(x2d)); 718 PetscCall(VecRestoreArray(X, &x)); 719 PetscFunctionReturn(PETSC_SUCCESS); 720 } 721 722 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 723 { 724 PetscFunctionBegin; 725 PetscCall(MatSolve_ScaLAPACK(A, B, X)); 726 PetscCall(VecAXPY(X, 1, Y)); 727 PetscFunctionReturn(PETSC_SUCCESS); 728 } 729 730 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 731 { 732 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 733 PetscBool flg1, flg2; 734 PetscBLASInt one = 1, info; 735 736 PetscFunctionBegin; 737 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 738 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 739 PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 740 MatScaLAPACKCheckDistribution(B, 1, X, 2); 741 b = (Mat_ScaLAPACK *)B->data; 742 x = (Mat_ScaLAPACK *)X->data; 743 PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 744 745 switch (A->factortype) { 746 case MAT_FACTOR_LU: 747 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 748 PetscCheckScaLapackInfo("getrs", info); 749 break; 750 case MAT_FACTOR_CHOLESKY: 751 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 752 PetscCheckScaLapackInfo("potrs", info); 753 break; 754 default: 755 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 761 { 762 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 763 PetscBLASInt one = 1, info; 764 765 PetscFunctionBegin; 766 if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 767 PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 768 PetscCheckScaLapackInfo("getrf", info); 769 A->factortype = MAT_FACTOR_LU; 770 A->assembled = PETSC_TRUE; 771 772 PetscCall(PetscFree(A->solvertype)); 773 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 774 PetscFunctionReturn(PETSC_SUCCESS); 775 } 776 777 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 778 { 779 PetscFunctionBegin; 780 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 781 PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 782 PetscFunctionReturn(PETSC_SUCCESS); 783 } 784 785 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 786 { 787 PetscFunctionBegin; 788 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 793 { 794 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 795 PetscBLASInt one = 1, info; 796 797 PetscFunctionBegin; 798 PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 799 PetscCheckScaLapackInfo("potrf", info); 800 A->factortype = MAT_FACTOR_CHOLESKY; 801 A->assembled = PETSC_TRUE; 802 803 PetscCall(PetscFree(A->solvertype)); 804 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 805 PetscFunctionReturn(PETSC_SUCCESS); 806 } 807 808 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 809 { 810 PetscFunctionBegin; 811 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 812 PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 813 PetscFunctionReturn(PETSC_SUCCESS); 814 } 815 816 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 817 { 818 PetscFunctionBegin; 819 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 820 PetscFunctionReturn(PETSC_SUCCESS); 821 } 822 823 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 824 { 825 PetscFunctionBegin; 826 *type = MATSOLVERSCALAPACK; 827 PetscFunctionReturn(PETSC_SUCCESS); 828 } 829 830 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 831 { 832 Mat B; 833 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 834 835 PetscFunctionBegin; 836 /* Create the factorization matrix */ 837 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 838 B->trivialsymbolic = PETSC_TRUE; 839 B->factortype = ftype; 840 PetscCall(PetscFree(B->solvertype)); 841 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 842 843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 844 *F = B; 845 PetscFunctionReturn(PETSC_SUCCESS); 846 } 847 848 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 849 { 850 PetscFunctionBegin; 851 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 852 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 857 { 858 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 859 PetscBLASInt one = 1, lwork = 0; 860 const char *ntype; 861 PetscScalar *work = NULL, dummy; 862 863 PetscFunctionBegin; 864 switch (type) { 865 case NORM_1: 866 ntype = "1"; 867 lwork = PetscMax(a->locr, a->locc); 868 break; 869 case NORM_FROBENIUS: 870 ntype = "F"; 871 work = &dummy; 872 break; 873 case NORM_INFINITY: 874 ntype = "I"; 875 lwork = PetscMax(a->locr, a->locc); 876 break; 877 default: 878 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 879 } 880 if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 881 *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 882 if (lwork) PetscCall(PetscFree(work)); 883 PetscFunctionReturn(PETSC_SUCCESS); 884 } 885 886 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 887 { 888 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 889 890 PetscFunctionBegin; 891 PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 896 { 897 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 898 PetscInt i, n, nb, isrc, nproc, iproc, *idx; 899 900 PetscFunctionBegin; 901 if (rows) { 902 n = a->locr; 903 nb = a->mb; 904 isrc = a->rsrc; 905 nproc = a->grid->nprow; 906 iproc = a->grid->myrow; 907 PetscCall(PetscMalloc1(n, &idx)); 908 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 909 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 910 } 911 if (cols) { 912 n = a->locc; 913 nb = a->nb; 914 isrc = a->csrc; 915 nproc = a->grid->npcol; 916 iproc = a->grid->mycol; 917 PetscCall(PetscMalloc1(n, &idx)); 918 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 919 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 920 } 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 925 { 926 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 927 Mat Bmpi; 928 MPI_Comm comm; 929 PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz; 930 const PetscInt *ranges, *branges, *cwork; 931 const PetscScalar *vwork; 932 PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 933 PetscScalar *barray; 934 PetscBool differ = PETSC_FALSE; 935 PetscMPIInt size; 936 937 PetscFunctionBegin; 938 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 939 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 940 941 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 942 PetscCallMPI(MPI_Comm_size(comm, &size)); 943 PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 944 for (i = 0; i < size; i++) 945 if (ranges[i + 1] != branges[i + 1]) { 946 differ = PETSC_TRUE; 947 break; 948 } 949 } 950 951 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 952 PetscCall(MatCreate(comm, &Bmpi)); 953 m = PETSC_DECIDE; 954 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 955 n = PETSC_DECIDE; 956 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 957 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 958 PetscCall(MatSetType(Bmpi, MATDENSE)); 959 PetscCall(MatSetUp(Bmpi)); 960 961 /* create ScaLAPACK descriptor for B (1d block distribution) */ 962 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 963 lld = PetscMax(A->rmap->n, 1); /* local leading dimension */ 964 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 965 PetscCheckScaLapackInfo("descinit", info); 966 967 /* redistribute matrix */ 968 PetscCall(MatDenseGetArray(Bmpi, &barray)); 969 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 970 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 971 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 972 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 973 974 /* transfer rows of auxiliary matrix to the final matrix B */ 975 PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 976 for (i = rstart; i < rend; i++) { 977 PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 978 PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 979 PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 980 } 981 PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 982 PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 983 PetscCall(MatDestroy(&Bmpi)); 984 985 } else { /* normal cases */ 986 987 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 988 else { 989 PetscCall(MatCreate(comm, &Bmpi)); 990 m = PETSC_DECIDE; 991 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 992 n = PETSC_DECIDE; 993 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 994 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 995 PetscCall(MatSetType(Bmpi, MATDENSE)); 996 PetscCall(MatSetUp(Bmpi)); 997 } 998 999 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1000 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1001 lld = PetscMax(A->rmap->n, 1); /* local leading dimension */ 1002 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1003 PetscCheckScaLapackInfo("descinit", info); 1004 1005 /* redistribute matrix */ 1006 PetscCall(MatDenseGetArray(Bmpi, &barray)); 1007 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 1008 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1009 1010 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1011 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1012 if (reuse == MAT_INPLACE_MATRIX) { 1013 PetscCall(MatHeaderReplace(A, &Bmpi)); 1014 } else *B = Bmpi; 1015 } 1016 PetscFunctionReturn(PETSC_SUCCESS); 1017 } 1018 1019 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1020 { 1021 const PetscInt *ranges; 1022 PetscMPIInt size; 1023 PetscInt i, n; 1024 1025 PetscFunctionBegin; 1026 *correct = PETSC_TRUE; 1027 PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1028 if (size > 1) { 1029 PetscCall(PetscLayoutGetRanges(map, &ranges)); 1030 n = ranges[1] - ranges[0]; 1031 for (i = 1; i < size; i++) 1032 if (ranges[i + 1] - ranges[i] != n) break; 1033 *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1034 } 1035 PetscFunctionReturn(PETSC_SUCCESS); 1036 } 1037 1038 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1039 { 1040 Mat_ScaLAPACK *b; 1041 Mat Bmpi; 1042 MPI_Comm comm; 1043 PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1044 const PetscInt *ranges, *rows, *cols; 1045 PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1046 PetscScalar *aarray; 1047 IS ir, ic; 1048 PetscInt lda; 1049 PetscBool flg; 1050 1051 PetscFunctionBegin; 1052 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1053 1054 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1055 else { 1056 PetscCall(MatCreate(comm, &Bmpi)); 1057 m = PETSC_DECIDE; 1058 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1059 n = PETSC_DECIDE; 1060 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1061 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 1062 PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 1063 PetscCall(MatSetUp(Bmpi)); 1064 } 1065 b = (Mat_ScaLAPACK *)Bmpi->data; 1066 1067 PetscCall(MatDenseGetLDA(A, &lda)); 1068 PetscCall(MatDenseGetArray(A, &aarray)); 1069 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1070 if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1071 if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1072 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1073 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1074 PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 1075 lld = PetscMax(lda, 1); /* local leading dimension */ 1076 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1077 PetscCheckScaLapackInfo("descinit", info); 1078 1079 /* redistribute matrix */ 1080 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1081 Bmpi->nooffprocentries = PETSC_TRUE; 1082 } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1083 PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1084 b->roworiented = PETSC_FALSE; 1085 PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1086 PetscCall(ISGetIndices(ir, &rows)); 1087 PetscCall(ISGetIndices(ic, &cols)); 1088 PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1089 PetscCall(ISRestoreIndices(ir, &rows)); 1090 PetscCall(ISRestoreIndices(ic, &cols)); 1091 PetscCall(ISDestroy(&ic)); 1092 PetscCall(ISDestroy(&ir)); 1093 } 1094 PetscCall(MatDenseRestoreArray(A, &aarray)); 1095 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1096 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1097 if (reuse == MAT_INPLACE_MATRIX) { 1098 PetscCall(MatHeaderReplace(A, &Bmpi)); 1099 } else *B = Bmpi; 1100 PetscFunctionReturn(PETSC_SUCCESS); 1101 } 1102 1103 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1104 { 1105 Mat mat_scal; 1106 PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1107 const PetscInt *cols; 1108 const PetscScalar *vals; 1109 1110 PetscFunctionBegin; 1111 if (reuse == MAT_REUSE_MATRIX) { 1112 mat_scal = *newmat; 1113 PetscCall(MatZeroEntries(mat_scal)); 1114 } else { 1115 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1116 m = PETSC_DECIDE; 1117 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1118 n = PETSC_DECIDE; 1119 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1120 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1121 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1122 PetscCall(MatSetUp(mat_scal)); 1123 } 1124 for (row = rstart; row < rend; row++) { 1125 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1126 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 1127 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1128 } 1129 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1130 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1131 1132 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1133 else *newmat = mat_scal; 1134 PetscFunctionReturn(PETSC_SUCCESS); 1135 } 1136 1137 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1138 { 1139 Mat mat_scal; 1140 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1141 const PetscInt *cols; 1142 const PetscScalar *vals; 1143 PetscScalar v; 1144 1145 PetscFunctionBegin; 1146 if (reuse == MAT_REUSE_MATRIX) { 1147 mat_scal = *newmat; 1148 PetscCall(MatZeroEntries(mat_scal)); 1149 } else { 1150 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1151 m = PETSC_DECIDE; 1152 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1153 n = PETSC_DECIDE; 1154 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1155 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1156 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1157 PetscCall(MatSetUp(mat_scal)); 1158 } 1159 PetscCall(MatGetRowUpperTriangular(A)); 1160 for (row = rstart; row < rend; row++) { 1161 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1162 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1163 for (j = 0; j < ncols; j++) { /* lower triangular part */ 1164 if (cols[j] == row) continue; 1165 v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 1166 PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1167 } 1168 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1169 } 1170 PetscCall(MatRestoreRowUpperTriangular(A)); 1171 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1172 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1173 1174 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1175 else *newmat = mat_scal; 1176 PetscFunctionReturn(PETSC_SUCCESS); 1177 } 1178 1179 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1180 { 1181 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1182 PetscInt sz = 0; 1183 1184 PetscFunctionBegin; 1185 PetscCall(PetscLayoutSetUp(A->rmap)); 1186 PetscCall(PetscLayoutSetUp(A->cmap)); 1187 if (!a->lld) a->lld = a->locr; 1188 1189 PetscCall(PetscFree(a->loc)); 1190 PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 1191 PetscCall(PetscCalloc1(sz, &a->loc)); 1192 1193 A->preallocated = PETSC_TRUE; 1194 PetscFunctionReturn(PETSC_SUCCESS); 1195 } 1196 1197 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1198 { 1199 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1200 Mat_ScaLAPACK_Grid *grid; 1201 PetscBool flg; 1202 MPI_Comm icomm; 1203 1204 PetscFunctionBegin; 1205 PetscCall(MatStashDestroy_Private(&A->stash)); 1206 PetscCall(PetscFree(a->loc)); 1207 PetscCall(PetscFree(a->pivots)); 1208 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1209 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1210 if (--grid->grid_refct == 0) { 1211 Cblacs_gridexit(grid->ictxt); 1212 Cblacs_gridexit(grid->ictxrow); 1213 Cblacs_gridexit(grid->ictxcol); 1214 PetscCall(PetscFree(grid)); 1215 PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1216 } 1217 PetscCall(PetscCommDestroy(&icomm)); 1218 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 1219 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1220 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 1221 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 1222 PetscCall(PetscFree(A->data)); 1223 PetscFunctionReturn(PETSC_SUCCESS); 1224 } 1225 1226 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1227 { 1228 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1229 PetscBLASInt info = 0; 1230 PetscBool flg; 1231 1232 PetscFunctionBegin; 1233 PetscCall(PetscLayoutSetUp(A->rmap)); 1234 PetscCall(PetscLayoutSetUp(A->cmap)); 1235 1236 /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1237 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1238 PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1239 PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1240 PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1241 1242 /* compute local sizes */ 1243 PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 1244 PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1245 a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1246 a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1247 a->lld = PetscMax(1, a->locr); 1248 1249 /* allocate local array */ 1250 PetscCall(MatScaLAPACKSetPreallocation(A)); 1251 1252 /* set up ScaLAPACK descriptor */ 1253 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1254 PetscCheckScaLapackInfo("descinit", info); 1255 PetscFunctionReturn(PETSC_SUCCESS); 1256 } 1257 1258 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1259 { 1260 PetscInt nstash, reallocs; 1261 1262 PetscFunctionBegin; 1263 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1264 PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 1265 PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 1266 PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 1267 PetscFunctionReturn(PETSC_SUCCESS); 1268 } 1269 1270 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1271 { 1272 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1273 PetscMPIInt n; 1274 PetscInt i, flg, *row, *col; 1275 PetscScalar *val; 1276 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1277 1278 PetscFunctionBegin; 1279 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1280 while (1) { 1281 PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1282 if (!flg) break; 1283 for (i = 0; i < n; i++) { 1284 PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 1285 PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1286 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1287 PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1288 switch (A->insertmode) { 1289 case INSERT_VALUES: 1290 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1291 break; 1292 case ADD_VALUES: 1293 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1294 break; 1295 default: 1296 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1297 } 1298 } 1299 } 1300 PetscCall(MatStashScatterEnd_Private(&A->stash)); 1301 PetscFunctionReturn(PETSC_SUCCESS); 1302 } 1303 1304 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1305 { 1306 Mat Adense, As; 1307 MPI_Comm comm; 1308 1309 PetscFunctionBegin; 1310 PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 1311 PetscCall(MatCreate(comm, &Adense)); 1312 PetscCall(MatSetType(Adense, MATDENSE)); 1313 PetscCall(MatLoad(Adense, viewer)); 1314 PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 1315 PetscCall(MatDestroy(&Adense)); 1316 PetscCall(MatHeaderReplace(newMat, &As)); 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } 1319 1320 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1321 0, 1322 0, 1323 MatMult_ScaLAPACK, 1324 /* 4*/ MatMultAdd_ScaLAPACK, 1325 MatMultTranspose_ScaLAPACK, 1326 MatMultTransposeAdd_ScaLAPACK, 1327 MatSolve_ScaLAPACK, 1328 MatSolveAdd_ScaLAPACK, 1329 0, 1330 /*10*/ 0, 1331 MatLUFactor_ScaLAPACK, 1332 MatCholeskyFactor_ScaLAPACK, 1333 0, 1334 MatTranspose_ScaLAPACK, 1335 /*15*/ MatGetInfo_ScaLAPACK, 1336 0, 1337 MatGetDiagonal_ScaLAPACK, 1338 MatDiagonalScale_ScaLAPACK, 1339 MatNorm_ScaLAPACK, 1340 /*20*/ MatAssemblyBegin_ScaLAPACK, 1341 MatAssemblyEnd_ScaLAPACK, 1342 MatSetOption_ScaLAPACK, 1343 MatZeroEntries_ScaLAPACK, 1344 /*24*/ 0, 1345 MatLUFactorSymbolic_ScaLAPACK, 1346 MatLUFactorNumeric_ScaLAPACK, 1347 MatCholeskyFactorSymbolic_ScaLAPACK, 1348 MatCholeskyFactorNumeric_ScaLAPACK, 1349 /*29*/ MatSetUp_ScaLAPACK, 1350 0, 1351 0, 1352 0, 1353 0, 1354 /*34*/ MatDuplicate_ScaLAPACK, 1355 0, 1356 0, 1357 0, 1358 0, 1359 /*39*/ MatAXPY_ScaLAPACK, 1360 0, 1361 0, 1362 0, 1363 MatCopy_ScaLAPACK, 1364 /*44*/ 0, 1365 MatScale_ScaLAPACK, 1366 MatShift_ScaLAPACK, 1367 0, 1368 0, 1369 /*49*/ 0, 1370 0, 1371 0, 1372 0, 1373 0, 1374 /*54*/ 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 /*59*/ 0, 1380 MatDestroy_ScaLAPACK, 1381 MatView_ScaLAPACK, 1382 0, 1383 0, 1384 /*64*/ 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 /*69*/ 0, 1390 0, 1391 MatConvert_ScaLAPACK_Dense, 1392 0, 1393 0, 1394 /*74*/ 0, 1395 0, 1396 0, 1397 0, 1398 0, 1399 /*79*/ 0, 1400 0, 1401 0, 1402 0, 1403 MatLoad_ScaLAPACK, 1404 /*84*/ 0, 1405 0, 1406 0, 1407 0, 1408 0, 1409 /*89*/ 0, 1410 0, 1411 MatMatMultNumeric_ScaLAPACK, 1412 0, 1413 0, 1414 /*94*/ 0, 1415 0, 1416 0, 1417 MatMatTransposeMultNumeric_ScaLAPACK, 1418 0, 1419 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1420 0, 1421 0, 1422 MatConjugate_ScaLAPACK, 1423 0, 1424 /*104*/ 0, 1425 0, 1426 0, 1427 0, 1428 0, 1429 /*109*/ MatMatSolve_ScaLAPACK, 1430 0, 1431 0, 1432 0, 1433 MatMissingDiagonal_ScaLAPACK, 1434 /*114*/ 0, 1435 0, 1436 0, 1437 0, 1438 0, 1439 /*119*/ 0, 1440 MatHermitianTranspose_ScaLAPACK, 1441 0, 1442 0, 1443 0, 1444 /*124*/ 0, 1445 0, 1446 0, 1447 0, 1448 0, 1449 /*129*/ 0, 1450 0, 1451 0, 1452 0, 1453 0, 1454 /*134*/ 0, 1455 0, 1456 0, 1457 0, 1458 0, 1459 0, 1460 /*140*/ 0, 1461 0, 1462 0, 1463 0, 1464 0, 1465 /*145*/ 0, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*150*/ 0, 1471 0}; 1472 1473 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1474 { 1475 PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 1476 PetscInt size = stash->size, nsends; 1477 PetscInt count, *sindices, **rindices, i, j, l; 1478 PetscScalar **rvalues, *svalues; 1479 MPI_Comm comm = stash->comm; 1480 MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1481 PetscMPIInt *sizes, *nlengths, nreceives; 1482 PetscInt *sp_idx, *sp_idy; 1483 PetscScalar *sp_val; 1484 PetscMatStashSpace space, space_next; 1485 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1486 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1487 1488 PetscFunctionBegin; 1489 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1490 InsertMode addv; 1491 PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 1492 PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1493 mat->insertmode = addv; /* in case this processor had no cache */ 1494 } 1495 1496 bs2 = stash->bs * stash->bs; 1497 1498 /* first count number of contributors to each processor */ 1499 PetscCall(PetscCalloc1(size, &nlengths)); 1500 PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1501 1502 i = j = 0; 1503 space = stash->space_head; 1504 while (space) { 1505 space_next = space->next; 1506 for (l = 0; l < space->local_used; l++) { 1507 PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 1508 PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1509 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1510 j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 1511 nlengths[j]++; 1512 owner[i] = j; 1513 i++; 1514 } 1515 space = space_next; 1516 } 1517 1518 /* Now check what procs get messages - and compute nsends. */ 1519 PetscCall(PetscCalloc1(size, &sizes)); 1520 for (i = 0, nsends = 0; i < size; i++) { 1521 if (nlengths[i]) { 1522 sizes[i] = 1; 1523 nsends++; 1524 } 1525 } 1526 1527 { 1528 PetscMPIInt *onodes, *olengths; 1529 /* Determine the number of messages to expect, their lengths, from from-ids */ 1530 PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 1531 PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1532 /* since clubbing row,col - lengths are multiplied by 2 */ 1533 for (i = 0; i < nreceives; i++) olengths[i] *= 2; 1534 PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1535 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1536 for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 1537 PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 1538 PetscCall(PetscFree(onodes)); 1539 PetscCall(PetscFree(olengths)); 1540 } 1541 1542 /* do sends: 1543 1) starts[i] gives the starting index in svalues for stuff going to 1544 the ith processor 1545 */ 1546 PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 1547 PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 1548 PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1549 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1550 startv[0] = 0; 1551 starti[0] = 0; 1552 for (i = 1; i < size; i++) { 1553 startv[i] = startv[i - 1] + nlengths[i - 1]; 1554 starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1555 } 1556 1557 i = 0; 1558 space = stash->space_head; 1559 while (space) { 1560 space_next = space->next; 1561 sp_idx = space->idx; 1562 sp_idy = space->idy; 1563 sp_val = space->val; 1564 for (l = 0; l < space->local_used; l++) { 1565 j = owner[i]; 1566 if (bs2 == 1) { 1567 svalues[startv[j]] = sp_val[l]; 1568 } else { 1569 PetscInt k; 1570 PetscScalar *buf1, *buf2; 1571 buf1 = svalues + bs2 * startv[j]; 1572 buf2 = space->val + bs2 * l; 1573 for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1574 } 1575 sindices[starti[j]] = sp_idx[l]; 1576 sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1577 startv[j]++; 1578 starti[j]++; 1579 i++; 1580 } 1581 space = space_next; 1582 } 1583 startv[0] = 0; 1584 for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1585 1586 for (i = 0, count = 0; i < size; i++) { 1587 if (sizes[i]) { 1588 PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 1589 PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1590 } 1591 } 1592 #if defined(PETSC_USE_INFO) 1593 PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1594 for (i = 0; i < size; i++) { 1595 if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1596 } 1597 #endif 1598 PetscCall(PetscFree(nlengths)); 1599 PetscCall(PetscFree(owner)); 1600 PetscCall(PetscFree2(startv, starti)); 1601 PetscCall(PetscFree(sizes)); 1602 1603 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1604 PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1605 1606 for (i = 0; i < nreceives; i++) { 1607 recv_waits[2 * i] = recv_waits1[i]; 1608 recv_waits[2 * i + 1] = recv_waits2[i]; 1609 } 1610 stash->recv_waits = recv_waits; 1611 1612 PetscCall(PetscFree(recv_waits1)); 1613 PetscCall(PetscFree(recv_waits2)); 1614 1615 stash->svalues = svalues; 1616 stash->sindices = sindices; 1617 stash->rvalues = rvalues; 1618 stash->rindices = rindices; 1619 stash->send_waits = send_waits; 1620 stash->nsends = nsends; 1621 stash->nrecvs = nreceives; 1622 stash->reproduce_count = 0; 1623 PetscFunctionReturn(PETSC_SUCCESS); 1624 } 1625 1626 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1627 { 1628 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1629 1630 PetscFunctionBegin; 1631 PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1632 PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1633 PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 1634 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1635 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1636 PetscFunctionReturn(PETSC_SUCCESS); 1637 } 1638 1639 /*@ 1640 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 1641 the `MATSCALAPACK` matrix 1642 1643 Logically Collective 1644 1645 Input Parameters: 1646 + A - a `MATSCALAPACK` matrix 1647 . mb - the row block size 1648 - nb - the column block size 1649 1650 Level: intermediate 1651 1652 Note: 1653 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1654 1655 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1656 @*/ 1657 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1658 { 1659 PetscFunctionBegin; 1660 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1661 PetscValidLogicalCollectiveInt(A, mb, 2); 1662 PetscValidLogicalCollectiveInt(A, nb, 3); 1663 PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 1664 PetscFunctionReturn(PETSC_SUCCESS); 1665 } 1666 1667 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1668 { 1669 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1670 1671 PetscFunctionBegin; 1672 if (mb) *mb = a->mb; 1673 if (nb) *nb = a->nb; 1674 PetscFunctionReturn(PETSC_SUCCESS); 1675 } 1676 1677 /*@ 1678 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 1679 the `MATSCALAPACK` matrix 1680 1681 Not Collective 1682 1683 Input Parameter: 1684 . A - a `MATSCALAPACK` matrix 1685 1686 Output Parameters: 1687 + mb - the row block size 1688 - nb - the column block size 1689 1690 Level: intermediate 1691 1692 Note: 1693 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1694 1695 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1696 @*/ 1697 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1698 { 1699 PetscFunctionBegin; 1700 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1701 PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1706 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1707 1708 /*MC 1709 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1710 1711 Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1712 1713 Options Database Keys: 1714 + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 1715 . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1716 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1717 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1718 1719 Level: intermediate 1720 1721 Note: 1722 Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 1723 range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 1724 the given rank. 1725 1726 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1727 M*/ 1728 1729 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1730 { 1731 Mat_ScaLAPACK *a; 1732 PetscBool flg, flg1; 1733 Mat_ScaLAPACK_Grid *grid; 1734 MPI_Comm icomm; 1735 PetscBLASInt nprow, npcol, myrow, mycol; 1736 PetscInt optv1, k = 2, array[2] = {0, 0}; 1737 PetscMPIInt size; 1738 1739 PetscFunctionBegin; 1740 A->ops[0] = MatOps_Values; 1741 A->insertmode = NOT_SET_VALUES; 1742 1743 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1744 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1745 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1746 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1747 A->stash.ScatterDestroy = NULL; 1748 1749 PetscCall(PetscNew(&a)); 1750 A->data = (void *)a; 1751 1752 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1753 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1754 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 1755 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1756 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1757 } 1758 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1759 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1760 if (!flg) { 1761 PetscCall(PetscNew(&grid)); 1762 1763 PetscCallMPI(MPI_Comm_size(icomm, &size)); 1764 grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1765 1766 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1767 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1768 if (flg1) { 1769 PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1770 grid->nprow = optv1; 1771 } 1772 PetscOptionsEnd(); 1773 1774 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1775 grid->npcol = size / grid->nprow; 1776 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1777 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1778 grid->ictxt = Csys2blacs_handle(icomm); 1779 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1780 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1781 grid->grid_refct = 1; 1782 grid->nprow = nprow; 1783 grid->npcol = npcol; 1784 grid->myrow = myrow; 1785 grid->mycol = mycol; 1786 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1787 grid->ictxrow = Csys2blacs_handle(icomm); 1788 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1789 grid->ictxcol = Csys2blacs_handle(icomm); 1790 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1791 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1792 1793 } else grid->grid_refct++; 1794 PetscCall(PetscCommDestroy(&icomm)); 1795 a->grid = grid; 1796 a->mb = DEFAULT_BLOCKSIZE; 1797 a->nb = DEFAULT_BLOCKSIZE; 1798 1799 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1800 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1801 if (flg) { 1802 a->mb = array[0]; 1803 a->nb = (k > 1) ? array[1] : a->mb; 1804 } 1805 PetscOptionsEnd(); 1806 1807 a->roworiented = PETSC_TRUE; 1808 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1809 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1810 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1811 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1812 PetscFunctionReturn(PETSC_SUCCESS); 1813 } 1814 1815 /*@C 1816 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1817 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1818 1819 Collective 1820 1821 Input Parameters: 1822 + comm - MPI communicator 1823 . mb - row block size (or `PETSC_DECIDE` to have it set) 1824 . nb - column block size (or `PETSC_DECIDE` to have it set) 1825 . M - number of global rows 1826 . N - number of global columns 1827 . rsrc - coordinate of process that owns the first row of the distributed matrix 1828 - csrc - coordinate of process that owns the first column of the distributed matrix 1829 1830 Output Parameter: 1831 . A - the matrix 1832 1833 Options Database Key: 1834 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1835 1836 Level: intermediate 1837 1838 Notes: 1839 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 1840 1841 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1842 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1843 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1844 1845 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1846 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1847 significance and are thus ignored. The block sizes refer to the values 1848 used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1849 1850 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1851 @*/ 1852 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1853 { 1854 Mat_ScaLAPACK *a; 1855 PetscInt m, n; 1856 1857 PetscFunctionBegin; 1858 PetscCall(MatCreate(comm, A)); 1859 PetscCall(MatSetType(*A, MATSCALAPACK)); 1860 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1861 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1862 m = PETSC_DECIDE; 1863 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1864 n = PETSC_DECIDE; 1865 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1866 PetscCall(MatSetSizes(*A, m, n, M, N)); 1867 a = (Mat_ScaLAPACK *)(*A)->data; 1868 PetscCall(PetscBLASIntCast(M, &a->M)); 1869 PetscCall(PetscBLASIntCast(N, &a->N)); 1870 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1871 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1872 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 1873 PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 1874 PetscCall(MatSetUp(*A)); 1875 PetscFunctionReturn(PETSC_SUCCESS); 1876 } 1877