1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(PETSC_SUCCESS); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39 if (iascii) { 40 PetscCall(PetscViewerGetFormat(viewer, &format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 43 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 44 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 45 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 46 PetscFunctionReturn(PETSC_SUCCESS); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 53 PetscCall(MatView(Adense, viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61 PetscLogDouble isend[2], irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld * a->locc; /* locally allocated */ 67 isend[1] = a->locr * a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = 0; /* REVIEW ME */ 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented = flg; 106 break; 107 default: 108 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109 } 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114 { 115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116 PetscInt i, j; 117 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118 PetscBool roworiented = a->roworiented; 119 120 PetscFunctionBegin; 121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122 for (i = 0; i < nr; i++) { 123 if (rows[i] < 0) continue; 124 PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125 for (j = 0; j < nc; j++) { 126 if (cols[j] < 0) continue; 127 PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129 if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130 if (roworiented) { 131 switch (imode) { 132 case INSERT_VALUES: 133 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134 break; 135 default: 136 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137 break; 138 } 139 } else { 140 switch (imode) { 141 case INSERT_VALUES: 142 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143 break; 144 default: 145 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146 break; 147 } 148 } 149 } else { 150 PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151 A->assembled = PETSC_FALSE; 152 PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153 } 154 } 155 } 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160 { 161 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162 PetscScalar *x2d, *y2d, alpha = 1.0; 163 const PetscInt *ranges; 164 PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165 166 PetscFunctionBegin; 167 if (transpose) { 168 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 169 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 170 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171 xlld = PetscMax(1, A->rmap->n); 172 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173 PetscCheckScaLapackInfo("descinit", info); 174 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 175 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176 ylld = 1; 177 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178 PetscCheckScaLapackInfo("descinit", info); 179 180 /* allocate 2d vectors */ 181 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182 lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 183 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184 xlld = PetscMax(1, lszx); 185 186 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188 PetscCheckScaLapackInfo("descinit", info); 189 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190 PetscCheckScaLapackInfo("descinit", info); 191 192 /* redistribute x as a column of a 2d matrix */ 193 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194 195 /* redistribute y as a row of a 2d matrix */ 196 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197 198 /* call PBLAS subroutine */ 199 PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200 201 /* redistribute y from a row of a 2d matrix */ 202 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 203 204 } else { /* non-transpose */ 205 206 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 207 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 208 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 209 xlld = 1; 210 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 211 PetscCheckScaLapackInfo("descinit", info); 212 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 213 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 214 ylld = PetscMax(1, A->rmap->n); 215 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 216 PetscCheckScaLapackInfo("descinit", info); 217 218 /* allocate 2d vectors */ 219 lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 220 lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 221 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 222 ylld = PetscMax(1, lszy); 223 224 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 225 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 226 PetscCheckScaLapackInfo("descinit", info); 227 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 228 PetscCheckScaLapackInfo("descinit", info); 229 230 /* redistribute x as a row of a 2d matrix */ 231 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 232 233 /* redistribute y as a column of a 2d matrix */ 234 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 235 236 /* call PBLAS subroutine */ 237 PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 238 239 /* redistribute y from a column of a 2d matrix */ 240 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 241 } 242 PetscCall(PetscFree2(x2d, y2d)); 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 247 { 248 const PetscScalar *xarray; 249 PetscScalar *yarray; 250 251 PetscFunctionBegin; 252 PetscCall(VecGetArrayRead(x, &xarray)); 253 PetscCall(VecGetArray(y, &yarray)); 254 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray)); 255 PetscCall(VecRestoreArrayRead(x, &xarray)); 256 PetscCall(VecRestoreArray(y, &yarray)); 257 PetscFunctionReturn(PETSC_SUCCESS); 258 } 259 260 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 261 { 262 const PetscScalar *xarray; 263 PetscScalar *yarray; 264 265 PetscFunctionBegin; 266 PetscCall(VecGetArrayRead(x, &xarray)); 267 PetscCall(VecGetArray(y, &yarray)); 268 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray)); 269 PetscCall(VecRestoreArrayRead(x, &xarray)); 270 PetscCall(VecRestoreArray(y, &yarray)); 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 275 { 276 const PetscScalar *xarray; 277 PetscScalar *zarray; 278 279 PetscFunctionBegin; 280 if (y != z) PetscCall(VecCopy(y, z)); 281 PetscCall(VecGetArrayRead(x, &xarray)); 282 PetscCall(VecGetArray(z, &zarray)); 283 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray)); 284 PetscCall(VecRestoreArrayRead(x, &xarray)); 285 PetscCall(VecRestoreArray(z, &zarray)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290 { 291 const PetscScalar *xarray; 292 PetscScalar *zarray; 293 294 PetscFunctionBegin; 295 if (y != z) PetscCall(VecCopy(y, z)); 296 PetscCall(VecGetArrayRead(x, &xarray)); 297 PetscCall(VecGetArray(z, &zarray)); 298 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray)); 299 PetscCall(VecRestoreArrayRead(x, &xarray)); 300 PetscCall(VecRestoreArray(z, &zarray)); 301 PetscFunctionReturn(PETSC_SUCCESS); 302 } 303 304 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 305 { 306 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 307 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 308 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 309 PetscScalar sone = 1.0, zero = 0.0; 310 PetscBLASInt one = 1; 311 312 PetscFunctionBegin; 313 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 314 C->assembled = PETSC_TRUE; 315 PetscFunctionReturn(PETSC_SUCCESS); 316 } 317 318 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 319 { 320 PetscFunctionBegin; 321 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 322 PetscCall(MatSetType(C, MATSCALAPACK)); 323 PetscCall(MatSetUp(C)); 324 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 329 { 330 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 331 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 332 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 333 PetscScalar sone = 1.0, zero = 0.0; 334 PetscBLASInt one = 1; 335 336 PetscFunctionBegin; 337 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 338 C->assembled = PETSC_TRUE; 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 343 { 344 PetscFunctionBegin; 345 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 346 PetscCall(MatSetType(C, MATSCALAPACK)); 347 PetscCall(MatSetUp(C)); 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 352 { 353 PetscFunctionBegin; 354 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 355 C->ops->productsymbolic = MatProductSymbolic_AB; 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 360 { 361 PetscFunctionBegin; 362 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 363 C->ops->productsymbolic = MatProductSymbolic_ABt; 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 368 { 369 Mat_Product *product = C->product; 370 371 PetscFunctionBegin; 372 switch (product->type) { 373 case MATPRODUCT_AB: 374 PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 375 break; 376 case MATPRODUCT_ABt: 377 PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 378 break; 379 default: 380 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 381 } 382 PetscFunctionReturn(PETSC_SUCCESS); 383 } 384 385 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 386 { 387 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 388 PetscScalar *darray, *d2d, v; 389 const PetscInt *ranges; 390 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 391 392 PetscFunctionBegin; 393 PetscCall(VecGetArray(D, &darray)); 394 395 if (A->rmap->N <= A->cmap->N) { /* row version */ 396 397 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 398 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 399 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 400 dlld = PetscMax(1, A->rmap->n); 401 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 402 PetscCheckScaLapackInfo("descinit", info); 403 404 /* allocate 2d vector */ 405 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 406 PetscCall(PetscCalloc1(lszd, &d2d)); 407 dlld = PetscMax(1, lszd); 408 409 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 410 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 411 PetscCheckScaLapackInfo("descinit", info); 412 413 /* collect diagonal */ 414 for (j = 1; j <= a->M; j++) { 415 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 416 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 417 } 418 419 /* redistribute d from a column of a 2d matrix */ 420 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 421 PetscCall(PetscFree(d2d)); 422 423 } else { /* column version */ 424 425 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 426 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 427 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 428 dlld = 1; 429 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 430 PetscCheckScaLapackInfo("descinit", info); 431 432 /* allocate 2d vector */ 433 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 434 PetscCall(PetscCalloc1(lszd, &d2d)); 435 436 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 437 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 438 PetscCheckScaLapackInfo("descinit", info); 439 440 /* collect diagonal */ 441 for (j = 1; j <= a->N; j++) { 442 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 443 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 444 } 445 446 /* redistribute d from a row of a 2d matrix */ 447 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 448 PetscCall(PetscFree(d2d)); 449 } 450 451 PetscCall(VecRestoreArray(D, &darray)); 452 PetscCall(VecAssemblyBegin(D)); 453 PetscCall(VecAssemblyEnd(D)); 454 PetscFunctionReturn(PETSC_SUCCESS); 455 } 456 457 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 458 { 459 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 460 const PetscScalar *d; 461 const PetscInt *ranges; 462 PetscScalar *d2d; 463 PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 464 465 PetscFunctionBegin; 466 if (R) { 467 PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d)); 468 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 469 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 470 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 471 dlld = 1; 472 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 473 PetscCheckScaLapackInfo("descinit", info); 474 475 /* allocate 2d vector */ 476 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 477 PetscCall(PetscCalloc1(lszd, &d2d)); 478 479 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 480 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 481 PetscCheckScaLapackInfo("descinit", info); 482 483 /* redistribute d to a row of a 2d matrix */ 484 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 485 486 /* broadcast along process columns */ 487 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 488 else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 489 490 /* local scaling */ 491 for (j = 0; j < a->locc; j++) 492 for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 493 494 PetscCall(PetscFree(d2d)); 495 PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d)); 496 } 497 if (L) { 498 PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d)); 499 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 500 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 501 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 502 dlld = PetscMax(1, A->rmap->n); 503 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 504 PetscCheckScaLapackInfo("descinit", info); 505 506 /* allocate 2d vector */ 507 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 508 PetscCall(PetscCalloc1(lszd, &d2d)); 509 dlld = PetscMax(1, lszd); 510 511 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 512 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 513 PetscCheckScaLapackInfo("descinit", info); 514 515 /* redistribute d to a column of a 2d matrix */ 516 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 517 518 /* broadcast along process rows */ 519 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 520 else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 521 522 /* local scaling */ 523 for (i = 0; i < a->locr; i++) 524 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 525 526 PetscCall(PetscFree(d2d)); 527 PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d)); 528 } 529 PetscFunctionReturn(PETSC_SUCCESS); 530 } 531 532 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 533 { 534 PetscFunctionBegin; 535 *missing = PETSC_FALSE; 536 PetscFunctionReturn(PETSC_SUCCESS); 537 } 538 539 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 540 { 541 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 542 PetscBLASInt n, one = 1; 543 544 PetscFunctionBegin; 545 n = x->lld * x->locc; 546 PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 551 { 552 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 553 PetscBLASInt i, n; 554 PetscScalar v; 555 556 PetscFunctionBegin; 557 n = PetscMin(x->M, x->N); 558 for (i = 1; i <= n; i++) { 559 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 560 v += alpha; 561 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 562 } 563 PetscFunctionReturn(PETSC_SUCCESS); 564 } 565 566 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 567 { 568 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 569 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 570 PetscBLASInt one = 1; 571 PetscScalar beta = 1.0; 572 573 PetscFunctionBegin; 574 MatScaLAPACKCheckDistribution(Y, 1, X, 3); 575 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 576 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 577 PetscFunctionReturn(PETSC_SUCCESS); 578 } 579 580 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 581 { 582 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 583 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 584 585 PetscFunctionBegin; 586 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 587 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 592 { 593 Mat Bs; 594 MPI_Comm comm; 595 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 596 597 PetscFunctionBegin; 598 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 599 PetscCall(MatCreate(comm, &Bs)); 600 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 601 PetscCall(MatSetType(Bs, MATSCALAPACK)); 602 b = (Mat_ScaLAPACK *)Bs->data; 603 b->M = a->M; 604 b->N = a->N; 605 b->mb = a->mb; 606 b->nb = a->nb; 607 b->rsrc = a->rsrc; 608 b->csrc = a->csrc; 609 PetscCall(MatSetUp(Bs)); 610 *B = Bs; 611 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 612 Bs->assembled = PETSC_TRUE; 613 PetscFunctionReturn(PETSC_SUCCESS); 614 } 615 616 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 617 { 618 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 619 Mat Bs = *B; 620 PetscBLASInt one = 1; 621 PetscScalar sone = 1.0, zero = 0.0; 622 #if defined(PETSC_USE_COMPLEX) 623 PetscInt i, j; 624 #endif 625 626 PetscFunctionBegin; 627 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 628 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 629 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 630 *B = Bs; 631 b = (Mat_ScaLAPACK *)Bs->data; 632 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 633 #if defined(PETSC_USE_COMPLEX) 634 /* undo conjugation */ 635 for (i = 0; i < b->locr; i++) 636 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 637 #endif 638 Bs->assembled = PETSC_TRUE; 639 PetscFunctionReturn(PETSC_SUCCESS); 640 } 641 642 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 643 { 644 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 645 PetscInt i, j; 646 647 PetscFunctionBegin; 648 for (i = 0; i < a->locr; i++) 649 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 650 PetscFunctionReturn(PETSC_SUCCESS); 651 } 652 653 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 654 { 655 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 656 Mat Bs = *B; 657 PetscBLASInt one = 1; 658 PetscScalar sone = 1.0, zero = 0.0; 659 660 PetscFunctionBegin; 661 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 662 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 663 *B = Bs; 664 b = (Mat_ScaLAPACK *)Bs->data; 665 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 666 Bs->assembled = PETSC_TRUE; 667 PetscFunctionReturn(PETSC_SUCCESS); 668 } 669 670 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 671 { 672 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 673 PetscScalar *x, *x2d; 674 const PetscInt *ranges; 675 PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 676 677 PetscFunctionBegin; 678 PetscCall(VecCopy(B, X)); 679 PetscCall(VecGetArray(X, &x)); 680 681 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 682 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 683 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 684 xlld = PetscMax(1, A->rmap->n); 685 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 686 PetscCheckScaLapackInfo("descinit", info); 687 688 /* allocate 2d vector */ 689 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 690 PetscCall(PetscMalloc1(lszx, &x2d)); 691 xlld = PetscMax(1, lszx); 692 693 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 694 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 695 PetscCheckScaLapackInfo("descinit", info); 696 697 /* redistribute x as a column of a 2d matrix */ 698 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 699 700 /* call ScaLAPACK subroutine */ 701 switch (A->factortype) { 702 case MAT_FACTOR_LU: 703 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 704 PetscCheckScaLapackInfo("getrs", info); 705 break; 706 case MAT_FACTOR_CHOLESKY: 707 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 708 PetscCheckScaLapackInfo("potrs", info); 709 break; 710 default: 711 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 712 } 713 714 /* redistribute x from a column of a 2d matrix */ 715 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 716 717 PetscCall(PetscFree(x2d)); 718 PetscCall(VecRestoreArray(X, &x)); 719 PetscFunctionReturn(PETSC_SUCCESS); 720 } 721 722 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 723 { 724 PetscFunctionBegin; 725 PetscCall(MatSolve_ScaLAPACK(A, B, X)); 726 PetscCall(VecAXPY(X, 1, Y)); 727 PetscFunctionReturn(PETSC_SUCCESS); 728 } 729 730 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 731 { 732 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 733 PetscBool flg1, flg2; 734 PetscBLASInt one = 1, info; 735 736 PetscFunctionBegin; 737 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 738 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 739 PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 740 MatScaLAPACKCheckDistribution(B, 1, X, 2); 741 b = (Mat_ScaLAPACK *)B->data; 742 x = (Mat_ScaLAPACK *)X->data; 743 PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 744 745 switch (A->factortype) { 746 case MAT_FACTOR_LU: 747 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 748 PetscCheckScaLapackInfo("getrs", info); 749 break; 750 case MAT_FACTOR_CHOLESKY: 751 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 752 PetscCheckScaLapackInfo("potrs", info); 753 break; 754 default: 755 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 761 { 762 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 763 PetscBLASInt one = 1, info; 764 765 PetscFunctionBegin; 766 if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 767 PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 768 PetscCheckScaLapackInfo("getrf", info); 769 A->factortype = MAT_FACTOR_LU; 770 A->assembled = PETSC_TRUE; 771 772 PetscCall(PetscFree(A->solvertype)); 773 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 774 PetscFunctionReturn(PETSC_SUCCESS); 775 } 776 777 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 778 { 779 PetscFunctionBegin; 780 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 781 PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 782 PetscFunctionReturn(PETSC_SUCCESS); 783 } 784 785 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 786 { 787 PetscFunctionBegin; 788 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 793 { 794 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 795 PetscBLASInt one = 1, info; 796 797 PetscFunctionBegin; 798 PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 799 PetscCheckScaLapackInfo("potrf", info); 800 A->factortype = MAT_FACTOR_CHOLESKY; 801 A->assembled = PETSC_TRUE; 802 803 PetscCall(PetscFree(A->solvertype)); 804 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 805 PetscFunctionReturn(PETSC_SUCCESS); 806 } 807 808 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 809 { 810 PetscFunctionBegin; 811 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 812 PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 813 PetscFunctionReturn(PETSC_SUCCESS); 814 } 815 816 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 817 { 818 PetscFunctionBegin; 819 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 820 PetscFunctionReturn(PETSC_SUCCESS); 821 } 822 823 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 824 { 825 PetscFunctionBegin; 826 *type = MATSOLVERSCALAPACK; 827 PetscFunctionReturn(PETSC_SUCCESS); 828 } 829 830 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 831 { 832 Mat B; 833 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 834 835 PetscFunctionBegin; 836 /* Create the factorization matrix */ 837 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 838 B->trivialsymbolic = PETSC_TRUE; 839 B->factortype = ftype; 840 PetscCall(PetscFree(B->solvertype)); 841 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 842 843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 844 *F = B; 845 PetscFunctionReturn(PETSC_SUCCESS); 846 } 847 848 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 849 { 850 PetscFunctionBegin; 851 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 852 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 857 { 858 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 859 PetscBLASInt one = 1, lwork = 0; 860 const char *ntype; 861 PetscScalar *work = NULL, dummy; 862 863 PetscFunctionBegin; 864 switch (type) { 865 case NORM_1: 866 ntype = "1"; 867 lwork = PetscMax(a->locr, a->locc); 868 break; 869 case NORM_FROBENIUS: 870 ntype = "F"; 871 work = &dummy; 872 break; 873 case NORM_INFINITY: 874 ntype = "I"; 875 lwork = PetscMax(a->locr, a->locc); 876 break; 877 default: 878 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 879 } 880 if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 881 *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 882 if (lwork) PetscCall(PetscFree(work)); 883 PetscFunctionReturn(PETSC_SUCCESS); 884 } 885 886 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 887 { 888 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 889 890 PetscFunctionBegin; 891 PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 896 { 897 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 898 PetscInt i, n, nb, isrc, nproc, iproc, *idx; 899 900 PetscFunctionBegin; 901 if (rows) { 902 n = a->locr; 903 nb = a->mb; 904 isrc = a->rsrc; 905 nproc = a->grid->nprow; 906 iproc = a->grid->myrow; 907 PetscCall(PetscMalloc1(n, &idx)); 908 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 909 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 910 } 911 if (cols) { 912 n = a->locc; 913 nb = a->nb; 914 isrc = a->csrc; 915 nproc = a->grid->npcol; 916 iproc = a->grid->mycol; 917 PetscCall(PetscMalloc1(n, &idx)); 918 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 919 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 920 } 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 925 { 926 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 927 Mat Bmpi; 928 MPI_Comm comm; 929 PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; 930 const PetscInt *ranges, *branges, *cwork; 931 const PetscScalar *vwork; 932 PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 933 PetscScalar *barray; 934 PetscBool differ = PETSC_FALSE; 935 PetscMPIInt size; 936 937 PetscFunctionBegin; 938 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 939 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 940 941 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 942 PetscCallMPI(MPI_Comm_size(comm, &size)); 943 PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 944 for (i = 0; i < size; i++) 945 if (ranges[i + 1] != branges[i + 1]) { 946 differ = PETSC_TRUE; 947 break; 948 } 949 } 950 951 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 952 PetscCall(MatCreate(comm, &Bmpi)); 953 m = PETSC_DECIDE; 954 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 955 n = PETSC_DECIDE; 956 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 957 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 958 PetscCall(MatSetType(Bmpi, MATDENSE)); 959 PetscCall(MatSetUp(Bmpi)); 960 961 /* create ScaLAPACK descriptor for B (1d block distribution) */ 962 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 963 PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 964 lld = PetscMax(ldb, 1); /* local leading dimension */ 965 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 966 PetscCheckScaLapackInfo("descinit", info); 967 968 /* redistribute matrix */ 969 PetscCall(MatDenseGetArray(Bmpi, &barray)); 970 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 971 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 972 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 973 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 974 975 /* transfer rows of auxiliary matrix to the final matrix B */ 976 PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 977 for (i = rstart; i < rend; i++) { 978 PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 979 PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 980 PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 981 } 982 PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 983 PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 984 PetscCall(MatDestroy(&Bmpi)); 985 986 } else { /* normal cases */ 987 988 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 989 else { 990 PetscCall(MatCreate(comm, &Bmpi)); 991 m = PETSC_DECIDE; 992 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 993 n = PETSC_DECIDE; 994 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 995 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 996 PetscCall(MatSetType(Bmpi, MATDENSE)); 997 PetscCall(MatSetUp(Bmpi)); 998 } 999 1000 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1001 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1002 PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 1003 lld = PetscMax(ldb, 1); /* local leading dimension */ 1004 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1005 PetscCheckScaLapackInfo("descinit", info); 1006 1007 /* redistribute matrix */ 1008 PetscCall(MatDenseGetArray(Bmpi, &barray)); 1009 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 1010 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1011 1012 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1013 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1014 if (reuse == MAT_INPLACE_MATRIX) { 1015 PetscCall(MatHeaderReplace(A, &Bmpi)); 1016 } else *B = Bmpi; 1017 } 1018 PetscFunctionReturn(PETSC_SUCCESS); 1019 } 1020 1021 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1022 { 1023 const PetscInt *ranges; 1024 PetscMPIInt size; 1025 PetscInt i, n; 1026 1027 PetscFunctionBegin; 1028 *correct = PETSC_TRUE; 1029 PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1030 if (size > 1) { 1031 PetscCall(PetscLayoutGetRanges(map, &ranges)); 1032 n = ranges[1] - ranges[0]; 1033 for (i = 1; i < size; i++) 1034 if (ranges[i + 1] - ranges[i] != n) break; 1035 *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1036 } 1037 PetscFunctionReturn(PETSC_SUCCESS); 1038 } 1039 1040 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1041 { 1042 Mat_ScaLAPACK *b; 1043 Mat Bmpi; 1044 MPI_Comm comm; 1045 PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1046 const PetscInt *ranges, *rows, *cols; 1047 PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1048 PetscScalar *aarray; 1049 IS ir, ic; 1050 PetscInt lda; 1051 PetscBool flg; 1052 1053 PetscFunctionBegin; 1054 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1055 1056 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1057 else { 1058 PetscCall(MatCreate(comm, &Bmpi)); 1059 m = PETSC_DECIDE; 1060 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1061 n = PETSC_DECIDE; 1062 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1063 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 1064 PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 1065 PetscCall(MatSetUp(Bmpi)); 1066 } 1067 b = (Mat_ScaLAPACK *)Bmpi->data; 1068 1069 PetscCall(MatDenseGetLDA(A, &lda)); 1070 PetscCall(MatDenseGetArray(A, &aarray)); 1071 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1072 if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1073 if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1074 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1075 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1076 PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 1077 lld = PetscMax(lda, 1); /* local leading dimension */ 1078 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1079 PetscCheckScaLapackInfo("descinit", info); 1080 1081 /* redistribute matrix */ 1082 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1083 Bmpi->nooffprocentries = PETSC_TRUE; 1084 } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1085 PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1086 b->roworiented = PETSC_FALSE; 1087 PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1088 PetscCall(ISGetIndices(ir, &rows)); 1089 PetscCall(ISGetIndices(ic, &cols)); 1090 PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1091 PetscCall(ISRestoreIndices(ir, &rows)); 1092 PetscCall(ISRestoreIndices(ic, &cols)); 1093 PetscCall(ISDestroy(&ic)); 1094 PetscCall(ISDestroy(&ir)); 1095 } 1096 PetscCall(MatDenseRestoreArray(A, &aarray)); 1097 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1098 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1099 if (reuse == MAT_INPLACE_MATRIX) { 1100 PetscCall(MatHeaderReplace(A, &Bmpi)); 1101 } else *B = Bmpi; 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1106 { 1107 Mat mat_scal; 1108 PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1109 const PetscInt *cols; 1110 const PetscScalar *vals; 1111 1112 PetscFunctionBegin; 1113 if (reuse == MAT_REUSE_MATRIX) { 1114 mat_scal = *newmat; 1115 PetscCall(MatZeroEntries(mat_scal)); 1116 } else { 1117 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1118 m = PETSC_DECIDE; 1119 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1120 n = PETSC_DECIDE; 1121 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1122 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1123 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1124 PetscCall(MatSetUp(mat_scal)); 1125 } 1126 for (row = rstart; row < rend; row++) { 1127 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1128 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 1129 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1130 } 1131 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1132 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1133 1134 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1135 else *newmat = mat_scal; 1136 PetscFunctionReturn(PETSC_SUCCESS); 1137 } 1138 1139 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1140 { 1141 Mat mat_scal; 1142 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1143 const PetscInt *cols; 1144 const PetscScalar *vals; 1145 PetscScalar v; 1146 1147 PetscFunctionBegin; 1148 if (reuse == MAT_REUSE_MATRIX) { 1149 mat_scal = *newmat; 1150 PetscCall(MatZeroEntries(mat_scal)); 1151 } else { 1152 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1153 m = PETSC_DECIDE; 1154 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1155 n = PETSC_DECIDE; 1156 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1157 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1158 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1159 PetscCall(MatSetUp(mat_scal)); 1160 } 1161 PetscCall(MatGetRowUpperTriangular(A)); 1162 for (row = rstart; row < rend; row++) { 1163 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1164 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1165 for (j = 0; j < ncols; j++) { /* lower triangular part */ 1166 if (cols[j] == row) continue; 1167 v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 1168 PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1169 } 1170 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1171 } 1172 PetscCall(MatRestoreRowUpperTriangular(A)); 1173 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1174 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1175 1176 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1177 else *newmat = mat_scal; 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1182 { 1183 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1184 PetscInt sz = 0; 1185 1186 PetscFunctionBegin; 1187 PetscCall(PetscLayoutSetUp(A->rmap)); 1188 PetscCall(PetscLayoutSetUp(A->cmap)); 1189 if (!a->lld) a->lld = a->locr; 1190 1191 PetscCall(PetscFree(a->loc)); 1192 PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 1193 PetscCall(PetscCalloc1(sz, &a->loc)); 1194 1195 A->preallocated = PETSC_TRUE; 1196 PetscFunctionReturn(PETSC_SUCCESS); 1197 } 1198 1199 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1200 { 1201 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1202 Mat_ScaLAPACK_Grid *grid; 1203 PetscBool flg; 1204 MPI_Comm icomm; 1205 1206 PetscFunctionBegin; 1207 PetscCall(MatStashDestroy_Private(&A->stash)); 1208 PetscCall(PetscFree(a->loc)); 1209 PetscCall(PetscFree(a->pivots)); 1210 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1211 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1212 if (--grid->grid_refct == 0) { 1213 Cblacs_gridexit(grid->ictxt); 1214 Cblacs_gridexit(grid->ictxrow); 1215 Cblacs_gridexit(grid->ictxcol); 1216 PetscCall(PetscFree(grid)); 1217 PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1218 } 1219 PetscCall(PetscCommDestroy(&icomm)); 1220 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 1221 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1222 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 1223 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 1224 PetscCall(PetscFree(A->data)); 1225 PetscFunctionReturn(PETSC_SUCCESS); 1226 } 1227 1228 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1229 { 1230 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1231 PetscBLASInt info = 0; 1232 PetscBool flg; 1233 1234 PetscFunctionBegin; 1235 PetscCall(PetscLayoutSetUp(A->rmap)); 1236 PetscCall(PetscLayoutSetUp(A->cmap)); 1237 1238 /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1239 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1240 PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1241 PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1242 PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1243 1244 /* compute local sizes */ 1245 PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 1246 PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1247 a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1248 a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1249 a->lld = PetscMax(1, a->locr); 1250 1251 /* allocate local array */ 1252 PetscCall(MatScaLAPACKSetPreallocation(A)); 1253 1254 /* set up ScaLAPACK descriptor */ 1255 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1256 PetscCheckScaLapackInfo("descinit", info); 1257 PetscFunctionReturn(PETSC_SUCCESS); 1258 } 1259 1260 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1261 { 1262 PetscInt nstash, reallocs; 1263 1264 PetscFunctionBegin; 1265 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1266 PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 1267 PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 1268 PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 1272 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1273 { 1274 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1275 PetscMPIInt n; 1276 PetscInt i, flg, *row, *col; 1277 PetscScalar *val; 1278 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1279 1280 PetscFunctionBegin; 1281 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1282 while (1) { 1283 PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1284 if (!flg) break; 1285 for (i = 0; i < n; i++) { 1286 PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 1287 PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1288 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1289 PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1290 switch (A->insertmode) { 1291 case INSERT_VALUES: 1292 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1293 break; 1294 case ADD_VALUES: 1295 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1296 break; 1297 default: 1298 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1299 } 1300 } 1301 } 1302 PetscCall(MatStashScatterEnd_Private(&A->stash)); 1303 PetscFunctionReturn(PETSC_SUCCESS); 1304 } 1305 1306 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1307 { 1308 Mat Adense, As; 1309 MPI_Comm comm; 1310 1311 PetscFunctionBegin; 1312 PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 1313 PetscCall(MatCreate(comm, &Adense)); 1314 PetscCall(MatSetType(Adense, MATDENSE)); 1315 PetscCall(MatLoad(Adense, viewer)); 1316 PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 1317 PetscCall(MatDestroy(&Adense)); 1318 PetscCall(MatHeaderReplace(newMat, &As)); 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } 1321 1322 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1323 0, 1324 0, 1325 MatMult_ScaLAPACK, 1326 /* 4*/ MatMultAdd_ScaLAPACK, 1327 MatMultTranspose_ScaLAPACK, 1328 MatMultTransposeAdd_ScaLAPACK, 1329 MatSolve_ScaLAPACK, 1330 MatSolveAdd_ScaLAPACK, 1331 0, 1332 /*10*/ 0, 1333 MatLUFactor_ScaLAPACK, 1334 MatCholeskyFactor_ScaLAPACK, 1335 0, 1336 MatTranspose_ScaLAPACK, 1337 /*15*/ MatGetInfo_ScaLAPACK, 1338 0, 1339 MatGetDiagonal_ScaLAPACK, 1340 MatDiagonalScale_ScaLAPACK, 1341 MatNorm_ScaLAPACK, 1342 /*20*/ MatAssemblyBegin_ScaLAPACK, 1343 MatAssemblyEnd_ScaLAPACK, 1344 MatSetOption_ScaLAPACK, 1345 MatZeroEntries_ScaLAPACK, 1346 /*24*/ 0, 1347 MatLUFactorSymbolic_ScaLAPACK, 1348 MatLUFactorNumeric_ScaLAPACK, 1349 MatCholeskyFactorSymbolic_ScaLAPACK, 1350 MatCholeskyFactorNumeric_ScaLAPACK, 1351 /*29*/ MatSetUp_ScaLAPACK, 1352 0, 1353 0, 1354 0, 1355 0, 1356 /*34*/ MatDuplicate_ScaLAPACK, 1357 0, 1358 0, 1359 0, 1360 0, 1361 /*39*/ MatAXPY_ScaLAPACK, 1362 0, 1363 0, 1364 0, 1365 MatCopy_ScaLAPACK, 1366 /*44*/ 0, 1367 MatScale_ScaLAPACK, 1368 MatShift_ScaLAPACK, 1369 0, 1370 0, 1371 /*49*/ 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 /*54*/ 0, 1377 0, 1378 0, 1379 0, 1380 0, 1381 /*59*/ 0, 1382 MatDestroy_ScaLAPACK, 1383 MatView_ScaLAPACK, 1384 0, 1385 0, 1386 /*64*/ 0, 1387 0, 1388 0, 1389 0, 1390 0, 1391 /*69*/ 0, 1392 0, 1393 MatConvert_ScaLAPACK_Dense, 1394 0, 1395 0, 1396 /*74*/ 0, 1397 0, 1398 0, 1399 0, 1400 0, 1401 /*79*/ 0, 1402 0, 1403 0, 1404 0, 1405 MatLoad_ScaLAPACK, 1406 /*84*/ 0, 1407 0, 1408 0, 1409 0, 1410 0, 1411 /*89*/ 0, 1412 0, 1413 MatMatMultNumeric_ScaLAPACK, 1414 0, 1415 0, 1416 /*94*/ 0, 1417 0, 1418 0, 1419 MatMatTransposeMultNumeric_ScaLAPACK, 1420 0, 1421 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1422 0, 1423 0, 1424 MatConjugate_ScaLAPACK, 1425 0, 1426 /*104*/ 0, 1427 0, 1428 0, 1429 0, 1430 0, 1431 /*109*/ MatMatSolve_ScaLAPACK, 1432 0, 1433 0, 1434 0, 1435 MatMissingDiagonal_ScaLAPACK, 1436 /*114*/ 0, 1437 0, 1438 0, 1439 0, 1440 0, 1441 /*119*/ 0, 1442 MatHermitianTranspose_ScaLAPACK, 1443 0, 1444 0, 1445 0, 1446 /*124*/ 0, 1447 0, 1448 0, 1449 0, 1450 0, 1451 /*129*/ 0, 1452 0, 1453 0, 1454 0, 1455 0, 1456 /*134*/ 0, 1457 0, 1458 0, 1459 0, 1460 0, 1461 0, 1462 /*140*/ 0, 1463 0, 1464 0, 1465 0, 1466 0, 1467 /*145*/ 0, 1468 0, 1469 0, 1470 0, 1471 0, 1472 /*150*/ 0, 1473 0}; 1474 1475 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1476 { 1477 PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 1478 PetscInt size = stash->size, nsends; 1479 PetscInt count, *sindices, **rindices, i, j, l; 1480 PetscScalar **rvalues, *svalues; 1481 MPI_Comm comm = stash->comm; 1482 MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1483 PetscMPIInt *sizes, *nlengths, nreceives; 1484 PetscInt *sp_idx, *sp_idy; 1485 PetscScalar *sp_val; 1486 PetscMatStashSpace space, space_next; 1487 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1488 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1489 1490 PetscFunctionBegin; 1491 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1492 InsertMode addv; 1493 PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 1494 PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1495 mat->insertmode = addv; /* in case this processor had no cache */ 1496 } 1497 1498 bs2 = stash->bs * stash->bs; 1499 1500 /* first count number of contributors to each processor */ 1501 PetscCall(PetscCalloc1(size, &nlengths)); 1502 PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1503 1504 i = j = 0; 1505 space = stash->space_head; 1506 while (space) { 1507 space_next = space->next; 1508 for (l = 0; l < space->local_used; l++) { 1509 PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 1510 PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1511 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1512 j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 1513 nlengths[j]++; 1514 owner[i] = j; 1515 i++; 1516 } 1517 space = space_next; 1518 } 1519 1520 /* Now check what procs get messages - and compute nsends. */ 1521 PetscCall(PetscCalloc1(size, &sizes)); 1522 for (i = 0, nsends = 0; i < size; i++) { 1523 if (nlengths[i]) { 1524 sizes[i] = 1; 1525 nsends++; 1526 } 1527 } 1528 1529 { 1530 PetscMPIInt *onodes, *olengths; 1531 /* Determine the number of messages to expect, their lengths, from from-ids */ 1532 PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 1533 PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1534 /* since clubbing row,col - lengths are multiplied by 2 */ 1535 for (i = 0; i < nreceives; i++) olengths[i] *= 2; 1536 PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1537 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1538 for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 1539 PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 1540 PetscCall(PetscFree(onodes)); 1541 PetscCall(PetscFree(olengths)); 1542 } 1543 1544 /* do sends: 1545 1) starts[i] gives the starting index in svalues for stuff going to 1546 the ith processor 1547 */ 1548 PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 1549 PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 1550 PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1551 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1552 startv[0] = 0; 1553 starti[0] = 0; 1554 for (i = 1; i < size; i++) { 1555 startv[i] = startv[i - 1] + nlengths[i - 1]; 1556 starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1557 } 1558 1559 i = 0; 1560 space = stash->space_head; 1561 while (space) { 1562 space_next = space->next; 1563 sp_idx = space->idx; 1564 sp_idy = space->idy; 1565 sp_val = space->val; 1566 for (l = 0; l < space->local_used; l++) { 1567 j = owner[i]; 1568 if (bs2 == 1) { 1569 svalues[startv[j]] = sp_val[l]; 1570 } else { 1571 PetscInt k; 1572 PetscScalar *buf1, *buf2; 1573 buf1 = svalues + bs2 * startv[j]; 1574 buf2 = space->val + bs2 * l; 1575 for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1576 } 1577 sindices[starti[j]] = sp_idx[l]; 1578 sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1579 startv[j]++; 1580 starti[j]++; 1581 i++; 1582 } 1583 space = space_next; 1584 } 1585 startv[0] = 0; 1586 for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1587 1588 for (i = 0, count = 0; i < size; i++) { 1589 if (sizes[i]) { 1590 PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 1591 PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1592 } 1593 } 1594 #if defined(PETSC_USE_INFO) 1595 PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1596 for (i = 0; i < size; i++) { 1597 if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1598 } 1599 #endif 1600 PetscCall(PetscFree(nlengths)); 1601 PetscCall(PetscFree(owner)); 1602 PetscCall(PetscFree2(startv, starti)); 1603 PetscCall(PetscFree(sizes)); 1604 1605 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1606 PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1607 1608 for (i = 0; i < nreceives; i++) { 1609 recv_waits[2 * i] = recv_waits1[i]; 1610 recv_waits[2 * i + 1] = recv_waits2[i]; 1611 } 1612 stash->recv_waits = recv_waits; 1613 1614 PetscCall(PetscFree(recv_waits1)); 1615 PetscCall(PetscFree(recv_waits2)); 1616 1617 stash->svalues = svalues; 1618 stash->sindices = sindices; 1619 stash->rvalues = rvalues; 1620 stash->rindices = rindices; 1621 stash->send_waits = send_waits; 1622 stash->nsends = nsends; 1623 stash->nrecvs = nreceives; 1624 stash->reproduce_count = 0; 1625 PetscFunctionReturn(PETSC_SUCCESS); 1626 } 1627 1628 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1629 { 1630 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1631 1632 PetscFunctionBegin; 1633 PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1634 PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1635 PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 1636 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1637 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1638 PetscFunctionReturn(PETSC_SUCCESS); 1639 } 1640 1641 /*@ 1642 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 1643 the `MATSCALAPACK` matrix 1644 1645 Logically Collective 1646 1647 Input Parameters: 1648 + A - a `MATSCALAPACK` matrix 1649 . mb - the row block size 1650 - nb - the column block size 1651 1652 Level: intermediate 1653 1654 Note: 1655 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1656 1657 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1658 @*/ 1659 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1660 { 1661 PetscFunctionBegin; 1662 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1663 PetscValidLogicalCollectiveInt(A, mb, 2); 1664 PetscValidLogicalCollectiveInt(A, nb, 3); 1665 PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 1666 PetscFunctionReturn(PETSC_SUCCESS); 1667 } 1668 1669 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1670 { 1671 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1672 1673 PetscFunctionBegin; 1674 if (mb) *mb = a->mb; 1675 if (nb) *nb = a->nb; 1676 PetscFunctionReturn(PETSC_SUCCESS); 1677 } 1678 1679 /*@ 1680 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 1681 the `MATSCALAPACK` matrix 1682 1683 Not Collective 1684 1685 Input Parameter: 1686 . A - a `MATSCALAPACK` matrix 1687 1688 Output Parameters: 1689 + mb - the row block size 1690 - nb - the column block size 1691 1692 Level: intermediate 1693 1694 Note: 1695 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1696 1697 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1698 @*/ 1699 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1700 { 1701 PetscFunctionBegin; 1702 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1703 PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 1704 PetscFunctionReturn(PETSC_SUCCESS); 1705 } 1706 1707 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1708 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1709 1710 /*MC 1711 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1712 1713 Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1714 1715 Options Database Keys: 1716 + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 1717 . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1718 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1719 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1720 1721 Level: intermediate 1722 1723 Note: 1724 Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 1725 range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 1726 the given rank. 1727 1728 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1729 M*/ 1730 1731 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1732 { 1733 Mat_ScaLAPACK *a; 1734 PetscBool flg, flg1; 1735 Mat_ScaLAPACK_Grid *grid; 1736 MPI_Comm icomm; 1737 PetscBLASInt nprow, npcol, myrow, mycol; 1738 PetscInt optv1, k = 2, array[2] = {0, 0}; 1739 PetscMPIInt size; 1740 1741 PetscFunctionBegin; 1742 A->ops[0] = MatOps_Values; 1743 A->insertmode = NOT_SET_VALUES; 1744 1745 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1746 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1747 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1748 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1749 A->stash.ScatterDestroy = NULL; 1750 1751 PetscCall(PetscNew(&a)); 1752 A->data = (void *)a; 1753 1754 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1755 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1756 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 1757 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1758 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1759 } 1760 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1761 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1762 if (!flg) { 1763 PetscCall(PetscNew(&grid)); 1764 1765 PetscCallMPI(MPI_Comm_size(icomm, &size)); 1766 grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1767 1768 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1769 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1770 if (flg1) { 1771 PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1772 grid->nprow = optv1; 1773 } 1774 PetscOptionsEnd(); 1775 1776 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1777 grid->npcol = size / grid->nprow; 1778 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1779 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1780 grid->ictxt = Csys2blacs_handle(icomm); 1781 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1782 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1783 grid->grid_refct = 1; 1784 grid->nprow = nprow; 1785 grid->npcol = npcol; 1786 grid->myrow = myrow; 1787 grid->mycol = mycol; 1788 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1789 grid->ictxrow = Csys2blacs_handle(icomm); 1790 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1791 grid->ictxcol = Csys2blacs_handle(icomm); 1792 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1793 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1794 1795 } else grid->grid_refct++; 1796 PetscCall(PetscCommDestroy(&icomm)); 1797 a->grid = grid; 1798 a->mb = DEFAULT_BLOCKSIZE; 1799 a->nb = DEFAULT_BLOCKSIZE; 1800 1801 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1802 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1803 if (flg) { 1804 a->mb = array[0]; 1805 a->nb = (k > 1) ? array[1] : a->mb; 1806 } 1807 PetscOptionsEnd(); 1808 1809 a->roworiented = PETSC_TRUE; 1810 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1811 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1812 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1813 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1814 PetscFunctionReturn(PETSC_SUCCESS); 1815 } 1816 1817 /*@C 1818 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1819 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1820 1821 Collective 1822 1823 Input Parameters: 1824 + comm - MPI communicator 1825 . mb - row block size (or `PETSC_DECIDE` to have it set) 1826 . nb - column block size (or `PETSC_DECIDE` to have it set) 1827 . M - number of global rows 1828 . N - number of global columns 1829 . rsrc - coordinate of process that owns the first row of the distributed matrix 1830 - csrc - coordinate of process that owns the first column of the distributed matrix 1831 1832 Output Parameter: 1833 . A - the matrix 1834 1835 Options Database Key: 1836 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1837 1838 Level: intermediate 1839 1840 Notes: 1841 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 1842 1843 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1844 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1845 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1846 1847 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1848 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1849 significance and are thus ignored. The block sizes refer to the values 1850 used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1851 1852 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1853 @*/ 1854 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1855 { 1856 Mat_ScaLAPACK *a; 1857 PetscInt m, n; 1858 1859 PetscFunctionBegin; 1860 PetscCall(MatCreate(comm, A)); 1861 PetscCall(MatSetType(*A, MATSCALAPACK)); 1862 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1863 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1864 m = PETSC_DECIDE; 1865 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1866 n = PETSC_DECIDE; 1867 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1868 PetscCall(MatSetSizes(*A, m, n, M, N)); 1869 a = (Mat_ScaLAPACK *)(*A)->data; 1870 PetscCall(PetscBLASIntCast(M, &a->M)); 1871 PetscCall(PetscBLASIntCast(N, &a->N)); 1872 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1873 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1874 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 1875 PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 1876 PetscCall(MatSetUp(*A)); 1877 PetscFunctionReturn(PETSC_SUCCESS); 1878 } 1879