1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(0); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39 if (iascii) { 40 PetscCall(PetscViewerGetFormat(viewer, &format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 43 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 44 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 45 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", 
(int)a->locr, (int)a->locc)); 46 PetscFunctionReturn(0); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(0); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 53 PetscCall(MatView(Adense, viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(0); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61 PetscLogDouble isend[2], irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld * a->locc; /* locally allocated */ 67 isend[1] = a->locr * a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = 0; /* REVIEW ME */ 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented = flg; 106 break; 107 default: 108 SETERRQ(PETSC_COMM_SELF, 
PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109 } 110 PetscFunctionReturn(0); 111 } 112 113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114 { 115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116 PetscInt i, j; 117 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118 PetscBool roworiented = a->roworiented; 119 120 PetscFunctionBegin; 121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122 for (i = 0; i < nr; i++) { 123 if (rows[i] < 0) continue; 124 PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125 for (j = 0; j < nc; j++) { 126 if (cols[j] < 0) continue; 127 PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129 if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130 if (roworiented) { 131 switch (imode) { 132 case INSERT_VALUES: 133 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134 break; 135 default: 136 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137 break; 138 } 139 } else { 140 switch (imode) { 141 case INSERT_VALUES: 142 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143 break; 144 default: 145 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146 break; 147 } 148 } 149 } else { 150 PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151 A->assembled = PETSC_FALSE; 152 PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? 
vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153 } 154 } 155 } 156 PetscFunctionReturn(0); 157 } 158 159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160 { 161 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162 PetscScalar *x2d, *y2d, alpha = 1.0; 163 const PetscInt *ranges; 164 PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165 166 PetscFunctionBegin; 167 if (transpose) { 168 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 169 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 170 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171 xlld = PetscMax(1, A->rmap->n); 172 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173 PetscCheckScaLapackInfo("descinit", info); 174 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 175 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176 ylld = 1; 177 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178 PetscCheckScaLapackInfo("descinit", info); 179 180 /* allocate 2d vectors */ 181 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182 lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 183 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184 xlld = PetscMax(1, lszx); 185 186 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188 PetscCheckScaLapackInfo("descinit", info); 189 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190 
PetscCheckScaLapackInfo("descinit", info); 191 192 /* redistribute x as a column of a 2d matrix */ 193 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194 195 /* redistribute y as a row of a 2d matrix */ 196 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197 198 /* call PBLAS subroutine */ 199 PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200 201 /* redistribute y from a row of a 2d matrix */ 202 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 203 204 } else { /* non-transpose */ 205 206 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 207 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 208 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 209 xlld = 1; 210 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 211 PetscCheckScaLapackInfo("descinit", info); 212 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 213 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 214 ylld = PetscMax(1, A->rmap->n); 215 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 216 PetscCheckScaLapackInfo("descinit", info); 217 218 /* allocate 2d vectors */ 219 lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 220 lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 221 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 222 ylld = PetscMax(1, lszy); 223 224 /* create ScaLAPACK descriptors for vectors (2d block distribution) 
*/ 225 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 226 PetscCheckScaLapackInfo("descinit", info); 227 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 228 PetscCheckScaLapackInfo("descinit", info); 229 230 /* redistribute x as a row of a 2d matrix */ 231 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 232 233 /* redistribute y as a column of a 2d matrix */ 234 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 235 236 /* call PBLAS subroutine */ 237 PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 238 239 /* redistribute y from a column of a 2d matrix */ 240 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 241 } 242 PetscCall(PetscFree2(x2d, y2d)); 243 PetscFunctionReturn(0); 244 } 245 246 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 247 { 248 const PetscScalar *xarray; 249 PetscScalar *yarray; 250 251 PetscFunctionBegin; 252 PetscCall(VecGetArrayRead(x, &xarray)); 253 PetscCall(VecGetArray(y, &yarray)); 254 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray)); 255 PetscCall(VecRestoreArrayRead(x, &xarray)); 256 PetscCall(VecRestoreArray(y, &yarray)); 257 PetscFunctionReturn(0); 258 } 259 260 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 261 { 262 const PetscScalar *xarray; 263 PetscScalar *yarray; 264 265 PetscFunctionBegin; 266 PetscCall(VecGetArrayRead(x, &xarray)); 267 PetscCall(VecGetArray(y, &yarray)); 268 
PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray)); 269 PetscCall(VecRestoreArrayRead(x, &xarray)); 270 PetscCall(VecRestoreArray(y, &yarray)); 271 PetscFunctionReturn(0); 272 } 273 274 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 275 { 276 const PetscScalar *xarray; 277 PetscScalar *zarray; 278 279 PetscFunctionBegin; 280 if (y != z) PetscCall(VecCopy(y, z)); 281 PetscCall(VecGetArrayRead(x, &xarray)); 282 PetscCall(VecGetArray(z, &zarray)); 283 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray)); 284 PetscCall(VecRestoreArrayRead(x, &xarray)); 285 PetscCall(VecRestoreArray(z, &zarray)); 286 PetscFunctionReturn(0); 287 } 288 289 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290 { 291 const PetscScalar *xarray; 292 PetscScalar *zarray; 293 294 PetscFunctionBegin; 295 if (y != z) PetscCall(VecCopy(y, z)); 296 PetscCall(VecGetArrayRead(x, &xarray)); 297 PetscCall(VecGetArray(z, &zarray)); 298 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray)); 299 PetscCall(VecRestoreArrayRead(x, &xarray)); 300 PetscCall(VecRestoreArray(z, &zarray)); 301 PetscFunctionReturn(0); 302 } 303 304 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 305 { 306 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 307 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 308 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 309 PetscScalar sone = 1.0, zero = 0.0; 310 PetscBLASInt one = 1; 311 312 PetscFunctionBegin; 313 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 314 C->assembled = PETSC_TRUE; 315 PetscFunctionReturn(0); 316 } 317 318 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 319 { 320 PetscFunctionBegin; 321 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 322 PetscCall(MatSetType(C, 
MATSCALAPACK)); 323 PetscCall(MatSetUp(C)); 324 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 325 PetscFunctionReturn(0); 326 } 327 328 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 329 { 330 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 331 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 332 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 333 PetscScalar sone = 1.0, zero = 0.0; 334 PetscBLASInt one = 1; 335 336 PetscFunctionBegin; 337 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 338 C->assembled = PETSC_TRUE; 339 PetscFunctionReturn(0); 340 } 341 342 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 343 { 344 PetscFunctionBegin; 345 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 346 PetscCall(MatSetType(C, MATSCALAPACK)); 347 PetscCall(MatSetUp(C)); 348 PetscFunctionReturn(0); 349 } 350 351 /* --------------------------------------- */ 352 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 353 { 354 PetscFunctionBegin; 355 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 356 C->ops->productsymbolic = MatProductSymbolic_AB; 357 PetscFunctionReturn(0); 358 } 359 360 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 361 { 362 PetscFunctionBegin; 363 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 364 C->ops->productsymbolic = MatProductSymbolic_ABt; 365 PetscFunctionReturn(0); 366 } 367 368 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 369 { 370 Mat_Product *product = C->product; 371 372 PetscFunctionBegin; 373 switch (product->type) { 374 case MATPRODUCT_AB: 375 PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 376 break; 377 case MATPRODUCT_ABt: 378 PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 379 break; 380 default: 381 
SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 382 } 383 PetscFunctionReturn(0); 384 } 385 /* --------------------------------------- */ 386 387 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 388 { 389 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 390 PetscScalar *darray, *d2d, v; 391 const PetscInt *ranges; 392 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 393 394 PetscFunctionBegin; 395 PetscCall(VecGetArray(D, &darray)); 396 397 if (A->rmap->N <= A->cmap->N) { /* row version */ 398 399 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 400 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 401 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 402 dlld = PetscMax(1, A->rmap->n); 403 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 404 PetscCheckScaLapackInfo("descinit", info); 405 406 /* allocate 2d vector */ 407 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 408 PetscCall(PetscCalloc1(lszd, &d2d)); 409 dlld = PetscMax(1, lszd); 410 411 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 412 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 413 PetscCheckScaLapackInfo("descinit", info); 414 415 /* collect diagonal */ 416 for (j = 1; j <= a->M; j++) { 417 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 418 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 419 } 420 421 /* redistribute d from a column of a 2d matrix */ 422 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 423 
PetscCall(PetscFree(d2d)); 424 425 } else { /* column version */ 426 427 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 428 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 429 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 430 dlld = 1; 431 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 432 PetscCheckScaLapackInfo("descinit", info); 433 434 /* allocate 2d vector */ 435 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 436 PetscCall(PetscCalloc1(lszd, &d2d)); 437 438 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 439 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 440 PetscCheckScaLapackInfo("descinit", info); 441 442 /* collect diagonal */ 443 for (j = 1; j <= a->N; j++) { 444 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 445 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 446 } 447 448 /* redistribute d from a row of a 2d matrix */ 449 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 450 PetscCall(PetscFree(d2d)); 451 } 452 453 PetscCall(VecRestoreArray(D, &darray)); 454 PetscCall(VecAssemblyBegin(D)); 455 PetscCall(VecAssemblyEnd(D)); 456 PetscFunctionReturn(0); 457 } 458 459 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 460 { 461 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 462 const PetscScalar *d; 463 const PetscInt *ranges; 464 PetscScalar *d2d; 465 PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 466 467 PetscFunctionBegin; 468 if (R) { 469 PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d)); 470 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 471 
PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 472 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 473 dlld = 1; 474 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 475 PetscCheckScaLapackInfo("descinit", info); 476 477 /* allocate 2d vector */ 478 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 479 PetscCall(PetscCalloc1(lszd, &d2d)); 480 481 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 482 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 483 PetscCheckScaLapackInfo("descinit", info); 484 485 /* redistribute d to a row of a 2d matrix */ 486 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 487 488 /* broadcast along process columns */ 489 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 490 else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 491 492 /* local scaling */ 493 for (j = 0; j < a->locc; j++) 494 for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 495 496 PetscCall(PetscFree(d2d)); 497 PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d)); 498 } 499 if (L) { 500 PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d)); 501 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 502 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 503 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 504 dlld = PetscMax(1, A->rmap->n); 505 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 506 PetscCheckScaLapackInfo("descinit", info); 507 508 /* allocate 2d vector */ 509 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, 
&a->grid->nprow); 510 PetscCall(PetscCalloc1(lszd, &d2d)); 511 dlld = PetscMax(1, lszd); 512 513 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 514 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 515 PetscCheckScaLapackInfo("descinit", info); 516 517 /* redistribute d to a column of a 2d matrix */ 518 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 519 520 /* broadcast along process rows */ 521 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 522 else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 523 524 /* local scaling */ 525 for (i = 0; i < a->locr; i++) 526 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 527 528 PetscCall(PetscFree(d2d)); 529 PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d)); 530 } 531 PetscFunctionReturn(0); 532 } 533 534 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 535 { 536 PetscFunctionBegin; 537 *missing = PETSC_FALSE; 538 PetscFunctionReturn(0); 539 } 540 541 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 542 { 543 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 544 PetscBLASInt n, one = 1; 545 546 PetscFunctionBegin; 547 n = x->lld * x->locc; 548 PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 549 PetscFunctionReturn(0); 550 } 551 552 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 553 { 554 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 555 PetscBLASInt i, n; 556 PetscScalar v; 557 558 PetscFunctionBegin; 559 n = PetscMin(x->M, x->N); 560 for (i = 1; i <= n; i++) { 561 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 562 v += alpha; 563 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 564 } 565 
PetscFunctionReturn(0); 566 } 567 568 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 569 { 570 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 571 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 572 PetscBLASInt one = 1; 573 PetscScalar beta = 1.0; 574 575 PetscFunctionBegin; 576 MatScaLAPACKCheckDistribution(Y, 1, X, 3); 577 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 578 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 579 PetscFunctionReturn(0); 580 } 581 582 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 583 { 584 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 585 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 586 587 PetscFunctionBegin; 588 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 589 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 590 PetscFunctionReturn(0); 591 } 592 593 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 594 { 595 Mat Bs; 596 MPI_Comm comm; 597 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 598 599 PetscFunctionBegin; 600 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 601 PetscCall(MatCreate(comm, &Bs)); 602 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 603 PetscCall(MatSetType(Bs, MATSCALAPACK)); 604 b = (Mat_ScaLAPACK *)Bs->data; 605 b->M = a->M; 606 b->N = a->N; 607 b->mb = a->mb; 608 b->nb = a->nb; 609 b->rsrc = a->rsrc; 610 b->csrc = a->csrc; 611 PetscCall(MatSetUp(Bs)); 612 *B = Bs; 613 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 614 Bs->assembled = PETSC_TRUE; 615 PetscFunctionReturn(0); 616 } 617 618 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 619 { 620 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 621 Mat Bs = *B; 622 PetscBLASInt one = 1; 623 PetscScalar sone = 1.0, zero = 0.0; 624 #if 
defined(PETSC_USE_COMPLEX) 625 PetscInt i, j; 626 #endif 627 628 PetscFunctionBegin; 629 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 630 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 631 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 632 *B = Bs; 633 b = (Mat_ScaLAPACK *)Bs->data; 634 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 635 #if defined(PETSC_USE_COMPLEX) 636 /* undo conjugation */ 637 for (i = 0; i < b->locr; i++) 638 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 639 #endif 640 Bs->assembled = PETSC_TRUE; 641 PetscFunctionReturn(0); 642 } 643 644 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 645 { 646 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 647 PetscInt i, j; 648 649 PetscFunctionBegin; 650 for (i = 0; i < a->locr; i++) 651 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 652 PetscFunctionReturn(0); 653 } 654 655 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 656 { 657 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 658 Mat Bs = *B; 659 PetscBLASInt one = 1; 660 PetscScalar sone = 1.0, zero = 0.0; 661 662 PetscFunctionBegin; 663 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 664 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 665 *B = Bs; 666 b = (Mat_ScaLAPACK *)Bs->data; 667 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 668 Bs->assembled = PETSC_TRUE; 669 PetscFunctionReturn(0); 670 } 671 672 static PetscErrorCode MatSolve_ScaLAPACK(Mat 
A, Vec B, Vec X) 673 { 674 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 675 PetscScalar *x, *x2d; 676 const PetscInt *ranges; 677 PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 678 679 PetscFunctionBegin; 680 PetscCall(VecCopy(B, X)); 681 PetscCall(VecGetArray(X, &x)); 682 683 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 684 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 685 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 686 xlld = PetscMax(1, A->rmap->n); 687 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 688 PetscCheckScaLapackInfo("descinit", info); 689 690 /* allocate 2d vector */ 691 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 692 PetscCall(PetscMalloc1(lszx, &x2d)); 693 xlld = PetscMax(1, lszx); 694 695 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 696 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 697 PetscCheckScaLapackInfo("descinit", info); 698 699 /* redistribute x as a column of a 2d matrix */ 700 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 701 702 /* call ScaLAPACK subroutine */ 703 switch (A->factortype) { 704 case MAT_FACTOR_LU: 705 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 706 PetscCheckScaLapackInfo("getrs", info); 707 break; 708 case MAT_FACTOR_CHOLESKY: 709 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 710 PetscCheckScaLapackInfo("potrs", info); 711 break; 712 default: 713 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 
/* --- continuation of MatSolve_ScaLAPACK (function header is in an earlier chunk) --- */
  }

  /* redistribute x from a column of a 2d matrix back to the vector's 1d layout */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(0);
}

/* X = A^{-1}*B + Y, implemented as a solve followed by an AXPY */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
{
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A, B, X));
  PetscCall(VecAXPY(X, 1, Y));
  PetscFunctionReturn(0);
}

/* Solve A*X = B with multiple right-hand sides; A must already be factored (LU or
   Cholesky), and B and X must both be MATSCALAPACK with matching distribution */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
  PetscBool      flg1, flg2;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B, 1, X, 2);
  b = (Mat_ScaLAPACK *)B->data;
  x = (Mat_ScaLAPACK *)X->data;
  /* copy right-hand sides into X; the ScaLAPACK triangular solvers overwrite them in place */
  PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}

/* In-place LU factorization via PxGETRF; the row/col orderings are ignored */
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* pivot array allocated once (size locr+mb, matching PxGETRF's ipiv requirement) and reused */
  if (!a->pivots) { PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); }
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

/* Numeric LU phase: copy A into the factor matrix F, then factor F in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}

/* In-place Cholesky factorization via PxPOTRF (lower triangle); perm is ignored */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

/* Numeric Cholesky phase: copy A into the factor matrix F, then factor F in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}

/* Create the factor matrix F with the same block sizes and grid placement as A */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE; /* symbolic phase is a no-op for dense factorizations */
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}

/* Register the LU and Cholesky factorizations of MATSCALAPACK under MATSOLVERSCALAPACK */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}

/* Compute a matrix norm via PxLANGE; 1-norm and infinity-norm need a workspace array */
static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
{
  Mat_ScaLAPACK *a   = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, lwork = 0;
  const char    *ntype;
  PetscScalar   *work = NULL, dummy;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1:
    ntype = "1";
    lwork = PetscMax(a->locr, a->locc);
    break;
  case NORM_FROBENIUS:
    ntype = "F";
    work  = &dummy; /* placeholder address; no workspace is allocated for the Frobenius norm */
    break;
  case NORM_INFINITY:
    ntype = "I";
    lwork = PetscMax(a->locr, a->locc);
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
  }
  if (lwork)
/* --- continuation of MatNorm_ScaLAPACK: allocate workspace only when the norm type needs it --- */
    PetscCall(PetscMalloc1(lwork, &work));
  *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
  if (lwork) PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}

/* Zero all locally stored entries (the whole lld-by-locc local array) */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
  PetscFunctionReturn(0);
}

/* Build index sets listing the global rows/columns stored on this process in
   the 2d block-cyclic distribution */
static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, n, nb, isrc, nproc, iproc, *idx;

  PetscFunctionBegin;
  if (rows) {
    n     = a->locr;
    nb    = a->mb;
    isrc  = a->rsrc;
    nproc = a->grid->nprow;
    iproc = a->grid->myrow;
    PetscCall(PetscMalloc1(n, &idx));
    /* map local index i to its global index (inverse of the block-cyclic numroc mapping) */
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
  }
  if (cols) {
    n     = a->locc;
    nb    = a->nb;
    isrc  = a->csrc;
    nproc = a->grid->npcol;
    iproc = a->grid->mycol;
    PetscCall(PetscMalloc1(n, &idx));
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
  }
  PetscFunctionReturn(0);
}

/* Convert a MATSCALAPACK matrix (2d block-cyclic) to MATDENSE (1d block-row layout),
   redistributing the data with PxGEMR2D */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* the target layout is incompatible with ScaLAPACK's 1d distribution, so first
       redistribute into an equally-split auxiliary matrix, then move rows with MatSetValues() */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1);                /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}

/* Check that a layout has equal local sizes on all processes (except possibly a
   smaller last one), as required by the 1d distributions used in the converters */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    n = ranges[1] - ranges[0];
    /* find the first process whose local size differs from the first one's */
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(0);
}

/* Convert MATDENSE to MATSCALAPACK; uses PxGEMR2D when the dense layout is
   ScaLAPACK-compatible, otherwise falls back to MatSetValues() */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    /* data is already in place, assembly below need not communicate */
    Bmpi->nooffprocentries =
/* --- continuation of MatConvert_Dense_ScaLAPACK --- */
      PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE; /* the dense array is column-major */
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}

/* Convert MATAIJ to MATSCALAPACK, transferring the entries row by row with MatSetValues() */
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/* Convert MATSBAIJ (symmetric storage, upper triangle only) to MATSCALAPACK:
   insert the stored upper-triangular rows and mirror the off-diagonal entries
   into the lower triangle (conjugated when the matrix is Hermitian) */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue; /* do not duplicate the diagonal */
      v = A->hermitian == PETSC_BOOL3_TRUE ?
/* --- continuation of MatConvert_SBAIJ_ScaLAPACK: mirror entry into the lower triangle --- */
          PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/* Allocate (or reallocate) the zero-initialized local lld-by-locc array */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK *a  = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  /* overflow-checked product lld*locc */
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
  PetscCall(PetscCalloc1(sz, &a->loc));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Free local storage and, when the last matrix on the communicator goes away,
   tear down the shared BLACS grid cached as an MPI attribute */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    /* release the 2d grid and the auxiliary 1d row/column contexts */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}

/* Validate the layout, compute the local sizes with numroc, allocate the local
   array, and initialize the ScaLAPACK descriptor */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a    = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr);

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(0);
}

/* Start scattering stashed off-process entries (skipped if the caller guaranteed
   there are none) */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
{
  PetscInt nstash, reallocs;
/* --- continuation of MatAssemblyBegin_ScaLAPACK --- */
  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/* Receive stashed entries and store each one at its local (row,col) position,
   computed with PxINFOG2L from the global indices */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break;
    for (i = 0; i < n; i++) {
      /* ScaLAPACK uses 1-based global indices */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
        break;
      case ADD_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}

/* Load from a viewer by reading a MATDENSE matrix and converting it */
PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
{
  Mat      Adense, As;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
  PetscCall(MatCreate(comm, &Adense));
  PetscCall(MatSetType(Adense, MATDENSE));
  PetscCall(MatLoad(Adense, viewer));
  PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
  PetscCall(MatDestroy(&Adense));
  PetscCall(MatHeaderReplace(newMat, &As));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Operations table for MATSCALAPACK; positions are fixed by struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
                                       0,
                                       0,
                                       MatMult_ScaLAPACK,
                                       /* 4*/ MatMultAdd_ScaLAPACK,
                                       MatMultTranspose_ScaLAPACK,
                                       MatMultTransposeAdd_ScaLAPACK,
                                       MatSolve_ScaLAPACK,
                                       MatSolveAdd_ScaLAPACK,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_ScaLAPACK,
                                       MatCholeskyFactor_ScaLAPACK,
                                       0,
                                       MatTranspose_ScaLAPACK,
                                       /*15*/ MatGetInfo_ScaLAPACK,
                                       0,
                                       MatGetDiagonal_ScaLAPACK,
                                       MatDiagonalScale_ScaLAPACK,
                                       MatNorm_ScaLAPACK,
                                       /*20*/ MatAssemblyBegin_ScaLAPACK,
                                       MatAssemblyEnd_ScaLAPACK,
                                       MatSetOption_ScaLAPACK,
                                       MatZeroEntries_ScaLAPACK,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_ScaLAPACK,
                                       MatLUFactorNumeric_ScaLAPACK,
                                       MatCholeskyFactorSymbolic_ScaLAPACK,
                                       MatCholeskyFactorNumeric_ScaLAPACK,
                                       /*29*/ MatSetUp_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatCopy_ScaLAPACK,
                                       /*44*/ 0,
                                       MatScale_ScaLAPACK,
                                       MatShift_ScaLAPACK,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_ScaLAPACK,
                                       MatView_ScaLAPACK,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_ScaLAPACK_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_ScaLAPACK,
                                       /*84*/ 0,
/* --- continuation of the MatOps_Values table (entries 85 onward) --- */
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_ScaLAPACK,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_ScaLAPACK,
                                       0,
                                       /*99*/ MatProductSetFromOptions_ScaLAPACK,
                                       0,
                                       0,
                                       MatConjugate_ScaLAPACK,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_ScaLAPACK,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0,
                                       0};

/* Custom stash scatter: routes each stashed (row,col,value) to the process that
   owns it in the 2d block-cyclic distribution (via PxINFOG2L/Cblacs_pnum), then
   exchanges indices and values with nonblocking sends/receives */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* convert the 0-based global (row,col) to the owning process in the grid */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, from from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2 */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      /* rows first, then cols at offset nlengths[j] within each message */
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* reset startv to the beginning of each destination's slot for the sends below */
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

/* Set block sizes before MatSetUp; PETSC_DECIDE selects the default (and nb defaults to mb) */
static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the `MATSCALAPACK` matrix

   Logically Collective

   Input Parameters:
+  A - a `MATSCALAPACK` matrix
.  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the `MATSCALAPACK` matrix

   Not collective

   Input Parameter:
.  A - a `MATSCALAPACK` matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
.  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
.  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.
1720 1721 Level: beginner 1722 1723 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()` 1724 M*/ 1725 1726 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1727 { 1728 Mat_ScaLAPACK *a; 1729 PetscBool flg, flg1; 1730 Mat_ScaLAPACK_Grid *grid; 1731 MPI_Comm icomm; 1732 PetscBLASInt nprow, npcol, myrow, mycol; 1733 PetscInt optv1, k = 2, array[2] = {0, 0}; 1734 PetscMPIInt size; 1735 1736 PetscFunctionBegin; 1737 PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps))); 1738 A->insertmode = NOT_SET_VALUES; 1739 1740 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1741 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1742 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1743 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1744 A->stash.ScatterDestroy = NULL; 1745 1746 PetscCall(PetscNew(&a)); 1747 A->data = (void *)a; 1748 1749 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1750 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1751 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 1752 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1753 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1754 } 1755 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1756 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1757 if (!flg) { 1758 PetscCall(PetscNew(&grid)); 1759 1760 PetscCallMPI(MPI_Comm_size(icomm, &size)); 1761 grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1762 1763 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1764 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1765 if (flg1) { 1766 
PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1767 grid->nprow = optv1; 1768 } 1769 PetscOptionsEnd(); 1770 1771 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1772 grid->npcol = size / grid->nprow; 1773 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1774 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1775 grid->ictxt = Csys2blacs_handle(icomm); 1776 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1777 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1778 grid->grid_refct = 1; 1779 grid->nprow = nprow; 1780 grid->npcol = npcol; 1781 grid->myrow = myrow; 1782 grid->mycol = mycol; 1783 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1784 grid->ictxrow = Csys2blacs_handle(icomm); 1785 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1786 grid->ictxcol = Csys2blacs_handle(icomm); 1787 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1788 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1789 1790 } else grid->grid_refct++; 1791 PetscCall(PetscCommDestroy(&icomm)); 1792 a->grid = grid; 1793 a->mb = DEFAULT_BLOCKSIZE; 1794 a->nb = DEFAULT_BLOCKSIZE; 1795 1796 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1797 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1798 if (flg) { 1799 a->mb = array[0]; 1800 a->nb = (k > 1) ? 
array[1] : a->mb; 1801 } 1802 PetscOptionsEnd(); 1803 1804 a->roworiented = PETSC_TRUE; 1805 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1806 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1807 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1808 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 /*@C 1813 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1814 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1815 1816 Collective 1817 1818 Input Parameters: 1819 + comm - MPI communicator 1820 . mb - row block size (or `PETSC_DECIDE` to have it set) 1821 . nb - column block size (or `PETSC_DECIDE` to have it set) 1822 . M - number of global rows 1823 . N - number of global columns 1824 . rsrc - coordinate of process that owns the first row of the distributed matrix 1825 - csrc - coordinate of process that owns the first column of the distributed matrix 1826 1827 Output Parameter: 1828 . A - the matrix 1829 1830 Options Database Key: 1831 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1832 1833 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1834 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1835 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1836 1837 Note: 1838 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value 1839 is chosen. 1840 1841 Storage Information: 1842 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1843 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1844 significance and are thus ignored. 
The block sizes refer to the values 1845 used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1846 1847 Level: intermediate 1848 1849 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1850 @*/ 1851 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1852 { 1853 Mat_ScaLAPACK *a; 1854 PetscInt m, n; 1855 1856 PetscFunctionBegin; 1857 PetscCall(MatCreate(comm, A)); 1858 PetscCall(MatSetType(*A, MATSCALAPACK)); 1859 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1860 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1861 m = PETSC_DECIDE; 1862 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1863 n = PETSC_DECIDE; 1864 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1865 PetscCall(MatSetSizes(*A, m, n, M, N)); 1866 a = (Mat_ScaLAPACK *)(*A)->data; 1867 PetscCall(PetscBLASIntCast(M, &a->M)); 1868 PetscCall(PetscBLASIntCast(N, &a->N)); 1869 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1870 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1871 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 1872 PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 1873 PetscCall(MatSetUp(*A)); 1874 PetscFunctionReturn(0); 1875 } 1876