1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(PETSC_SUCCESS); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39 if (iascii) { 40 PetscCall(PetscViewerGetFormat(viewer, &format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 43 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 44 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 45 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc)); 46 PetscFunctionReturn(PETSC_SUCCESS); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 53 PetscCall(MatView(Adense, viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61 PetscLogDouble isend[2], irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld * a->locc; /* locally allocated */ 67 isend[1] = a->locr * a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCallMPI(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = 0; /* REVIEW ME */ 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented = flg; 106 break; 107 default: 108 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109 } 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114 { 115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116 PetscInt i, j; 117 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118 PetscBool roworiented = a->roworiented; 119 120 PetscFunctionBegin; 121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122 for (i = 0; i < nr; i++) { 123 if (rows[i] < 0) continue; 124 PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125 for (j = 0; j < nc; j++) { 126 if (cols[j] < 0) continue; 127 PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129 if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130 if (roworiented) { 131 switch (imode) { 132 case INSERT_VALUES: 133 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134 break; 135 default: 136 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137 break; 138 } 139 } else { 140 switch (imode) { 141 case INSERT_VALUES: 142 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143 break; 144 default: 145 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146 break; 147 } 148 } 149 } else { 150 PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151 A->assembled = PETSC_FALSE; 152 PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153 } 154 } 155 } 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160 { 161 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162 PetscScalar *x2d, *y2d, alpha = 1.0; 163 const PetscInt *ranges; 164 PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165 166 PetscFunctionBegin; 167 if (transpose) { 168 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 169 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 170 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); 172 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173 PetscCheckScaLapackInfo("descinit", info); 174 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 175 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176 ylld = 1; 177 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178 PetscCheckScaLapackInfo("descinit", info); 179 180 /* allocate 2d vectors */ 181 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182 lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 183 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184 PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); 185 186 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188 PetscCheckScaLapackInfo("descinit", info); 189 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 190 PetscCheckScaLapackInfo("descinit", info); 191 192 /* redistribute x as a column of a 2d matrix */ 193 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194 195 /* redistribute y as a row of a 2d matrix */ 196 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197 198 /* call PBLAS subroutine */ 199 if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200 else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 201 202 /* redistribute y from a row of a 2d matrix */ 203 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 204 205 } else { /* non-transpose */ 206 207 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 208 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 209 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 210 xlld = 1; 211 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 212 PetscCheckScaLapackInfo("descinit", info); 213 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 214 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 215 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &ylld)); 216 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 217 PetscCheckScaLapackInfo("descinit", info); 218 219 /* allocate 2d vectors */ 220 lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 221 lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 222 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 223 PetscCall(PetscBLASIntCast(PetscMax(1, lszy), &ylld)); 224 225 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 226 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 227 PetscCheckScaLapackInfo("descinit", info); 228 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 229 PetscCheckScaLapackInfo("descinit", info); 230 231 /* redistribute x as a row of a 2d matrix */ 232 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 233 234 /* redistribute y as a column of a 2d matrix */ 235 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 236 237 /* call PBLAS subroutine */ 238 PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 239 240 /* redistribute y from a column of a 2d matrix */ 241 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 242 } 243 PetscCall(PetscFree2(x2d, y2d)); 244 PetscFunctionReturn(PETSC_SUCCESS); 245 } 246 247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 248 { 249 const PetscScalar *xarray; 250 PetscScalar *yarray; 251 252 PetscFunctionBegin; 253 PetscCall(VecGetArrayRead(x, &xarray)); 254 PetscCall(VecGetArray(y, &yarray)); 255 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray)); 256 PetscCall(VecRestoreArrayRead(x, &xarray)); 257 PetscCall(VecRestoreArray(y, &yarray)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 262 { 263 const PetscScalar *xarray; 264 PetscScalar *yarray; 265 266 PetscFunctionBegin; 267 PetscCall(VecGetArrayRead(x, &xarray)); 268 PetscCall(VecGetArray(y, &yarray)); 269 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray)); 270 PetscCall(VecRestoreArrayRead(x, &xarray)); 271 PetscCall(VecRestoreArray(y, &yarray)); 272 PetscFunctionReturn(PETSC_SUCCESS); 273 } 274 275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 276 { 277 const PetscScalar *xarray; 278 PetscScalar *yarray; 279 280 PetscFunctionBegin; 281 PetscCall(VecGetArrayRead(x, &xarray)); 282 PetscCall(VecGetArrayWrite(y, &yarray)); 283 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray)); 284 PetscCall(VecRestoreArrayRead(x, &xarray)); 285 PetscCall(VecRestoreArrayWrite(y, &yarray)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290 { 291 const PetscScalar *xarray; 292 PetscScalar *zarray; 293 294 PetscFunctionBegin; 295 if (y != z) PetscCall(VecCopy(y, z)); 296 PetscCall(VecGetArrayRead(x, &xarray)); 297 PetscCall(VecGetArray(z, &zarray)); 298 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray)); 299 PetscCall(VecRestoreArrayRead(x, &xarray)); 300 PetscCall(VecRestoreArray(z, &zarray)); 301 PetscFunctionReturn(PETSC_SUCCESS); 302 } 303 304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 305 { 306 const PetscScalar *xarray; 307 PetscScalar *zarray; 308 309 PetscFunctionBegin; 310 if (y != z) PetscCall(VecCopy(y, z)); 311 PetscCall(VecGetArrayRead(x, &xarray)); 312 PetscCall(VecGetArray(z, &zarray)); 313 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray)); 314 PetscCall(VecRestoreArrayRead(x, &xarray)); 315 PetscCall(VecRestoreArray(z, &zarray)); 316 PetscFunctionReturn(PETSC_SUCCESS); 317 } 318 319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 320 { 321 const PetscScalar *xarray; 322 PetscScalar *zarray; 323 324 PetscFunctionBegin; 325 if (y != z) PetscCall(VecCopy(y, z)); 326 PetscCall(VecGetArrayRead(x, &xarray)); 327 PetscCall(VecGetArray(z, &zarray)); 328 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray)); 329 PetscCall(VecRestoreArrayRead(x, &xarray)); 330 PetscCall(VecRestoreArray(z, &zarray)); 331 PetscFunctionReturn(PETSC_SUCCESS); 332 } 333 334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 335 { 336 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 337 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 338 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 339 PetscScalar sone = 1.0, zero = 0.0; 340 PetscBLASInt one = 1; 341 342 PetscFunctionBegin; 343 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 344 C->assembled = PETSC_TRUE; 345 PetscFunctionReturn(PETSC_SUCCESS); 346 } 347 348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 349 { 350 PetscFunctionBegin; 351 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 352 PetscCall(MatSetType(C, MATSCALAPACK)); 353 PetscCall(MatSetUp(C)); 354 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 359 { 360 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 361 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 362 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 363 PetscScalar sone = 1.0, zero = 0.0; 364 PetscBLASInt one = 1; 365 366 PetscFunctionBegin; 367 PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 368 C->assembled = PETSC_TRUE; 369 PetscFunctionReturn(PETSC_SUCCESS); 370 } 371 372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 373 { 374 PetscFunctionBegin; 375 PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 376 PetscCall(MatSetType(C, MATSCALAPACK)); 377 PetscCall(MatSetUp(C)); 378 C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK; 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 383 { 384 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 385 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 386 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 387 PetscScalar sone = 1.0, zero = 0.0; 388 PetscBLASInt one = 1; 389 390 PetscFunctionBegin; 391 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 392 C->assembled = PETSC_TRUE; 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 397 { 398 PetscFunctionBegin; 399 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 400 PetscCall(MatSetType(C, MATSCALAPACK)); 401 PetscCall(MatSetUp(C)); 402 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK; 403 PetscFunctionReturn(PETSC_SUCCESS); 404 } 405 406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 407 { 408 Mat_Product *product = C->product; 409 410 PetscFunctionBegin; 411 switch (product->type) { 412 case MATPRODUCT_AB: 413 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 414 C->ops->productsymbolic = MatProductSymbolic_AB; 415 break; 416 case MATPRODUCT_AtB: 417 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK; 418 C->ops->productsymbolic = MatProductSymbolic_AtB; 419 break; 420 case MATPRODUCT_ABt: 421 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 422 C->ops->productsymbolic = MatProductSymbolic_ABt; 423 break; 424 default: 425 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 426 } 427 PetscFunctionReturn(PETSC_SUCCESS); 428 } 429 430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 431 { 432 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 433 PetscScalar *darray, *d2d, v; 434 const PetscInt *ranges; 435 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 436 437 PetscFunctionBegin; 438 PetscCall(VecGetArray(D, &darray)); 439 440 if (A->rmap->N <= A->cmap->N) { /* row version */ 441 442 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 443 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 444 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 445 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); 446 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 447 PetscCheckScaLapackInfo("descinit", info); 448 449 /* allocate 2d vector */ 450 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 451 PetscCall(PetscCalloc1(lszd, &d2d)); 452 PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); 453 454 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 455 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 456 PetscCheckScaLapackInfo("descinit", info); 457 458 /* collect diagonal */ 459 for (j = 1; j <= a->M; j++) { 460 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 461 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 462 } 463 464 /* redistribute d from a column of a 2d matrix */ 465 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 466 PetscCall(PetscFree(d2d)); 467 468 } else { /* column version */ 469 470 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 471 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 472 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 473 dlld = 1; 474 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 475 PetscCheckScaLapackInfo("descinit", info); 476 477 /* allocate 2d vector */ 478 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 479 PetscCall(PetscCalloc1(lszd, &d2d)); 480 481 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 482 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 483 PetscCheckScaLapackInfo("descinit", info); 484 485 /* collect diagonal */ 486 for (j = 1; j <= a->N; j++) { 487 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 488 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 489 } 490 491 /* redistribute d from a row of a 2d matrix */ 492 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 493 PetscCall(PetscFree(d2d)); 494 } 495 496 PetscCall(VecRestoreArray(D, &darray)); 497 PetscCall(VecAssemblyBegin(D)); 498 PetscCall(VecAssemblyEnd(D)); 499 PetscFunctionReturn(PETSC_SUCCESS); 500 } 501 502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 503 { 504 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 505 const PetscScalar *d; 506 const PetscInt *ranges; 507 PetscScalar *d2d; 508 PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 509 510 PetscFunctionBegin; 511 if (R) { 512 PetscCall(VecGetArrayRead(R, &d)); 513 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 514 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 515 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 516 dlld = 1; 517 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 518 PetscCheckScaLapackInfo("descinit", info); 519 520 /* allocate 2d vector */ 521 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 522 PetscCall(PetscCalloc1(lszd, &d2d)); 523 524 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 525 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 526 PetscCheckScaLapackInfo("descinit", info); 527 528 /* redistribute d to a row of a 2d matrix */ 529 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 530 531 /* broadcast along process columns */ 532 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 533 else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 534 535 /* local scaling */ 536 for (j = 0; j < a->locc; j++) 537 for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 538 539 PetscCall(PetscFree(d2d)); 540 PetscCall(VecRestoreArrayRead(R, &d)); 541 } 542 if (L) { 543 PetscCall(VecGetArrayRead(L, &d)); 544 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 545 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 546 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 547 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &dlld)); 548 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 549 PetscCheckScaLapackInfo("descinit", info); 550 551 /* allocate 2d vector */ 552 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 553 PetscCall(PetscCalloc1(lszd, &d2d)); 554 PetscCall(PetscBLASIntCast(PetscMax(1, lszd), &dlld)); 555 556 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 557 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 558 PetscCheckScaLapackInfo("descinit", info); 559 560 /* redistribute d to a column of a 2d matrix */ 561 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 562 563 /* broadcast along process rows */ 564 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 565 else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 566 567 /* local scaling */ 568 for (i = 0; i < a->locr; i++) 569 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 570 571 PetscCall(PetscFree(d2d)); 572 PetscCall(VecRestoreArrayRead(L, &d)); 573 } 574 PetscFunctionReturn(PETSC_SUCCESS); 575 } 576 577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 578 { 579 PetscFunctionBegin; 580 *missing = PETSC_FALSE; 581 PetscFunctionReturn(PETSC_SUCCESS); 582 } 583 584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 585 { 586 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 587 PetscBLASInt n, one = 1; 588 589 PetscFunctionBegin; 590 n = x->lld * x->locc; 591 PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 596 { 597 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 598 PetscBLASInt i, n; 599 PetscScalar v; 600 601 PetscFunctionBegin; 602 n = PetscMin(x->M, x->N); 603 for (i = 1; i <= n; i++) { 604 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 605 v += alpha; 606 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 607 } 608 PetscFunctionReturn(PETSC_SUCCESS); 609 } 610 611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 612 { 613 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 614 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 615 PetscBLASInt one = 1; 616 PetscScalar beta = 1.0; 617 618 PetscFunctionBegin; 619 MatScaLAPACKCheckDistribution(Y, 1, X, 3); 620 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 621 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 622 PetscFunctionReturn(PETSC_SUCCESS); 623 } 624 625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 626 { 627 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 628 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 629 630 PetscFunctionBegin; 631 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 632 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 633 PetscFunctionReturn(PETSC_SUCCESS); 634 } 635 636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 637 { 638 Mat Bs; 639 MPI_Comm comm; 640 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 641 642 PetscFunctionBegin; 643 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 644 PetscCall(MatCreate(comm, &Bs)); 645 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 646 PetscCall(MatSetType(Bs, MATSCALAPACK)); 647 b = (Mat_ScaLAPACK *)Bs->data; 648 b->M = a->M; 649 b->N = a->N; 650 b->mb = a->mb; 651 b->nb = a->nb; 652 b->rsrc = a->rsrc; 653 b->csrc = a->csrc; 654 PetscCall(MatSetUp(Bs)); 655 *B = Bs; 656 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 657 Bs->assembled = PETSC_TRUE; 658 PetscFunctionReturn(PETSC_SUCCESS); 659 } 660 661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 662 { 663 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 664 Mat Bs = *B; 665 PetscBLASInt one = 1; 666 PetscScalar sone = 1.0, zero = 0.0; 667 #if defined(PETSC_USE_COMPLEX) 668 PetscInt i, j; 669 #endif 670 671 PetscFunctionBegin; 672 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 673 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 674 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 675 *B = Bs; 676 b = (Mat_ScaLAPACK *)Bs->data; 677 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 678 #if defined(PETSC_USE_COMPLEX) 679 /* undo conjugation */ 680 for (i = 0; i < b->locr; i++) 681 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 682 #endif 683 Bs->assembled = PETSC_TRUE; 684 PetscFunctionReturn(PETSC_SUCCESS); 685 } 686 687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 688 { 689 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 690 PetscInt i, j; 691 692 PetscFunctionBegin; 693 for (i = 0; i < a->locr; i++) 694 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } 697 698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 699 { 700 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 701 Mat Bs = *B; 702 PetscBLASInt one = 1; 703 PetscScalar sone = 1.0, zero = 0.0; 704 705 PetscFunctionBegin; 706 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 707 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 708 *B = Bs; 709 b = (Mat_ScaLAPACK *)Bs->data; 710 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 711 Bs->assembled = PETSC_TRUE; 712 PetscFunctionReturn(PETSC_SUCCESS); 713 } 714 715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 716 { 717 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 718 PetscScalar *x, *x2d; 719 const PetscInt *ranges; 720 PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 721 722 PetscFunctionBegin; 723 PetscCall(VecCopy(B, X)); 724 PetscCall(VecGetArray(X, &x)); 725 726 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 727 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 728 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 729 PetscCall(PetscBLASIntCast(PetscMax(1, A->rmap->n), &xlld)); 730 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 731 PetscCheckScaLapackInfo("descinit", info); 732 733 /* allocate 2d vector */ 734 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 735 PetscCall(PetscMalloc1(lszx, &x2d)); 736 PetscCall(PetscBLASIntCast(PetscMax(1, lszx), &xlld)); 737 738 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 739 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 740 PetscCheckScaLapackInfo("descinit", info); 741 742 /* redistribute x as a column of a 2d matrix */ 743 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 744 745 /* call ScaLAPACK subroutine */ 746 switch (A->factortype) { 747 case MAT_FACTOR_LU: 748 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 749 PetscCheckScaLapackInfo("getrs", info); 750 break; 751 case MAT_FACTOR_CHOLESKY: 752 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 753 PetscCheckScaLapackInfo("potrs", info); 754 break; 755 default: 756 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 757 } 758 759 /* redistribute x from a column of a 2d matrix */ 760 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 761 762 PetscCall(PetscFree(x2d)); 763 PetscCall(VecRestoreArray(X, &x)); 764 PetscFunctionReturn(PETSC_SUCCESS); 765 } 766 767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 768 { 769 PetscFunctionBegin; 770 PetscCall(MatSolve_ScaLAPACK(A, B, X)); 771 PetscCall(VecAXPY(X, 1, Y)); 772 PetscFunctionReturn(PETSC_SUCCESS); 773 } 774 775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 776 { 777 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *x; 778 PetscBool flg1, flg2; 779 PetscBLASInt one = 1, info; 780 Mat C; 781 MatType type; 782 783 PetscFunctionBegin; 784 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 785 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 786 if (flg1 && flg2) MatScaLAPACKCheckDistribution(B, 2, X, 3); 787 if (flg2) { 788 if (flg1) PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN)); 789 else PetscCall(MatConvert(B, MATSCALAPACK, MAT_REUSE_MATRIX, &X)); 790 C = X; 791 } else { 792 PetscCall(MatConvert(B, MATSCALAPACK, MAT_INITIAL_MATRIX, &C)); 793 } 794 x = (Mat_ScaLAPACK *)C->data; 795 796 switch (A->factortype) { 797 case MAT_FACTOR_LU: 798 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 799 PetscCheckScaLapackInfo("getrs", info); 800 break; 801 case MAT_FACTOR_CHOLESKY: 802 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 803 PetscCheckScaLapackInfo("potrs", info); 804 break; 805 default: 806 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 807 } 808 if (!flg2) { 809 PetscCall(MatGetType(X, &type)); 810 PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X)); 811 PetscCall(MatDestroy(&C)); 812 } 813 PetscFunctionReturn(PETSC_SUCCESS); 814 } 815 816 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) 817 { 818 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 819 PetscBLASInt one = 1, info; 820 821 PetscFunctionBegin; 822 if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots)); 823 PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info)); 824 PetscCheckScaLapackInfo("getrf", info); 825 A->factortype = MAT_FACTOR_LU; 826 A->assembled = PETSC_TRUE; 827 828 PetscCall(PetscFree(A->solvertype)); 829 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 834 { 835 PetscFunctionBegin; 836 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 837 PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info)); 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 841 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 842 { 843 PetscFunctionBegin; 844 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 845 PetscFunctionReturn(PETSC_SUCCESS); 846 } 847 848 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) 849 { 850 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 851 PetscBLASInt one = 1, info; 852 853 PetscFunctionBegin; 854 PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info)); 855 PetscCheckScaLapackInfo("potrf", info); 856 A->factortype = MAT_FACTOR_CHOLESKY; 857 A->assembled = PETSC_TRUE; 858 859 PetscCall(PetscFree(A->solvertype)); 860 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype)); 861 PetscFunctionReturn(PETSC_SUCCESS); 862 } 863 864 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) 865 { 866 PetscFunctionBegin; 867 PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN)); 868 PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info)); 869 PetscFunctionReturn(PETSC_SUCCESS); 870 } 871 872 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) 873 { 874 PetscFunctionBegin; 875 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 876 PetscFunctionReturn(PETSC_SUCCESS); 877 } 878 879 static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) 880 { 881 PetscFunctionBegin; 882 *type = MATSOLVERSCALAPACK; 883 PetscFunctionReturn(PETSC_SUCCESS); 884 } 885 886 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) 887 { 888 Mat B; 889 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 890 891 PetscFunctionBegin; 892 /* Create the factorization matrix */ 893 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B)); 894 B->trivialsymbolic = PETSC_TRUE; 895 B->factortype = ftype; 896 PetscCall(PetscFree(B->solvertype)); 897 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype)); 898 899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack)); 900 *F = B; 901 PetscFunctionReturn(PETSC_SUCCESS); 902 } 903 904 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 905 { 906 PetscFunctionBegin; 907 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack)); 908 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack)); 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) 913 { 914 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 915 PetscBLASInt one = 1, lwork = 0; 916 const char *ntype; 917 PetscScalar *work = NULL, dummy; 918 919 PetscFunctionBegin; 920 switch (type) { 921 case NORM_1: 922 ntype = "1"; 923 lwork = PetscMax(a->locr, a->locc); 924 break; 925 case NORM_FROBENIUS: 926 ntype = "F"; 927 work = &dummy; 928 break; 929 case NORM_INFINITY: 930 ntype = "I"; 931 lwork = PetscMax(a->locr, a->locc); 932 break; 933 default: 934 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type"); 935 } 936 if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 937 *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 938 if (lwork) PetscCall(PetscFree(work)); 939 PetscFunctionReturn(PETSC_SUCCESS); 940 } 941 942 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 943 { 944 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 945 946 PetscFunctionBegin; 947 PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 948 PetscFunctionReturn(PETSC_SUCCESS); 949 } 950 951 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) 952 { 953 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 954 PetscInt i, n, nb, isrc, nproc, iproc, *idx; 955 956 PetscFunctionBegin; 957 if (rows) { 958 n = a->locr; 959 nb = a->mb; 960 isrc = a->rsrc; 961 nproc = a->grid->nprow; 962 iproc = a->grid->myrow; 963 PetscCall(PetscMalloc1(n, &idx)); 964 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 965 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 966 } 967 if (cols) { 968 n = a->locc; 969 nb = a->nb; 970 isrc = a->csrc; 971 nproc = a->grid->npcol; 972 iproc = a->grid->mycol; 973 PetscCall(PetscMalloc1(n, &idx)); 974 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 975 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 976 } 977 PetscFunctionReturn(PETSC_SUCCESS); 978 } 979 980 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) 981 { 982 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 983 Mat Bmpi; 984 MPI_Comm comm; 985 PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb; 986 const PetscInt *ranges, *branges, *cwork; 987 const PetscScalar *vwork; 988 PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 989 PetscScalar *barray; 990 PetscBool differ = PETSC_FALSE; 991 PetscMPIInt size; 992 993 PetscFunctionBegin; 994 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 995 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 996 997 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 998 PetscCallMPI(MPI_Comm_size(comm, &size)); 999 PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges)); 1000 for (i = 0; i < size; i++) 1001 if (ranges[i + 1] != branges[i + 1]) { 1002 differ = PETSC_TRUE; 1003 break; 1004 } 1005 } 1006 1007 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 1008 PetscCall(MatCreate(comm, &Bmpi)); 1009 m = PETSC_DECIDE; 1010 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1011 n = PETSC_DECIDE; 1012 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1013 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 1014 PetscCall(MatSetType(Bmpi, MATDENSE)); 1015 PetscCall(MatSetUp(Bmpi)); 1016 1017 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1018 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1019 PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 1020 PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ 1021 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1022 PetscCheckScaLapackInfo("descinit", info); 1023 1024 /* redistribute matrix */ 1025 PetscCall(MatDenseGetArray(Bmpi, &barray)); 1026 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 1027 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1028 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1029 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1030 1031 /* transfer rows of auxiliary matrix to the final matrix B */ 1032 PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend)); 1033 for (i = rstart; i < rend; i++) { 1034 PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork)); 1035 PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES)); 1036 PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork)); 1037 } 1038 PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY)); 1039 PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY)); 1040 PetscCall(MatDestroy(&Bmpi)); 1041 1042 } else { /* normal cases */ 1043 1044 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1045 else { 1046 PetscCall(MatCreate(comm, &Bmpi)); 1047 m = PETSC_DECIDE; 1048 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1049 n = PETSC_DECIDE; 1050 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1051 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 1052 PetscCall(MatSetType(Bmpi, MATDENSE)); 1053 PetscCall(MatSetUp(Bmpi)); 1054 } 1055 1056 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1057 PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */ 1058 PetscCall(MatDenseGetLDA(Bmpi, &ldb)); 1059 PetscCall(PetscBLASIntCast(PetscMax(ldb, 1), &lld)); /* local leading dimension */ 1060 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info)); 1061 PetscCheckScaLapackInfo("descinit", info); 1062 1063 /* redistribute matrix */ 1064 PetscCall(MatDenseGetArray(Bmpi, &barray)); 1065 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol)); 1066 PetscCall(MatDenseRestoreArray(Bmpi, &barray)); 1067 1068 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1069 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1070 if (reuse == MAT_INPLACE_MATRIX) { 1071 PetscCall(MatHeaderReplace(A, &Bmpi)); 1072 } else *B = Bmpi; 1073 } 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) 1078 { 1079 const PetscInt *ranges; 1080 PetscMPIInt size; 1081 PetscInt i, n; 1082 1083 PetscFunctionBegin; 1084 *correct = PETSC_TRUE; 1085 PetscCallMPI(MPI_Comm_size(map->comm, &size)); 1086 if (size > 1) { 1087 PetscCall(PetscLayoutGetRanges(map, &ranges)); 1088 n = ranges[1] - ranges[0]; 1089 for (i = 1; i < size; i++) 1090 if (ranges[i + 1] - ranges[i] != n) break; 1091 *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n)); 1092 } 1093 PetscFunctionReturn(PETSC_SUCCESS); 1094 } 1095 1096 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) 1097 { 1098 Mat_ScaLAPACK *b; 1099 Mat Bmpi; 1100 MPI_Comm comm; 1101 PetscInt M = A->rmap->N, N = A->cmap->N, m, n; 1102 const PetscInt *ranges, *rows, *cols; 1103 PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info; 1104 const PetscScalar *aarray; 1105 IS ir, ic; 1106 PetscInt lda; 1107 PetscBool flg; 1108 1109 PetscFunctionBegin; 1110 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1111 1112 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1113 else { 1114 PetscCall(MatCreate(comm, &Bmpi)); 1115 m = PETSC_DECIDE; 1116 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1117 n = PETSC_DECIDE; 1118 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1119 PetscCall(MatSetSizes(Bmpi, m, n, M, N)); 1120 PetscCall(MatSetType(Bmpi, MATSCALAPACK)); 1121 PetscCall(MatSetUp(Bmpi)); 1122 } 1123 b = (Mat_ScaLAPACK *)Bmpi->data; 1124 1125 PetscCall(MatDenseGetLDA(A, &lda)); 1126 PetscCall(MatDenseGetArrayRead(A, &aarray)); 1127 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1128 if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1129 if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1130 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1131 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 1132 PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */ 1133 PetscCall(PetscBLASIntCast(PetscMax(lda, 1), &lld)); /* local leading dimension */ 1134 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info)); 1135 PetscCheckScaLapackInfo("descinit", info); 1136 1137 /* redistribute matrix */ 1138 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol)); 1139 Bmpi->nooffprocentries = PETSC_TRUE; 1140 } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1141 PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n); 1142 b->roworiented = PETSC_FALSE; 1143 PetscCall(MatGetOwnershipIS(A, &ir, &ic)); 1144 PetscCall(ISGetIndices(ir, &rows)); 1145 PetscCall(ISGetIndices(ic, &cols)); 1146 PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES)); 1147 PetscCall(ISRestoreIndices(ir, &rows)); 1148 PetscCall(ISRestoreIndices(ic, &cols)); 1149 PetscCall(ISDestroy(&ic)); 1150 PetscCall(ISDestroy(&ir)); 1151 } 1152 PetscCall(MatDenseRestoreArrayRead(A, &aarray)); 1153 PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY)); 1154 PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY)); 1155 if (reuse == MAT_INPLACE_MATRIX) { 1156 PetscCall(MatHeaderReplace(A, &Bmpi)); 1157 } else *B = Bmpi; 1158 PetscFunctionReturn(PETSC_SUCCESS); 1159 } 1160 1161 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1162 { 1163 Mat mat_scal; 1164 PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols; 1165 const PetscInt *cols; 1166 const PetscScalar *vals; 1167 1168 PetscFunctionBegin; 1169 if (reuse == MAT_REUSE_MATRIX) { 1170 mat_scal = *newmat; 1171 PetscCall(MatZeroEntries(mat_scal)); 1172 } else { 1173 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1174 m = PETSC_DECIDE; 1175 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1176 n = PETSC_DECIDE; 1177 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1178 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1179 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1180 PetscCall(MatSetUp(mat_scal)); 1181 } 1182 for (row = rstart; row < rend; row++) { 1183 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1184 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES)); 1185 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1186 } 1187 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1188 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1189 1190 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1191 else *newmat = mat_scal; 1192 PetscFunctionReturn(PETSC_SUCCESS); 1193 } 1194 1195 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1196 { 1197 Mat mat_scal; 1198 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend; 1199 const PetscInt *cols; 1200 const PetscScalar *vals; 1201 PetscScalar v; 1202 1203 PetscFunctionBegin; 1204 if (reuse == MAT_REUSE_MATRIX) { 1205 mat_scal = *newmat; 1206 PetscCall(MatZeroEntries(mat_scal)); 1207 } else { 1208 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal)); 1209 m = PETSC_DECIDE; 1210 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M)); 1211 n = PETSC_DECIDE; 1212 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N)); 1213 PetscCall(MatSetSizes(mat_scal, m, n, M, N)); 1214 PetscCall(MatSetType(mat_scal, MATSCALAPACK)); 1215 PetscCall(MatSetUp(mat_scal)); 1216 } 1217 PetscCall(MatGetRowUpperTriangular(A)); 1218 for (row = rstart; row < rend; row++) { 1219 PetscCall(MatGetRow(A, row, &ncols, &cols, &vals)); 1220 PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES)); 1221 for (j = 0; j < ncols; j++) { /* lower triangular part */ 1222 if (cols[j] == row) continue; 1223 v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j]; 1224 PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES)); 1225 } 1226 PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals)); 1227 } 1228 PetscCall(MatRestoreRowUpperTriangular(A)); 1229 PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY)); 1230 PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY)); 1231 1232 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal)); 1233 else *newmat = mat_scal; 1234 PetscFunctionReturn(PETSC_SUCCESS); 1235 } 1236 1237 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1238 { 1239 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1240 PetscInt sz = 0; 1241 1242 PetscFunctionBegin; 1243 PetscCall(PetscLayoutSetUp(A->rmap)); 1244 PetscCall(PetscLayoutSetUp(A->cmap)); 1245 if (!a->lld) a->lld = a->locr; 1246 1247 PetscCall(PetscFree(a->loc)); 1248 PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); 1249 PetscCall(PetscCalloc1(sz, &a->loc)); 1250 1251 A->preallocated = PETSC_TRUE; 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1256 { 1257 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1258 Mat_ScaLAPACK_Grid *grid; 1259 PetscBool flg; 1260 MPI_Comm icomm; 1261 1262 PetscFunctionBegin; 1263 PetscCall(MatStashDestroy_Private(&A->stash)); 1264 PetscCall(PetscFree(a->loc)); 1265 PetscCall(PetscFree(a->pivots)); 1266 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1267 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1268 if (--grid->grid_refct == 0) { 1269 Cblacs_gridexit(grid->ictxt); 1270 Cblacs_gridexit(grid->ictxrow); 1271 Cblacs_gridexit(grid->ictxcol); 1272 PetscCall(PetscFree(grid)); 1273 PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval)); 1274 } 1275 PetscCall(PetscCommDestroy(&icomm)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL)); 1277 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL)); 1279 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL)); 1280 PetscCall(PetscFree(A->data)); 1281 PetscFunctionReturn(PETSC_SUCCESS); 1282 } 1283 1284 static PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1285 { 1286 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1287 PetscBLASInt info = 0; 1288 PetscBool flg; 1289 1290 PetscFunctionBegin; 1291 PetscCall(PetscLayoutSetUp(A->rmap)); 1292 PetscCall(PetscLayoutSetUp(A->cmap)); 1293 1294 /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1295 PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg)); 1296 PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1297 PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg)); 1298 PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1299 1300 /* compute local sizes */ 1301 PetscCall(PetscBLASIntCast(A->rmap->N, &a->M)); 1302 PetscCall(PetscBLASIntCast(A->cmap->N, &a->N)); 1303 a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 1304 a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 1305 a->lld = PetscMax(1, a->locr); 1306 1307 /* allocate local array */ 1308 PetscCall(MatScaLAPACKSetPreallocation(A)); 1309 1310 /* set up ScaLAPACK descriptor */ 1311 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info)); 1312 PetscCheckScaLapackInfo("descinit", info); 1313 PetscFunctionReturn(PETSC_SUCCESS); 1314 } 1315 1316 static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) 1317 { 1318 PetscInt nstash, reallocs; 1319 1320 PetscFunctionBegin; 1321 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1322 PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL)); 1323 PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs)); 1324 PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) 1329 { 1330 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1331 PetscMPIInt n; 1332 PetscInt i, flg, *row, *col; 1333 PetscScalar *val; 1334 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1335 1336 PetscFunctionBegin; 1337 if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 1338 while (1) { 1339 PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg)); 1340 if (!flg) break; 1341 for (i = 0; i < n; i++) { 1342 PetscCall(PetscBLASIntCast(row[i] + 1, &gridx)); 1343 PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx)); 1344 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1345 PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process"); 1346 switch (A->insertmode) { 1347 case INSERT_VALUES: 1348 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; 1349 break; 1350 case ADD_VALUES: 1351 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; 1352 break; 1353 default: 1354 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode); 1355 } 1356 } 1357 } 1358 PetscCall(MatStashScatterEnd_Private(&A->stash)); 1359 PetscFunctionReturn(PETSC_SUCCESS); 1360 } 1361 1362 static PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) 1363 { 1364 Mat Adense, As; 1365 MPI_Comm comm; 1366 1367 PetscFunctionBegin; 1368 PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm)); 1369 PetscCall(MatCreate(comm, &Adense)); 1370 PetscCall(MatSetType(Adense, MATDENSE)); 1371 PetscCall(MatLoad(Adense, viewer)); 1372 PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As)); 1373 PetscCall(MatDestroy(&Adense)); 1374 PetscCall(MatHeaderReplace(newMat, &As)); 1375 PetscFunctionReturn(PETSC_SUCCESS); 1376 } 1377 1378 static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK, 1379 NULL, 1380 NULL, 1381 MatMult_ScaLAPACK, 1382 /* 4*/ MatMultAdd_ScaLAPACK, 1383 MatMultTranspose_ScaLAPACK, 1384 MatMultTransposeAdd_ScaLAPACK, 1385 MatSolve_ScaLAPACK, 1386 MatSolveAdd_ScaLAPACK, 1387 NULL, 1388 /*10*/ NULL, 1389 MatLUFactor_ScaLAPACK, 1390 MatCholeskyFactor_ScaLAPACK, 1391 NULL, 1392 MatTranspose_ScaLAPACK, 1393 /*15*/ MatGetInfo_ScaLAPACK, 1394 NULL, 1395 MatGetDiagonal_ScaLAPACK, 1396 MatDiagonalScale_ScaLAPACK, 1397 MatNorm_ScaLAPACK, 1398 /*20*/ MatAssemblyBegin_ScaLAPACK, 1399 MatAssemblyEnd_ScaLAPACK, 1400 MatSetOption_ScaLAPACK, 1401 MatZeroEntries_ScaLAPACK, 1402 /*24*/ NULL, 1403 MatLUFactorSymbolic_ScaLAPACK, 1404 MatLUFactorNumeric_ScaLAPACK, 1405 MatCholeskyFactorSymbolic_ScaLAPACK, 1406 MatCholeskyFactorNumeric_ScaLAPACK, 1407 /*29*/ MatSetUp_ScaLAPACK, 1408 NULL, 1409 NULL, 1410 NULL, 1411 NULL, 1412 /*34*/ MatDuplicate_ScaLAPACK, 1413 NULL, 1414 NULL, 1415 NULL, 1416 NULL, 1417 /*39*/ MatAXPY_ScaLAPACK, 1418 NULL, 1419 NULL, 1420 NULL, 1421 MatCopy_ScaLAPACK, 1422 /*44*/ NULL, 1423 MatScale_ScaLAPACK, 1424 MatShift_ScaLAPACK, 1425 NULL, 1426 NULL, 1427 /*49*/ NULL, 1428 NULL, 1429 NULL, 1430 NULL, 1431 NULL, 1432 /*54*/ NULL, 1433 NULL, 1434 NULL, 1435 NULL, 1436 NULL, 1437 /*59*/ NULL, 1438 MatDestroy_ScaLAPACK, 1439 MatView_ScaLAPACK, 1440 NULL, 1441 NULL, 1442 /*64*/ NULL, 1443 NULL, 1444 NULL, 1445 NULL, 1446 NULL, 1447 /*69*/ NULL, 1448 NULL, 1449 MatConvert_ScaLAPACK_Dense, 1450 NULL, 1451 NULL, 1452 /*74*/ NULL, 1453 NULL, 1454 NULL, 1455 NULL, 1456 NULL, 1457 /*79*/ NULL, 1458 NULL, 1459 NULL, 1460 NULL, 1461 MatLoad_ScaLAPACK, 1462 /*84*/ NULL, 1463 NULL, 1464 NULL, 1465 NULL, 1466 NULL, 1467 /*89*/ NULL, 1468 NULL, 1469 MatMatMultNumeric_ScaLAPACK, 1470 NULL, 1471 NULL, 1472 /*94*/ NULL, 1473 NULL, 1474 NULL, 1475 MatMatTransposeMultNumeric_ScaLAPACK, 1476 NULL, 1477 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1478 NULL, 1479 NULL, 1480 MatConjugate_ScaLAPACK, 1481 NULL, 1482 /*104*/ NULL, 1483 NULL, 1484 NULL, 1485 NULL, 1486 NULL, 1487 /*109*/ MatMatSolve_ScaLAPACK, 1488 NULL, 1489 NULL, 1490 NULL, 1491 MatMissingDiagonal_ScaLAPACK, 1492 /*114*/ NULL, 1493 NULL, 1494 NULL, 1495 NULL, 1496 NULL, 1497 /*119*/ NULL, 1498 MatHermitianTranspose_ScaLAPACK, 1499 MatMultHermitianTranspose_ScaLAPACK, 1500 MatMultHermitianTransposeAdd_ScaLAPACK, 1501 NULL, 1502 /*124*/ NULL, 1503 NULL, 1504 NULL, 1505 NULL, 1506 NULL, 1507 /*129*/ NULL, 1508 NULL, 1509 NULL, 1510 MatTransposeMatMultNumeric_ScaLAPACK, 1511 NULL, 1512 /*134*/ NULL, 1513 NULL, 1514 NULL, 1515 NULL, 1516 NULL, 1517 NULL, 1518 /*140*/ NULL, 1519 NULL, 1520 NULL, 1521 NULL, 1522 NULL, 1523 /*145*/ NULL, 1524 NULL, 1525 NULL, 1526 NULL, 1527 NULL, 1528 /*150*/ NULL, 1529 NULL, 1530 NULL, 1531 NULL, 1532 NULL, 1533 /*155*/ NULL, 1534 NULL}; 1535 1536 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) 1537 { 1538 PetscInt *owner, *startv, *starti, bs2; 1539 PetscInt size = stash->size, nsends; 1540 PetscInt *sindices, **rindices, j, l; 1541 PetscScalar **rvalues, *svalues; 1542 MPI_Comm comm = stash->comm; 1543 MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 1544 PetscMPIInt tag1 = stash->tag1, tag2 = stash->tag2, *sizes, *nlengths, nreceives, insends, ii; 1545 PetscInt *sp_idx, *sp_idy; 1546 PetscScalar *sp_val; 1547 PetscMatStashSpace space, space_next; 1548 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 1549 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data; 1550 1551 PetscFunctionBegin; 1552 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1553 InsertMode addv; 1554 PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 1555 PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 1556 mat->insertmode = addv; /* in case this processor had no cache */ 1557 } 1558 1559 bs2 = stash->bs * stash->bs; 1560 1561 /* first count number of contributors to each processor */ 1562 PetscCall(PetscCalloc1(size, &nlengths)); 1563 PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1564 1565 ii = j = 0; 1566 space = stash->space_head; 1567 while (space) { 1568 space_next = space->next; 1569 for (l = 0; l < space->local_used; l++) { 1570 PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 1571 PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1572 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1573 j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 1574 nlengths[j]++; 1575 owner[ii] = j; 1576 ii++; 1577 } 1578 space = space_next; 1579 } 1580 1581 /* Now check what procs get messages - and compute nsends. */ 1582 PetscCall(PetscCalloc1(size, &sizes)); 1583 nsends = 0; 1584 for (PetscMPIInt i = 0; i < size; i++) { 1585 if (nlengths[i]) { 1586 sizes[i] = 1; 1587 nsends++; 1588 } 1589 } 1590 1591 { 1592 PetscMPIInt *onodes, *olengths; 1593 1594 /* Determine the number of messages to expect, their lengths, from from-ids */ 1595 PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 1596 PetscCall(PetscMPIIntCast(nsends, &insends)); 1597 PetscCall(PetscGatherMessageLengths(comm, insends, nreceives, nlengths, &onodes, &olengths)); 1598 /* since clubbing row,col - lengths are multiplied by 2 */ 1599 for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2; 1600 PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1601 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1602 for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] = (PetscMPIInt)(olengths[i] * bs2 / 2); 1603 PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 1604 PetscCall(PetscFree(onodes)); 1605 PetscCall(PetscFree(olengths)); 1606 } 1607 1608 /* do sends: 1609 1) starts[i] gives the starting index in svalues for stuff going to 1610 the ith processor 1611 */ 1612 PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 1613 PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 1614 PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1615 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1616 startv[0] = 0; 1617 starti[0] = 0; 1618 for (PetscMPIInt i = 1; i < size; i++) { 1619 startv[i] = startv[i - 1] + nlengths[i - 1]; 1620 starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1621 } 1622 1623 ii = 0; 1624 space = stash->space_head; 1625 while (space) { 1626 space_next = space->next; 1627 sp_idx = space->idx; 1628 sp_idy = space->idy; 1629 sp_val = space->val; 1630 for (l = 0; l < space->local_used; l++) { 1631 j = owner[ii]; 1632 if (bs2 == 1) { 1633 svalues[startv[j]] = sp_val[l]; 1634 } else { 1635 PetscInt k; 1636 PetscScalar *buf1, *buf2; 1637 buf1 = svalues + bs2 * startv[j]; 1638 buf2 = space->val + bs2 * l; 1639 for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1640 } 1641 sindices[starti[j]] = sp_idx[l]; 1642 sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1643 startv[j]++; 1644 starti[j]++; 1645 ii++; 1646 } 1647 space = space_next; 1648 } 1649 startv[0] = 0; 1650 for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1651 1652 for (PetscMPIInt i = 0, count = 0; i < size; i++) { 1653 if (sizes[i]) { 1654 PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 1655 PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1656 } 1657 } 1658 #if defined(PETSC_USE_INFO) 1659 PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1660 for (PetscMPIInt i = 0; i < size; i++) { 1661 if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1662 } 1663 #endif 1664 PetscCall(PetscFree(nlengths)); 1665 PetscCall(PetscFree(owner)); 1666 PetscCall(PetscFree2(startv, starti)); 1667 PetscCall(PetscFree(sizes)); 1668 1669 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1670 PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 1671 1672 for (PetscMPIInt i = 0; i < nreceives; i++) { 1673 recv_waits[2 * i] = recv_waits1[i]; 1674 recv_waits[2 * i + 1] = recv_waits2[i]; 1675 } 1676 stash->recv_waits = recv_waits; 1677 1678 PetscCall(PetscFree(recv_waits1)); 1679 PetscCall(PetscFree(recv_waits2)); 1680 1681 stash->svalues = svalues; 1682 stash->sindices = sindices; 1683 stash->rvalues = rvalues; 1684 stash->rindices = rindices; 1685 stash->send_waits = send_waits; 1686 stash->nsends = (PetscMPIInt)nsends; 1687 stash->nrecvs = nreceives; 1688 stash->reproduce_count = 0; 1689 PetscFunctionReturn(PETSC_SUCCESS); 1690 } 1691 1692 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) 1693 { 1694 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1695 1696 PetscFunctionBegin; 1697 PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp"); 1698 PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb); 1699 PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb); 1700 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1701 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 /*@ 1706 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 1707 the `MATSCALAPACK` matrix 1708 1709 Logically Collective 1710 1711 Input Parameters: 1712 + A - a `MATSCALAPACK` matrix 1713 . mb - the row block size 1714 - nb - the column block size 1715 1716 Level: intermediate 1717 1718 Note: 1719 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1720 1721 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1722 @*/ 1723 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) 1724 { 1725 PetscFunctionBegin; 1726 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1727 PetscValidLogicalCollectiveInt(A, mb, 2); 1728 PetscValidLogicalCollectiveInt(A, nb, 3); 1729 PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb)); 1730 PetscFunctionReturn(PETSC_SUCCESS); 1731 } 1732 1733 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) 1734 { 1735 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 1736 1737 PetscFunctionBegin; 1738 if (mb) *mb = a->mb; 1739 if (nb) *nb = a->nb; 1740 PetscFunctionReturn(PETSC_SUCCESS); 1741 } 1742 1743 /*@ 1744 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 1745 the `MATSCALAPACK` matrix 1746 1747 Not Collective 1748 1749 Input Parameter: 1750 . A - a `MATSCALAPACK` matrix 1751 1752 Output Parameters: 1753 + mb - the row block size 1754 - nb - the column block size 1755 1756 Level: intermediate 1757 1758 Note: 1759 This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices 1760 1761 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1762 @*/ 1763 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) 1764 { 1765 PetscFunctionBegin; 1766 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1767 PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb)); 1768 PetscFunctionReturn(PETSC_SUCCESS); 1769 } 1770 1771 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 1772 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 1773 1774 /*MC 1775 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1776 1777 Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK 1778 1779 Options Database Keys: 1780 + -mat_type scalapack - sets the matrix type to `MATSCALAPACK` 1781 . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu` 1782 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1783 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1784 1785 Level: intermediate 1786 1787 Note: 1788 Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 1789 range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 1790 the given rank. 1791 1792 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1793 M*/ 1794 1795 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1796 { 1797 Mat_ScaLAPACK *a; 1798 PetscBool flg, flg1; 1799 Mat_ScaLAPACK_Grid *grid; 1800 MPI_Comm icomm; 1801 PetscBLASInt nprow, npcol, myrow, mycol; 1802 PetscInt optv1, k = 2, array[2] = {0, 0}; 1803 PetscMPIInt size; 1804 1805 PetscFunctionBegin; 1806 A->ops[0] = MatOps_Values; 1807 A->insertmode = NOT_SET_VALUES; 1808 1809 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1810 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1811 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1812 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1813 A->stash.ScatterDestroy = NULL; 1814 1815 PetscCall(PetscNew(&a)); 1816 A->data = (void *)a; 1817 1818 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1819 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1820 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, NULL)); 1821 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1822 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1823 } 1824 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1825 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1826 if (!flg) { 1827 PetscCall(PetscNew(&grid)); 1828 1829 PetscCallMPI(MPI_Comm_size(icomm, &size)); 1830 PetscCall(PetscBLASIntCast(PetscSqrtReal((PetscReal)size) + 0.001, &grid->nprow)); 1831 1832 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1833 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1834 if (flg1) { 1835 PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1836 PetscCall(PetscBLASIntCast(optv1, &grid->nprow)); 1837 } 1838 PetscOptionsEnd(); 1839 1840 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1841 grid->npcol = size / grid->nprow; 1842 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1843 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1844 grid->ictxt = Csys2blacs_handle(icomm); 1845 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1846 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1847 grid->grid_refct = 1; 1848 grid->nprow = nprow; 1849 grid->npcol = npcol; 1850 grid->myrow = myrow; 1851 grid->mycol = mycol; 1852 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1853 grid->ictxrow = Csys2blacs_handle(icomm); 1854 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1855 grid->ictxcol = Csys2blacs_handle(icomm); 1856 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1857 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1858 1859 } else grid->grid_refct++; 1860 PetscCall(PetscCommDestroy(&icomm)); 1861 a->grid = grid; 1862 a->mb = DEFAULT_BLOCKSIZE; 1863 a->nb = DEFAULT_BLOCKSIZE; 1864 1865 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1866 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1867 if (flg) { 1868 a->mb = (PetscMPIInt)array[0]; 1869 a->nb = (k > 1) ? (PetscMPIInt)array[1] : a->mb; 1870 } 1871 PetscOptionsEnd(); 1872 1873 a->roworiented = PETSC_TRUE; 1874 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1875 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1876 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1877 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 /*@C 1882 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1883 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1884 1885 Collective 1886 1887 Input Parameters: 1888 + comm - MPI communicator 1889 . mb - row block size (or `PETSC_DECIDE` to have it set) 1890 . nb - column block size (or `PETSC_DECIDE` to have it set) 1891 . M - number of global rows 1892 . N - number of global columns 1893 . rsrc - coordinate of process that owns the first row of the distributed matrix 1894 - csrc - coordinate of process that owns the first column of the distributed matrix 1895 1896 Output Parameter: 1897 . A - the matrix 1898 1899 Options Database Key: 1900 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1901 1902 Level: intermediate 1903 1904 Notes: 1905 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 1906 1907 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1908 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1909 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1910 1911 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1912 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1913 significance and are thus ignored. The block sizes refer to the values 1914 used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1915 1916 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1917 @*/ 1918 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1919 { 1920 Mat_ScaLAPACK *a; 1921 PetscInt m, n; 1922 1923 PetscFunctionBegin; 1924 PetscCall(MatCreate(comm, A)); 1925 PetscCall(MatSetType(*A, MATSCALAPACK)); 1926 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1927 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1928 m = PETSC_DECIDE; 1929 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1930 n = PETSC_DECIDE; 1931 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1932 PetscCall(MatSetSizes(*A, m, n, M, N)); 1933 a = (Mat_ScaLAPACK *)(*A)->data; 1934 PetscCall(PetscBLASIntCast(M, &a->M)); 1935 PetscCall(PetscBLASIntCast(N, &a->N)); 1936 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1937 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1938 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 1939 PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 1940 PetscCall(MatSetUp(*A)); 1941 PetscFunctionReturn(PETSC_SUCCESS); 1942 } 1943