1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(PETSC_SUCCESS); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 39 if (iascii) { 40 PetscCall(PetscViewerGetFormat(viewer, &format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 43 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 44 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 45 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x 
%d\n", (int)a->locr, (int)a->locc)); 46 PetscFunctionReturn(PETSC_SUCCESS); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 53 PetscCall(MatView(Adense, viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 61 PetscLogDouble isend[2], irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld * a->locc; /* locally allocated */ 67 isend[1] = a->locr * a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = 0; /* REVIEW ME */ 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented 
= flg; 106 break; 107 default: 108 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]); 109 } 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) 114 { 115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 116 PetscInt i, j; 117 PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc; 118 PetscBool roworiented = a->roworiented; 119 120 PetscFunctionBegin; 121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode); 122 for (i = 0; i < nr; i++) { 123 if (rows[i] < 0) continue; 124 PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); 125 for (j = 0; j < nc; j++) { 126 if (cols[j] < 0) continue; 127 PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx)); 128 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 129 if (rsrc == a->grid->myrow && csrc == a->grid->mycol) { 130 if (roworiented) { 131 switch (imode) { 132 case INSERT_VALUES: 133 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; 134 break; 135 default: 136 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; 137 break; 138 } 139 } else { 140 switch (imode) { 141 case INSERT_VALUES: 142 a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; 143 break; 144 default: 145 a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; 146 break; 147 } 148 } 149 } else { 150 PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 151 A->assembled = PETSC_FALSE; 152 PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? 
vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES))); 153 } 154 } 155 } 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscBool hermitian, PetscScalar beta, const PetscScalar *x, PetscScalar *y) 160 { 161 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 162 PetscScalar *x2d, *y2d, alpha = 1.0; 163 const PetscInt *ranges; 164 PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info; 165 166 PetscFunctionBegin; 167 if (transpose) { 168 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 169 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 170 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 171 xlld = PetscMax(1, A->rmap->n); 172 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 173 PetscCheckScaLapackInfo("descinit", info); 174 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 175 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */ 176 ylld = 1; 177 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info)); 178 PetscCheckScaLapackInfo("descinit", info); 179 180 /* allocate 2d vectors */ 181 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 182 lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 183 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 184 xlld = PetscMax(1, lszx); 185 186 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 187 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 188 PetscCheckScaLapackInfo("descinit", info); 189 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, 
&a->grid->ictxt, &ylld, &info)); 190 PetscCheckScaLapackInfo("descinit", info); 191 192 /* redistribute x as a column of a 2d matrix */ 193 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 194 195 /* redistribute y as a row of a 2d matrix */ 196 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow)); 197 198 /* call PBLAS subroutine */ 199 if (hermitian) PetscCallBLAS("PBLASgemv", PBLASgemv_("C", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 200 else PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 201 202 /* redistribute y from a row of a 2d matrix */ 203 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow)); 204 205 } else { /* non-transpose */ 206 207 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 208 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 209 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */ 210 xlld = 1; 211 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info)); 212 PetscCheckScaLapackInfo("descinit", info); 213 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 214 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */ 215 ylld = PetscMax(1, A->rmap->n); 216 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info)); 217 PetscCheckScaLapackInfo("descinit", info); 218 219 /* allocate 2d vectors */ 220 lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 221 lszx = SCALAPACKnumroc_(&a->N, 
&a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 222 PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d)); 223 ylld = PetscMax(1, lszy); 224 225 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 226 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 227 PetscCheckScaLapackInfo("descinit", info); 228 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info)); 229 PetscCheckScaLapackInfo("descinit", info); 230 231 /* redistribute x as a row of a 2d matrix */ 232 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow)); 233 234 /* redistribute y as a column of a 2d matrix */ 235 if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol)); 236 237 /* call PBLAS subroutine */ 238 PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one)); 239 240 /* redistribute y from a column of a 2d matrix */ 241 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol)); 242 } 243 PetscCall(PetscFree2(x2d, y2d)); 244 PetscFunctionReturn(PETSC_SUCCESS); 245 } 246 247 static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) 248 { 249 const PetscScalar *xarray; 250 PetscScalar *yarray; 251 252 PetscFunctionBegin; 253 PetscCall(VecGetArrayRead(x, &xarray)); 254 PetscCall(VecGetArray(y, &yarray)); 255 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 0.0, xarray, yarray)); 256 PetscCall(VecRestoreArrayRead(x, &xarray)); 257 PetscCall(VecRestoreArray(y, &yarray)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 static PetscErrorCode 
MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 262 { 263 const PetscScalar *xarray; 264 PetscScalar *yarray; 265 266 PetscFunctionBegin; 267 PetscCall(VecGetArrayRead(x, &xarray)); 268 PetscCall(VecGetArray(y, &yarray)); 269 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 0.0, xarray, yarray)); 270 PetscCall(VecRestoreArrayRead(x, &xarray)); 271 PetscCall(VecRestoreArray(y, &yarray)); 272 PetscFunctionReturn(PETSC_SUCCESS); 273 } 274 275 static PetscErrorCode MatMultHermitianTranspose_ScaLAPACK(Mat A, Vec x, Vec y) 276 { 277 const PetscScalar *xarray; 278 PetscScalar *yarray; 279 280 PetscFunctionBegin; 281 PetscCall(VecGetArrayRead(x, &xarray)); 282 PetscCall(VecGetArrayWrite(y, &yarray)); 283 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 0.0, xarray, yarray)); 284 PetscCall(VecRestoreArrayRead(x, &xarray)); 285 PetscCall(VecRestoreArrayWrite(y, &yarray)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 290 { 291 const PetscScalar *xarray; 292 PetscScalar *zarray; 293 294 PetscFunctionBegin; 295 if (y != z) PetscCall(VecCopy(y, z)); 296 PetscCall(VecGetArrayRead(x, &xarray)); 297 PetscCall(VecGetArray(z, &zarray)); 298 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, PETSC_FALSE, 1.0, xarray, zarray)); 299 PetscCall(VecRestoreArrayRead(x, &xarray)); 300 PetscCall(VecRestoreArray(z, &zarray)); 301 PetscFunctionReturn(PETSC_SUCCESS); 302 } 303 304 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 305 { 306 const PetscScalar *xarray; 307 PetscScalar *zarray; 308 309 PetscFunctionBegin; 310 if (y != z) PetscCall(VecCopy(y, z)); 311 PetscCall(VecGetArrayRead(x, &xarray)); 312 PetscCall(VecGetArray(z, &zarray)); 313 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_FALSE, 1.0, xarray, zarray)); 314 PetscCall(VecRestoreArrayRead(x, &xarray)); 315 PetscCall(VecRestoreArray(z, &zarray)); 316 PetscFunctionReturn(PETSC_SUCCESS); 317 } 
318 319 static PetscErrorCode MatMultHermitianTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) 320 { 321 const PetscScalar *xarray; 322 PetscScalar *zarray; 323 324 PetscFunctionBegin; 325 if (y != z) PetscCall(VecCopy(y, z)); 326 PetscCall(VecGetArrayRead(x, &xarray)); 327 PetscCall(VecGetArray(z, &zarray)); 328 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, PETSC_TRUE, 1.0, xarray, zarray)); 329 PetscCall(VecRestoreArrayRead(x, &xarray)); 330 PetscCall(VecRestoreArray(z, &zarray)); 331 PetscFunctionReturn(PETSC_SUCCESS); 332 } 333 334 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 335 { 336 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 337 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 338 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 339 PetscScalar sone = 1.0, zero = 0.0; 340 PetscBLASInt one = 1; 341 342 PetscFunctionBegin; 343 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 344 C->assembled = PETSC_TRUE; 345 PetscFunctionReturn(PETSC_SUCCESS); 346 } 347 348 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 349 { 350 PetscFunctionBegin; 351 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 352 PetscCall(MatSetType(C, MATSCALAPACK)); 353 PetscCall(MatSetUp(C)); 354 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 static PetscErrorCode MatTransposeMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 359 { 360 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 361 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 362 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 363 PetscScalar sone = 1.0, zero = 0.0; 364 PetscBLASInt one = 1; 365 366 PetscFunctionBegin; 367 PetscCallBLAS("PBLASgemm", PBLASgemm_("T", "N", &a->N, &b->N, &a->M, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 
368 C->assembled = PETSC_TRUE; 369 PetscFunctionReturn(PETSC_SUCCESS); 370 } 371 372 static PetscErrorCode MatTransposeMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 373 { 374 PetscFunctionBegin; 375 PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 376 PetscCall(MatSetType(C, MATSCALAPACK)); 377 PetscCall(MatSetUp(C)); 378 C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_ScaLAPACK; 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) 383 { 384 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 385 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 386 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 387 PetscScalar sone = 1.0, zero = 0.0; 388 PetscBLASInt one = 1; 389 390 PetscFunctionBegin; 391 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 392 C->assembled = PETSC_TRUE; 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 396 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) 397 { 398 PetscFunctionBegin; 399 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 400 PetscCall(MatSetType(C, MATSCALAPACK)); 401 PetscCall(MatSetUp(C)); 402 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_ScaLAPACK; 403 PetscFunctionReturn(PETSC_SUCCESS); 404 } 405 406 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 407 { 408 Mat_Product *product = C->product; 409 410 PetscFunctionBegin; 411 switch (product->type) { 412 case MATPRODUCT_AB: 413 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 414 C->ops->productsymbolic = MatProductSymbolic_AB; 415 break; 416 case MATPRODUCT_AtB: 417 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_ScaLAPACK; 418 C->ops->productsymbolic = MatProductSymbolic_AtB; 419 break; 420 case 
MATPRODUCT_ABt: 421 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 422 C->ops->productsymbolic = MatProductSymbolic_ABt; 423 break; 424 default: 425 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 426 } 427 PetscFunctionReturn(PETSC_SUCCESS); 428 } 429 430 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) 431 { 432 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 433 PetscScalar *darray, *d2d, v; 434 const PetscInt *ranges; 435 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 436 437 PetscFunctionBegin; 438 PetscCall(VecGetArray(D, &darray)); 439 440 if (A->rmap->N <= A->cmap->N) { /* row version */ 441 442 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 443 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 444 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 445 dlld = PetscMax(1, A->rmap->n); 446 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 447 PetscCheckScaLapackInfo("descinit", info); 448 449 /* allocate 2d vector */ 450 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 451 PetscCall(PetscCalloc1(lszd, &d2d)); 452 dlld = PetscMax(1, lszd); 453 454 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 455 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 456 PetscCheckScaLapackInfo("descinit", info); 457 458 /* collect diagonal */ 459 for (j = 1; j <= a->M; j++) { 460 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 461 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 462 } 463 464 /* redistribute d from a column of a 2d matrix */ 465 
PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 466 PetscCall(PetscFree(d2d)); 467 468 } else { /* column version */ 469 470 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 471 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 472 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 473 dlld = 1; 474 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 475 PetscCheckScaLapackInfo("descinit", info); 476 477 /* allocate 2d vector */ 478 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 479 PetscCall(PetscCalloc1(lszd, &d2d)); 480 481 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 482 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 483 PetscCheckScaLapackInfo("descinit", info); 484 485 /* collect diagonal */ 486 for (j = 1; j <= a->N; j++) { 487 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc)); 488 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v)); 489 } 490 491 /* redistribute d from a row of a 2d matrix */ 492 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow)); 493 PetscCall(PetscFree(d2d)); 494 } 495 496 PetscCall(VecRestoreArray(D, &darray)); 497 PetscCall(VecAssemblyBegin(D)); 498 PetscCall(VecAssemblyEnd(D)); 499 PetscFunctionReturn(PETSC_SUCCESS); 500 } 501 502 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) 503 { 504 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 505 const PetscScalar *d; 506 const PetscInt *ranges; 507 PetscScalar *d2d; 508 PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 509 510 PetscFunctionBegin; 
511 if (R) { 512 PetscCall(VecGetArrayRead(R, &d)); 513 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 514 PetscCall(PetscLayoutGetRanges(A->cmap, &ranges)); 515 PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */ 516 dlld = 1; 517 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info)); 518 PetscCheckScaLapackInfo("descinit", info); 519 520 /* allocate 2d vector */ 521 lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol); 522 PetscCall(PetscCalloc1(lszd, &d2d)); 523 524 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 525 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 526 PetscCheckScaLapackInfo("descinit", info); 527 528 /* redistribute d to a row of a 2d matrix */ 529 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow)); 530 531 /* broadcast along process columns */ 532 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld); 533 else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol); 534 535 /* local scaling */ 536 for (j = 0; j < a->locc; j++) 537 for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j]; 538 539 PetscCall(PetscFree(d2d)); 540 PetscCall(VecRestoreArrayRead(R, &d)); 541 } 542 if (L) { 543 PetscCall(VecGetArrayRead(L, &d)); 544 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 545 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 546 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 547 dlld = PetscMax(1, A->rmap->n); 548 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 549 PetscCheckScaLapackInfo("descinit", info); 550 551 /* allocate 2d vector */ 552 lszd = 
SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 553 PetscCall(PetscCalloc1(lszd, &d2d)); 554 dlld = PetscMax(1, lszd); 555 556 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 557 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 558 PetscCheckScaLapackInfo("descinit", info); 559 560 /* redistribute d to a column of a 2d matrix */ 561 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol)); 562 563 /* broadcast along process rows */ 564 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld); 565 else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0); 566 567 /* local scaling */ 568 for (i = 0; i < a->locr; i++) 569 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i]; 570 571 PetscCall(PetscFree(d2d)); 572 PetscCall(VecRestoreArrayRead(L, &d)); 573 } 574 PetscFunctionReturn(PETSC_SUCCESS); 575 } 576 577 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) 578 { 579 PetscFunctionBegin; 580 *missing = PETSC_FALSE; 581 PetscFunctionReturn(PETSC_SUCCESS); 582 } 583 584 static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) 585 { 586 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 587 PetscBLASInt n, one = 1; 588 589 PetscFunctionBegin; 590 n = x->lld * x->locc; 591 PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one)); 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) 596 { 597 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 598 PetscBLASInt i, n; 599 PetscScalar v; 600 601 PetscFunctionBegin; 602 n = PetscMin(x->M, x->N); 603 for (i = 1; i <= n; i++) { 604 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc)); 605 v += alpha; 606 PetscCallBLAS("SCALAPACKelset", 
SCALAPACKelset_(x->loc, &i, &i, x->desc, &v)); 607 } 608 PetscFunctionReturn(PETSC_SUCCESS); 609 } 610 611 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 612 { 613 Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 614 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 615 PetscBLASInt one = 1; 616 PetscScalar beta = 1.0; 617 618 PetscFunctionBegin; 619 MatScaLAPACKCheckDistribution(Y, 1, X, 3); 620 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 621 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 622 PetscFunctionReturn(PETSC_SUCCESS); 623 } 624 625 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) 626 { 627 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 628 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 629 630 PetscFunctionBegin; 631 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 632 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 633 PetscFunctionReturn(PETSC_SUCCESS); 634 } 635 636 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) 637 { 638 Mat Bs; 639 MPI_Comm comm; 640 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 641 642 PetscFunctionBegin; 643 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 644 PetscCall(MatCreate(comm, &Bs)); 645 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 646 PetscCall(MatSetType(Bs, MATSCALAPACK)); 647 b = (Mat_ScaLAPACK *)Bs->data; 648 b->M = a->M; 649 b->N = a->N; 650 b->mb = a->mb; 651 b->nb = a->nb; 652 b->rsrc = a->rsrc; 653 b->csrc = a->csrc; 654 PetscCall(MatSetUp(Bs)); 655 *B = Bs; 656 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 657 Bs->assembled = PETSC_TRUE; 658 PetscFunctionReturn(PETSC_SUCCESS); 659 } 660 661 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 662 { 663 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 664 
Mat Bs = *B; 665 PetscBLASInt one = 1; 666 PetscScalar sone = 1.0, zero = 0.0; 667 #if defined(PETSC_USE_COMPLEX) 668 PetscInt i, j; 669 #endif 670 671 PetscFunctionBegin; 672 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 673 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 674 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 675 *B = Bs; 676 b = (Mat_ScaLAPACK *)Bs->data; 677 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 678 #if defined(PETSC_USE_COMPLEX) 679 /* undo conjugation */ 680 for (i = 0; i < b->locr; i++) 681 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 682 #endif 683 Bs->assembled = PETSC_TRUE; 684 PetscFunctionReturn(PETSC_SUCCESS); 685 } 686 687 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 688 { 689 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 690 PetscInt i, j; 691 692 PetscFunctionBegin; 693 for (i = 0; i < a->locr; i++) 694 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } 697 698 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) 699 { 700 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 701 Mat Bs = *B; 702 PetscBLASInt one = 1; 703 PetscScalar sone = 1.0, zero = 0.0; 704 705 PetscFunctionBegin; 706 PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 707 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 708 *B = Bs; 709 b = (Mat_ScaLAPACK *)Bs->data; 710 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 711 
Bs->assembled = PETSC_TRUE; 712 PetscFunctionReturn(PETSC_SUCCESS); 713 } 714 715 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) 716 { 717 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 718 PetscScalar *x, *x2d; 719 const PetscInt *ranges; 720 PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info; 721 722 PetscFunctionBegin; 723 PetscCall(VecCopy(B, X)); 724 PetscCall(VecGetArray(X, &x)); 725 726 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 727 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 728 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */ 729 xlld = PetscMax(1, A->rmap->n); 730 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info)); 731 PetscCheckScaLapackInfo("descinit", info); 732 733 /* allocate 2d vector */ 734 lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 735 PetscCall(PetscMalloc1(lszx, &x2d)); 736 xlld = PetscMax(1, lszx); 737 738 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 739 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info)); 740 PetscCheckScaLapackInfo("descinit", info); 741 742 /* redistribute x as a column of a 2d matrix */ 743 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol)); 744 745 /* call ScaLAPACK subroutine */ 746 switch (A->factortype) { 747 case MAT_FACTOR_LU: 748 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info)); 749 PetscCheckScaLapackInfo("getrs", info); 750 break; 751 case MAT_FACTOR_CHOLESKY: 752 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info)); 753 
PetscCheckScaLapackInfo("potrs", info); 754 break; 755 default: 756 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 757 } 758 759 /* redistribute x from a column of a 2d matrix */ 760 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol)); 761 762 PetscCall(PetscFree(x2d)); 763 PetscCall(VecRestoreArray(X, &x)); 764 PetscFunctionReturn(PETSC_SUCCESS); 765 } 766 767 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) 768 { 769 PetscFunctionBegin; 770 PetscCall(MatSolve_ScaLAPACK(A, B, X)); 771 PetscCall(VecAXPY(X, 1, Y)); 772 PetscFunctionReturn(PETSC_SUCCESS); 773 } 774 775 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) 776 { 777 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x; 778 PetscBool flg1, flg2; 779 PetscBLASInt one = 1, info; 780 781 PetscFunctionBegin; 782 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1)); 783 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2)); 784 PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK"); 785 MatScaLAPACKCheckDistribution(B, 1, X, 2); 786 b = (Mat_ScaLAPACK *)B->data; 787 x = (Mat_ScaLAPACK *)X->data; 788 PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc)); 789 790 switch (A->factortype) { 791 case MAT_FACTOR_LU: 792 PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info)); 793 PetscCheckScaLapackInfo("getrs", info); 794 break; 795 case MAT_FACTOR_CHOLESKY: 796 PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info)); 797 PetscCheckScaLapackInfo("potrs", info); 798 break; 799 default: 800 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); 801 } 802 
PetscFunctionReturn(PETSC_SUCCESS);
}

/* In-place LU factorization (p*getrf); pivots are kept in a->pivots for later solves */
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  /* pivot array sized locr+mb as required by p*getrf; allocated once and reused */
  if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric LU for the factor interface: copy A into F, then factor F in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic LU is a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* In-place Cholesky factorization of the lower triangle (p*potrf) */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric Cholesky for the factor interface: copy A into F, then factor F in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic Cholesky is a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Create the factor matrix F with the same grid/block layout as A */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE; /* symbolic phase does no work (dense) */
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Register the LU and Cholesky solvers of this package with the factor registry */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute 1, Frobenius, or infinity norm via p*lange; 1/inf norms need a workspace */
static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, lwork = 0;
  const char    *ntype;
  PetscScalar   *work = NULL, dummy;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1:
    ntype = "1";
    lwork = PetscMax(a->locr, a->locc);
    break;
  case NORM_FROBENIUS:
    ntype = "F";
    work  = &dummy; /* p*lange does not use the workspace for the Frobenius norm */
    break;
  case NORM_INFINITY:
    ntype = "I";
    lwork = PetscMax(a->locr, a->locc);
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
  }
  if (lwork) PetscCall(PetscMalloc1(lwork, &work));
  *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
  if (lwork) PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the whole local block (lld x locc storage) */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build index sets with the global rows/columns stored locally by this process;
   the formula maps a local index to its global counterpart in the block-cyclic
   distribution (block size nb, nproc processes, source process isrc). */
static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, n, nb, isrc, nproc, iproc, *idx;

  PetscFunctionBegin;
  if (rows) {
    n     = a->locr;
    nb    = a->mb;
    isrc  = a->rsrc;
    nproc = a->grid->nprow;
    iproc = a->grid->myrow;
    PetscCall(PetscMalloc1(n, &idx));
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
  }
  if (cols) {
    n     = a->locc;
    nb    = a->nb;
    isrc  = a->csrc;
    nproc = a->grid->npcol;
    iproc = a->grid->mycol;
    PetscCall(PetscMalloc1(n, &idx));
    for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATSCALAPACK matrix to MATDENSE by redistributing the 2d block-cyclic
   data to a 1d row-block layout with p*gemr2d. */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz, ldb;
  const PetscInt    *ranges, *branges, *cwork;
  const PetscScalar *vwork;
  PetscBLASInt       bdesc[9], bmb, zero = 0, one = 1, lld, info;
  PetscScalar       *barray;
  PetscBool          differ = PETSC_FALSE;
  PetscMPIInt        size;

  PetscFunctionBegin;
PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* p*gemr2d requires a layout compatible with A's, so redistribute into a
       temporary dense matrix and then move rows into *B with MatSetValues() */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    PetscCall(MatDenseGetLDA(Bmpi, &ldb));
    lld = PetscMax(ldb, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    PetscCall(MatDenseGetLDA(Bmpi, &ldb));
    lld = PetscMax(ldb, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check whether a layout is ScaLAPACK-compatible: all processes have the same local
   size, except possibly a smaller remainder on the last process. */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt     size;
  PetscInt        i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map,
&ranges));
    n = ranges[1] - ranges[0];
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    /* accept if all equal, or only the last process differs and is not larger */
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATDENSE matrix to MATSCALAPACK. If the dense layout is
   ScaLAPACK-compatible the redistribution is done with p*gemr2d, otherwise
   the entries are moved through MatSetValues()/assembly. */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
{
  Mat_ScaLAPACK     *b;
  Mat                Bmpi;
  MPI_Comm           comm;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt    *ranges, *rows, *cols;
  PetscBLASInt       adesc[9], amb, zero = 0, one = 1, lld, info;
  const PetscScalar *aarray;
  IS                 ir, ic;
  PetscInt           lda;
  PetscBool          flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArrayRead(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1);                       /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE; /* data is already in place, assembly can skip communication */
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE; /* the dense array is column-major */
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArrayRead(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert an AIJ matrix to MATSCALAPACK row by row via MatSetValues() */
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert an SBAIJ matrix (upper triangle stored) to MATSCALAPACK, explicitly
   mirroring each off-diagonal entry into the lower triangle (conjugated when the
   matrix is marked Hermitian). */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue; /* diagonal must not be duplicated */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Allocate (zeroed) local storage of size lld*locc for the block-cyclic data */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); /* overflow-checked product */
  PetscCall(PetscCalloc1(sz, &a->loc));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free local data and release the shared BLACS grid (attached to the communicator
   by attribute caching) when its reference count drops to zero */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Validate the layouts, compute the local block-cyclic sizes with numroc,
   allocate the local array, and initialize the ScaLAPACK array descriptor */
static PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr);

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin communicating stashed (off-process) entries; no-op when none can exist */
static PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
{
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed entries and store each into the local array; infog2l converts the
   1-based global (row,col) into 1-based local indices and the owning process */
static PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break;
    for (i = 0; i < n; i++) {
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
        break;
      case ADD_VALUES:
        a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static
PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
{
  Mat      Adense, As;
  MPI_Comm comm;

  PetscFunctionBegin;
  /* load as MATDENSE first, then convert to the block-cyclic layout */
  PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
  PetscCall(MatCreate(comm, &Adense));
  PetscCall(MatSetType(Adense, MATDENSE));
  PetscCall(MatLoad(Adense, viewer));
  PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
  PetscCall(MatDestroy(&Adense));
  PetscCall(MatHeaderReplace(newMat, &As));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATSCALAPACK; entries are positional, see MatOps in petscmat.h */
static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
                                       0,
                                       0,
                                       MatMult_ScaLAPACK,
                                       /* 4*/ MatMultAdd_ScaLAPACK,
                                       MatMultTranspose_ScaLAPACK,
                                       MatMultTransposeAdd_ScaLAPACK,
                                       MatSolve_ScaLAPACK,
                                       MatSolveAdd_ScaLAPACK,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_ScaLAPACK,
                                       MatCholeskyFactor_ScaLAPACK,
                                       0,
                                       MatTranspose_ScaLAPACK,
                                       /*15*/ MatGetInfo_ScaLAPACK,
                                       0,
                                       MatGetDiagonal_ScaLAPACK,
                                       MatDiagonalScale_ScaLAPACK,
                                       MatNorm_ScaLAPACK,
                                       /*20*/ MatAssemblyBegin_ScaLAPACK,
                                       MatAssemblyEnd_ScaLAPACK,
                                       MatSetOption_ScaLAPACK,
                                       MatZeroEntries_ScaLAPACK,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_ScaLAPACK,
                                       MatLUFactorNumeric_ScaLAPACK,
                                       MatCholeskyFactorSymbolic_ScaLAPACK,
                                       MatCholeskyFactorNumeric_ScaLAPACK,
                                       /*29*/ MatSetUp_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatCopy_ScaLAPACK,
                                       /*44*/ 0,
                                       MatScale_ScaLAPACK,
                                       MatShift_ScaLAPACK,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_ScaLAPACK,
                                       MatView_ScaLAPACK,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_ScaLAPACK_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_ScaLAPACK,
                                       /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_ScaLAPACK,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_ScaLAPACK,
                                       0,
                                       /*99*/ MatProductSetFromOptions_ScaLAPACK,
                                       0,
                                       0,
                                       MatConjugate_ScaLAPACK,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_ScaLAPACK,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_ScaLAPACK,
                                       MatMultHermitianTranspose_ScaLAPACK,
                                       MatMultHermitianTransposeAdd_ScaLAPACK,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       MatTransposeMatMultNumeric_ScaLAPACK,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0,
                                       0,
                                       0};

/* Custom stash scatter: ownership of each stashed entry is determined by the 2d
   block-cyclic distribution (infog2l + Cblacs_pnum) rather than by row ranges,
   which is why the generic MatStashScatterBegin_Private() cannot be used. */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt           size = stash->size, nsends;
  PetscInt           count, *sindices, **rindices, i, j, l;
  PetscScalar      **rvalues, *svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt       *sizes, *nlengths, nreceives;
  PetscInt          *sp_idx, *sp_idy;
  PetscScalar       *sp_val;
  PetscMatStashSpace space, space_next;
  PetscBLASInt       gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l = 0; l < space->local_used; l++) {
      /* infog2l gives the (rsrc,csrc) grid coordinates of the owning process;
         Cblacs_pnum converts them to its rank in the grid's communicator */
      PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
      PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
      nlengths[j]++;
      owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, from from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since clubbing row,col - lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are size 'bs2' lengths (and remove earlier factor 2) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  /* pack values and (row,col) indices into the per-destination send buffers */
  i = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx = space->idx;
    sp_idy = space->idy;
    sp_val = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]] = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l]; /* rows first, then cols, per destination */
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv was advanced during packing, recompute the start offsets for the sends */
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Record the requested block sizes; PETSC_DECIDE falls back to the default
   (and nb additionally defaults to mb). Must be called before MatSetUp(). */
static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt
mb, PetscInt nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
  the `MATSCALAPACK` matrix

  Logically Collective

  Input Parameters:
+ A  - a `MATSCALAPACK` matrix
. mb - the row block size
- nb - the column block size

  Level: intermediate

  Note:
  This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices

.seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
  the `MATSCALAPACK` matrix

  Not Collective

  Input Parameter:
. A - a `MATSCALAPACK` matrix

  Output Parameters:
+ mb - the row block size
- nb - the column block size

  Level: intermediate

  Note:
  This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices

.seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use `./configure --download-scalapack` to install PETSc to use ScaLAPACK

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
.  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
.  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Level: intermediate

   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.
1773 1774 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()` 1775 M*/ 1776 1777 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1778 { 1779 Mat_ScaLAPACK *a; 1780 PetscBool flg, flg1; 1781 Mat_ScaLAPACK_Grid *grid; 1782 MPI_Comm icomm; 1783 PetscBLASInt nprow, npcol, myrow, mycol; 1784 PetscInt optv1, k = 2, array[2] = {0, 0}; 1785 PetscMPIInt size; 1786 1787 PetscFunctionBegin; 1788 A->ops[0] = MatOps_Values; 1789 A->insertmode = NOT_SET_VALUES; 1790 1791 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash)); 1792 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1793 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1794 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1795 A->stash.ScatterDestroy = NULL; 1796 1797 PetscCall(PetscNew(&a)); 1798 A->data = (void *)a; 1799 1800 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1801 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1802 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0)); 1803 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1804 PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite)); 1805 } 1806 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL)); 1807 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg)); 1808 if (!flg) { 1809 PetscCall(PetscNew(&grid)); 1810 1811 PetscCallMPI(MPI_Comm_size(icomm, &size)); 1812 grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001); 1813 1814 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat"); 1815 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1)); 1816 if (flg1) { 1817 PetscCheck(size % optv1 == 
0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size); 1818 grid->nprow = optv1; 1819 } 1820 PetscOptionsEnd(); 1821 1822 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1823 grid->npcol = size / grid->nprow; 1824 PetscCall(PetscBLASIntCast(grid->nprow, &nprow)); 1825 PetscCall(PetscBLASIntCast(grid->npcol, &npcol)); 1826 grid->ictxt = Csys2blacs_handle(icomm); 1827 Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol); 1828 Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol); 1829 grid->grid_refct = 1; 1830 grid->nprow = nprow; 1831 grid->npcol = npcol; 1832 grid->myrow = myrow; 1833 grid->mycol = mycol; 1834 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1835 grid->ictxrow = Csys2blacs_handle(icomm); 1836 Cblacs_gridinit(&grid->ictxrow, "R", 1, size); 1837 grid->ictxcol = Csys2blacs_handle(icomm); 1838 Cblacs_gridinit(&grid->ictxcol, "R", size, 1); 1839 PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid)); 1840 1841 } else grid->grid_refct++; 1842 PetscCall(PetscCommDestroy(&icomm)); 1843 a->grid = grid; 1844 a->mb = DEFAULT_BLOCKSIZE; 1845 a->nb = DEFAULT_BLOCKSIZE; 1846 1847 PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat"); 1848 PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg)); 1849 if (flg) { 1850 a->mb = array[0]; 1851 a->nb = (k > 1) ? 
array[1] : a->mb; 1852 } 1853 PetscOptionsEnd(); 1854 1855 a->roworiented = PETSC_TRUE; 1856 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK)); 1857 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1858 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1859 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK)); 1860 PetscFunctionReturn(PETSC_SUCCESS); 1861 } 1862 1863 /*@C 1864 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1865 (2D block cyclic distribution) for a `MATSCALAPACK` matrix 1866 1867 Collective 1868 1869 Input Parameters: 1870 + comm - MPI communicator 1871 . mb - row block size (or `PETSC_DECIDE` to have it set) 1872 . nb - column block size (or `PETSC_DECIDE` to have it set) 1873 . M - number of global rows 1874 . N - number of global columns 1875 . rsrc - coordinate of process that owns the first row of the distributed matrix 1876 - csrc - coordinate of process that owns the first column of the distributed matrix 1877 1878 Output Parameter: 1879 . A - the matrix 1880 1881 Options Database Key: 1882 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1883 1884 Level: intermediate 1885 1886 Notes: 1887 If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen 1888 1889 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1890 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1891 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 1892 1893 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1894 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1895 significance and are thus ignored. 
The block sizes refer to the values 1896 used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1897 1898 .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1899 @*/ 1900 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) 1901 { 1902 Mat_ScaLAPACK *a; 1903 PetscInt m, n; 1904 1905 PetscFunctionBegin; 1906 PetscCall(MatCreate(comm, A)); 1907 PetscCall(MatSetType(*A, MATSCALAPACK)); 1908 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1909 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1910 m = PETSC_DECIDE; 1911 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1912 n = PETSC_DECIDE; 1913 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1914 PetscCall(MatSetSizes(*A, m, n, M, N)); 1915 a = (Mat_ScaLAPACK *)(*A)->data; 1916 PetscCall(PetscBLASIntCast(M, &a->M)); 1917 PetscCall(PetscBLASIntCast(N, &a->N)); 1918 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1919 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1920 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 1921 PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 1922 PetscCall(MatSetUp(*A)); 1923 PetscFunctionReturn(PETSC_SUCCESS); 1924 } 1925