1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) { 23 PetscFunctionBegin; 24 PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n")); 25 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 26 PetscFunctionReturn(0); 27 } 28 29 static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer) { 30 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 31 PetscBool iascii; 32 PetscViewerFormat format; 33 Mat Adense; 34 35 PetscFunctionBegin; 36 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 37 if (iascii) { 38 PetscCall(PetscViewerGetFormat(viewer, &format)); 39 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 40 PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb)); 41 PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol)); 42 PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc)); 43 PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", 
(int)a->locr, (int)a->locc)); 44 PetscFunctionReturn(0); 45 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 46 PetscFunctionReturn(0); 47 } 48 } 49 /* convert to dense format and call MatView() */ 50 PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense)); 51 PetscCall(MatView(Adense, viewer)); 52 PetscCall(MatDestroy(&Adense)); 53 PetscFunctionReturn(0); 54 } 55 56 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info) { 57 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 58 PetscLogDouble isend[2], irecv[2]; 59 60 PetscFunctionBegin; 61 info->block_size = 1.0; 62 63 isend[0] = a->lld * a->locc; /* locally allocated */ 64 isend[1] = a->locr * a->locc; /* used submatrix */ 65 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 66 info->nz_allocated = isend[0]; 67 info->nz_used = isend[1]; 68 } else if (flag == MAT_GLOBAL_MAX) { 69 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 70 info->nz_allocated = irecv[0]; 71 info->nz_used = irecv[1]; 72 } else if (flag == MAT_GLOBAL_SUM) { 73 PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 74 info->nz_allocated = irecv[0]; 75 info->nz_used = irecv[1]; 76 } 77 78 info->nz_unneeded = 0; 79 info->assemblies = A->num_ass; 80 info->mallocs = 0; 81 info->memory = ((PetscObject)A)->mem; 82 info->fill_ratio_given = 0; 83 info->fill_ratio_needed = 0; 84 info->factor_mallocs = 0; 85 PetscFunctionReturn(0); 86 } 87 88 PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg) { 89 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 90 91 PetscFunctionBegin; 92 switch (op) { 93 case MAT_NEW_NONZERO_LOCATIONS: 94 case MAT_NEW_NONZERO_LOCATION_ERR: 95 case MAT_NEW_NONZERO_ALLOCATION_ERR: 96 case MAT_SYMMETRIC: 97 case MAT_SORTED_FULL: 98 case MAT_HERMITIAN: break; 99 case MAT_ROW_ORIENTED: a->roworiented = flg; break; 100 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, 
"Unsupported option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* Insert or add a logically dense block of values. Entries owned by this process are
   written directly into the local 2d block-cyclic storage; off-process entries are
   stashed for communication during assembly. Negative row/column indices are skipped. */
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       i, j;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
  for (i = 0; i < nr; i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx)); /* ScaLAPACK uses 1-based global indices */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
      /* map the global (row,col) to local indices and the owning process coordinates */
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
        /* locally owned: store in column-major local array (lld is the local leading dimension) */
        if (roworiented) {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j]; break;
          }
        } else {
          switch (imode) {
          case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr]; break;
          default: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr]; break;
          }
        }
      } else {
        /* off-process entry: defer to the stash, communicated at MatAssemblyBegin/End */
        PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Shared kernel for MatMult/MatMultTranspose(Add): computes y = op(A)*x + beta*y (alpha fixed
   at 1.0) by redistributing the 1d-distributed PETSc vectors onto the 2d process grid,
   calling PBLAS gemv, and redistributing the result back. */
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y) {
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK *)A->data;
  PetscScalar    *x2d, *y2d, alpha = 1.0;
  const PetscInt *ranges;
  PetscBLASInt    xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;

  PetscFunctionBegin;
  if (transpose) {
    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
    xlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
    ylld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    xlld = PetscMax(1, lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when the old y contributes) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
    xlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
    ylld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
    ylld = PetscMax(1, lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
    PetscCheckScaLapackInfo("descinit", info);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute x as a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when the old y contributes) */
    if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));

    /* redistribute y from a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
  }
  PetscCall(PetscFree2(x2d, y2d));
  PetscFunctionReturn(0);
}

/* y = A*x */
static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y) {
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(0);
}

/* y = A^T*x */
static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y) {
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(MatMultXXXYYY_ScaLAPACK(A,
PETSC_TRUE, 0.0, xarray, yarray)); 249 PetscCall(VecRestoreArrayRead(x, &xarray)); 250 PetscCall(VecRestoreArray(y, &yarray)); 251 PetscFunctionReturn(0); 252 } 253 254 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) { 255 const PetscScalar *xarray; 256 PetscScalar *zarray; 257 258 PetscFunctionBegin; 259 if (y != z) PetscCall(VecCopy(y, z)); 260 PetscCall(VecGetArrayRead(x, &xarray)); 261 PetscCall(VecGetArray(z, &zarray)); 262 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray)); 263 PetscCall(VecRestoreArrayRead(x, &xarray)); 264 PetscCall(VecRestoreArray(z, &zarray)); 265 PetscFunctionReturn(0); 266 } 267 268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z) { 269 const PetscScalar *xarray; 270 PetscScalar *zarray; 271 272 PetscFunctionBegin; 273 if (y != z) PetscCall(VecCopy(y, z)); 274 PetscCall(VecGetArrayRead(x, &xarray)); 275 PetscCall(VecGetArray(z, &zarray)); 276 PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray)); 277 PetscCall(VecRestoreArrayRead(x, &xarray)); 278 PetscCall(VecRestoreArray(z, &zarray)); 279 PetscFunctionReturn(0); 280 } 281 282 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) { 283 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 284 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 285 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 286 PetscScalar sone = 1.0, zero = 0.0; 287 PetscBLASInt one = 1; 288 289 PetscFunctionBegin; 290 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 291 C->assembled = PETSC_TRUE; 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) { 296 PetscFunctionBegin; 297 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 298 PetscCall(MatSetType(C, MATSCALAPACK)); 299 PetscCall(MatSetUp(C)); 300 
C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 301 PetscFunctionReturn(0); 302 } 303 304 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C) { 305 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 306 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 307 Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data; 308 PetscScalar sone = 1.0, zero = 0.0; 309 PetscBLASInt one = 1; 310 311 PetscFunctionBegin; 312 PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc)); 313 C->assembled = PETSC_TRUE; 314 PetscFunctionReturn(0); 315 } 316 317 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C) { 318 PetscFunctionBegin; 319 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE)); 320 PetscCall(MatSetType(C, MATSCALAPACK)); 321 PetscCall(MatSetUp(C)); 322 PetscFunctionReturn(0); 323 } 324 325 /* --------------------------------------- */ 326 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) { 327 PetscFunctionBegin; 328 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 329 C->ops->productsymbolic = MatProductSymbolic_AB; 330 PetscFunctionReturn(0); 331 } 332 333 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) { 334 PetscFunctionBegin; 335 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 336 C->ops->productsymbolic = MatProductSymbolic_ABt; 337 PetscFunctionReturn(0); 338 } 339 340 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) { 341 Mat_Product *product = C->product; 342 343 PetscFunctionBegin; 344 switch (product->type) { 345 case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); break; 346 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); break; 347 default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported 
for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]); 348 } 349 PetscFunctionReturn(0); 350 } 351 /* --------------------------------------- */ 352 353 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D) { 354 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 355 PetscScalar *darray, *d2d, v; 356 const PetscInt *ranges; 357 PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info; 358 359 PetscFunctionBegin; 360 PetscCall(VecGetArray(D, &darray)); 361 362 if (A->rmap->N <= A->cmap->N) { /* row version */ 363 364 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 365 PetscCall(PetscLayoutGetRanges(A->rmap, &ranges)); 366 PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */ 367 dlld = PetscMax(1, A->rmap->n); 368 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info)); 369 PetscCheckScaLapackInfo("descinit", info); 370 371 /* allocate 2d vector */ 372 lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow); 373 PetscCall(PetscCalloc1(lszd, &d2d)); 374 dlld = PetscMax(1, lszd); 375 376 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 377 PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info)); 378 PetscCheckScaLapackInfo("descinit", info); 379 380 /* collect diagonal */ 381 for (j = 1; j <= a->M; j++) { 382 PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc)); 383 PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v)); 384 } 385 386 /* redistribute d from a column of a 2d matrix */ 387 PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol)); 388 PetscCall(PetscFree(d2d)); 389 390 } else { /* column version */ 391 392 /* create ScaLAPACK descriptor for vector (1d 
block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* collect diagonal */
    for (j = 1; j <= a->N; j++) {
      PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
      PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D, &darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}

/* Scale rows by L and/or columns by R: each diagonal vector is redistributed onto the
   2d grid, broadcast along the orthogonal grid dimension, and applied locally. */
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R) {
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK *)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt       i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
    dlld = 1;
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
    PetscCall(PetscCalloc1(lszd, &d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a row of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, the others receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);

    /* local scaling: column j of the local block is scaled by d2d[j] */
    for (j = 0; j < a->locc; j++)
      for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
    dlld = PetscMax(1, A->rmap->n);
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
    PetscCall(PetscCalloc1(lszd, &d2d));
    dlld = PetscMax(1, lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute d to a column of a 2d matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, the others receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
    else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);

    /* local scaling: row i of the local block is scaled by d2d[i] */
    for (i = 0; i < a->locr; i++)
      for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}

/* A dense matrix has no structurally missing diagonal entries; d is left untouched */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* X = a*X, applied to the whole local array (including any padding up to lld) */
static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a) {
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   n, one = 1;

  PetscFunctionBegin;
  n = x->lld * x->locc;
  PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
  PetscFunctionReturn(0);
}

/* X = X + alpha*I, via element-wise get/set of each diagonal entry */
static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha) {
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
  PetscBLASInt   i, n;
  PetscScalar    v;

  PetscFunctionBegin;
  n = PetscMin(x->M, x->N);
  for (i = 1; i <= n; i++) {
    PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
    v += alpha;
    PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
  }
  PetscFunctionReturn(0);
}

/* Y = Y + alpha*X; both matrices must share the same distribution */
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data; 531 Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data; 532 PetscBLASInt one = 1; 533 PetscScalar beta = 1.0; 534 535 PetscFunctionBegin; 536 MatScaLAPACKCheckDistribution(Y, 1, X, 3); 537 PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc)); 538 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 539 PetscFunctionReturn(0); 540 } 541 542 static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str) { 543 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 544 Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data; 545 546 PetscFunctionBegin; 547 PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 548 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 549 PetscFunctionReturn(0); 550 } 551 552 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B) { 553 Mat Bs; 554 MPI_Comm comm; 555 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 556 557 PetscFunctionBegin; 558 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 559 PetscCall(MatCreate(comm, &Bs)); 560 PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 561 PetscCall(MatSetType(Bs, MATSCALAPACK)); 562 b = (Mat_ScaLAPACK *)Bs->data; 563 b->M = a->M; 564 b->N = a->N; 565 b->mb = a->mb; 566 b->nb = a->nb; 567 b->rsrc = a->rsrc; 568 b->csrc = a->csrc; 569 PetscCall(MatSetUp(Bs)); 570 *B = Bs; 571 if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc)); 572 Bs->assembled = PETSC_TRUE; 573 PetscFunctionReturn(0); 574 } 575 576 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) { 577 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 578 Mat Bs = *B; 579 PetscBLASInt one = 1; 580 PetscScalar sone = 1.0, zero = 0.0; 581 #if defined(PETSC_USE_COMPLEX) 582 PetscInt i, j; 583 #endif 584 585 PetscFunctionBegin; 586 if (reuse == MAT_REUSE_MATRIX) 
PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 587 if (reuse == MAT_INITIAL_MATRIX) { 588 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 589 *B = Bs; 590 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 591 b = (Mat_ScaLAPACK *)Bs->data; 592 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 593 #if defined(PETSC_USE_COMPLEX) 594 /* undo conjugation */ 595 for (i = 0; i < b->locr; i++) 596 for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]); 597 #endif 598 Bs->assembled = PETSC_TRUE; 599 PetscFunctionReturn(0); 600 } 601 602 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) { 603 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 604 PetscInt i, j; 605 606 PetscFunctionBegin; 607 for (i = 0; i < a->locr; i++) 608 for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]); 609 PetscFunctionReturn(0); 610 } 611 612 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B) { 613 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b; 614 Mat Bs = *B; 615 PetscBLASInt one = 1; 616 PetscScalar sone = 1.0, zero = 0.0; 617 618 PetscFunctionBegin; 619 if (reuse == MAT_INITIAL_MATRIX) { 620 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs)); 621 *B = Bs; 622 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported"); 623 b = (Mat_ScaLAPACK *)Bs->data; 624 PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc)); 625 Bs->assembled = PETSC_TRUE; 626 PetscFunctionReturn(0); 627 } 628 629 static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X) { 630 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 631 PetscScalar *x, *x2d; 632 
const PetscInt *ranges;
  PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;

  PetscFunctionBegin;
  /* solve in place on a copy of B held in X */
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, &x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
  PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
  xlld = PetscMax(1, A->rmap->n);
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  PetscCall(PetscMalloc1(lszx, &x2d));
  xlld = PetscMax(1, lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
  PetscCheckScaLapackInfo("descinit", info);

  /* redistribute x as a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    /* "L": the factor stored by MatCholeskyFactor_ScaLAPACK is lower triangular */
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X, &x));
  PetscFunctionReturn(0);
}

/* X = A^{-1} B + Y, implemented as a solve followed by an axpy. */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X) {
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A, B, X));
  PetscCall(VecAXPY(X, 1, Y));
  PetscFunctionReturn(0);
}

/* Multi-RHS solve: both B and X must already be MATSCALAPACK with matching
   distributions; B is copied into X and solved in place on the 2d grid. */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
  PetscBool flg1, flg2;
  PetscBLASInt one = 1, info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
  PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B, 1, X, 2);
  b = (Mat_ScaLAPACK *)B->data;
  x = (Mat_ScaLAPACK *)X->data;
  PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("getrs", info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
    PetscCheckScaLapackInfo("potrs", info);
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}

/* In-place LU factorization (p*getrf); row/column permutations (row, col)
   are ignored since ScaLAPACK does its own partial pivoting (continues below). */
static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt one = 1, info;

  PetscFunctionBegin;
  if (!a->pivots) {
    /* p*getrf requires locr + mb pivot slots (see ScaLAPACK ipiv dimension) */
    PetscCall(PetscMalloc1(a->locr + a->mb,
&a->pivots));
    /* NOTE(review): logs a->locr entries but a->locr + a->mb were allocated
       just above — the logged memory slightly undercounts; confirm/fix upstream. */
    PetscCall(PetscLogObjectMemory((PetscObject)A, a->locr * sizeof(PetscBLASInt)));
  }
  PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
  PetscCheckScaLapackInfo("getrf", info);
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

/* Numeric LU stage of the PETSc factor interface: copy A into the
   preallocated factor F, then factor F in place. */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
  PetscFunctionReturn(0);
}

/* Symbolic LU stage: nothing to do for a dense block-cyclic matrix. */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}

/* In-place Cholesky factorization (p*potrf) of the lower triangle;
   the permutation 'perm' is ignored. */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt one = 1, info;

  PetscFunctionBegin;
  PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
  PetscCheckScaLapackInfo("potrf", info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
  PetscFunctionReturn(0);
}

/* Numeric Cholesky stage: copy then factor in place (mirrors the LU wrapper). */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info) {
  PetscFunctionBegin;
  PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
  PetscFunctionReturn(0);
}

/* Symbolic Cholesky stage: intentionally empty, like the LU counterpart. */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}

/* Reports MATSOLVERSCALAPACK as the solver backing a factored matrix. */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type) {
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}

/* Creates the factor matrix F with the same distribution as A; the actual
   numeric work happens later in the *FactorNumeric routines. */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F) {
  Mat B;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
  B->trivialsymbolic = PETSC_TRUE; /* symbolic phase is a no-op for dense */
  B->factortype      = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}

/* Registers the ScaLAPACK LU and Cholesky factorizations with PETSc's solver registry. */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) {
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}

/* Matrix norms via p*lange. NORM_1/NORM_INFINITY need a workspace array;
   NORM_FROBENIUS does not (a dummy scalar is passed) (continues below). */
static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   one = 1, lwork = 0;
  const char    *ntype;
  PetscScalar   *work = NULL, dummy;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1:
    ntype = "1";
    lwork = PetscMax(a->locr, a->locc);
    break;
  case NORM_FROBENIUS:
    ntype = "F";
    work  = &dummy; /* not referenced by p*lange for the Frobenius norm */
    break;
  case NORM_INFINITY:
    ntype = "I";
    lwork = PetscMax(a->locr, a->locc);
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)A),
PETSC_ERR_SUP, "Unsupported norm type"); 825 } 826 if (lwork) PetscCall(PetscMalloc1(lwork, &work)); 827 *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work); 828 if (lwork) PetscCall(PetscFree(work)); 829 PetscFunctionReturn(0); 830 } 831 832 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) { 833 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 834 835 PetscFunctionBegin; 836 PetscCall(PetscArrayzero(a->loc, a->lld * a->locc)); 837 PetscFunctionReturn(0); 838 } 839 840 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols) { 841 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 842 PetscInt i, n, nb, isrc, nproc, iproc, *idx; 843 844 PetscFunctionBegin; 845 if (rows) { 846 n = a->locr; 847 nb = a->mb; 848 isrc = a->rsrc; 849 nproc = a->grid->nprow; 850 iproc = a->grid->myrow; 851 PetscCall(PetscMalloc1(n, &idx)); 852 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 853 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows)); 854 } 855 if (cols) { 856 n = a->locc; 857 nb = a->nb; 858 isrc = a->csrc; 859 nproc = a->grid->npcol; 860 iproc = a->grid->mycol; 861 PetscCall(PetscMalloc1(n, &idx)); 862 for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb; 863 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols)); 864 } 865 PetscFunctionReturn(0); 866 } 867 868 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) { 869 Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data; 870 Mat Bmpi; 871 MPI_Comm comm; 872 PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz; 873 const PetscInt *ranges, *branges, *cwork; 874 const PetscScalar *vwork; 875 PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info; 876 PetscScalar *barray; 877 PetscBool differ = PETSC_FALSE; 878 PetscMPIInt size; 879 880 PetscFunctionBegin; 881 
PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
    for (i = 0; i < size; i++)
      if (ranges[i + 1] != branges[i + 1]) {
        differ = PETSC_TRUE;
        break;
      }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* gemr2d needs a ScaLAPACK-compatible 1d layout, which *B lacks; build a
       temporary dense matrix with the right layout and copy row-by-row into *B */
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));
    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
      PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
    }
    PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm, &Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
      PetscCall(MatSetSizes(Bmpi, m, n, M, N));
      PetscCall(MatSetType(Bmpi, MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
    lld = PetscMax(A->rmap->n, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi, &barray));
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi, &barray));

    PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A, &Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}

/* Returns PETSC_TRUE when the layout is ScaLAPACK-compatible: equal local
   sizes on all processes, except possibly a smaller remainder on the last. */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct) {
  const PetscInt *ranges;
  PetscMPIInt size;
  PetscInt i, n;

  PetscFunctionBegin;
  *correct = PETSC_TRUE;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  if (size > 1) {
    PetscCall(PetscLayoutGetRanges(map, &ranges));
    n = ranges[1] - ranges[0]; /* local size of the first process is the reference */
    for (i = 1; i < size; i++)
      if (ranges[i + 1] - ranges[i] != n) break;
    /* compatible iff all equal, or only the last one differs and is not larger */
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
  }
  PetscFunctionReturn(0);
}

/* Convert MATDENSE to MATSCALAPACK. Uses p*gemr2d when the dense layout is
   ScaLAPACK-compatible, otherwise falls back to MatSetValues (continues below). */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
  Mat_ScaLAPACK  *b;
  Mat             Bmpi;
  MPI_Comm        comm;
  PetscInt        M = A->rmap->N, N = A->cmap->N, m, n;
  const PetscInt *ranges, *rows, *cols;
  PetscBLASInt    adesc[9], amb, zero = 0, one = 1, lld, info;
  PetscScalar    *aarray;
  IS              ir, ic;
  PetscInt        lda;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm, &Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
    PetscCall(MatSetSizes(Bmpi, m, n, M, N));
    PetscCall(MatSetType(Bmpi, MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK *)Bmpi->data;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatDenseGetArray(A, &aarray));
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
    PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
    lld = PetscMax(lda, 1); /* local leading dimension */
    PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
    PetscCheckScaLapackInfo("descinit", info);

    /* redistribute matrix */
    PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE; /* data is already in place; skip the stash in assembly */
  }
else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
    b->roworiented = PETSC_FALSE; /* the dense array is column-major */
    PetscCall(MatGetOwnershipIS(A, &ir, &ic));
    PetscCall(ISGetIndices(ir, &rows));
    PetscCall(ISGetIndices(ic, &cols));
    PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir, &rows));
    PetscCall(ISRestoreIndices(ic, &cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A, &aarray));
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}

/* Convert an AIJ (sparse) matrix to MATSCALAPACK by inserting its rows
   one at a time; zero entries of the dense target stay zero. */
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal)); /* INSERT_VALUES below only touches stored nonzeros */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/* Convert an SBAIJ (symmetric, upper triangle stored) matrix to MATSCALAPACK.
   The stored upper triangle is added directly; the mirrored lower triangle is
   added with transposed indices, conjugated when A is flagged Hermitian.
   ADD_VALUES with the duplicate-skip on the diagonal avoids double-counting. */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat                mat_scal;
  PetscInt           M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar        v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
    PetscCall(MatSetSizes(mat_scal, m, n, M, N));
    PetscCall(MatSetType(mat_scal, MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      if (cols[j] == row) continue; /* diagonal already inserted once above */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/* Allocates (zeroed) the local lld x locc array; any previous allocation is freed. */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscInt       sz = 0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  PetscCall(PetscIntMultError(a->lld, a->locc, &sz)); /* overflow-checked product */
  PetscCall(PetscCalloc1(sz, &a->loc));
  PetscCall(PetscLogObjectMemory((PetscObject)A, sz * sizeof(PetscScalar)));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Destroys the matrix: frees local data, releases the (reference-counted)
   BLACS grid attached to the communicator, and clears composed methods
   (continues below). */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (--grid->grid_refct == 0) {
    /* last matrix on this grid: tear down all three BLACS contexts */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C",
NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}

/* Finalizes the matrix layout: validates the PETSc layouts, computes the
   local block-cyclic dimensions with numroc, allocates storage, and builds
   the ScaLAPACK array descriptor. */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscBLASInt   info = 0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
  PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
  PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
  a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
  a->lld  = PetscMax(1, a->locr); /* descinit requires lld >= 1 even for empty local blocks */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
  PetscCheckScaLapackInfo("descinit", info);
  PetscFunctionReturn(0);
}

/* Starts communication of stashed off-process entries (see the custom
   MatStashScatterBegin_ScaLAPACK further down, installed on the stash). */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type) {
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/* Receives stashed entries and applies them to the local block-cyclic array,
   converting each global (row,col) to local indices with infog2l. */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
  PetscMPIInt    n;
  PetscInt       i, flg, *row, *col;
  PetscScalar   *val;
  PetscBLASInt   gridx, gcidx, lridx, lcidx, rsrc, csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
    if (!flg) break; /* no more messages */
    for (i = 0; i < n; i++) {
      /* ScaLAPACK uses 1-based global indices */
      PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
      PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
      PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
      case INSERT_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i]; break;
      case ADD_VALUES: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i]; break;
      default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}

/* Loads the matrix by reading it as MATDENSE and converting. */
PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer) {
  Mat      Adense, As;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
  PetscCall(MatCreate(comm, &Adense));
  PetscCall(MatSetType(Adense, MATDENSE));
  PetscCall(MatLoad(Adense, viewer));
  PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
  PetscCall(MatDestroy(&Adense));
  PetscCall(MatHeaderReplace(newMat, &As)); /* newMat takes over As's contents */
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Operation table for MATSCALAPACK; slot numbers follow struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
                                       0,
                                       0,
                                       MatMult_ScaLAPACK,
                                       /* 4*/ MatMultAdd_ScaLAPACK,
                                       MatMultTranspose_ScaLAPACK,
                                       MatMultTransposeAdd_ScaLAPACK,
                                       MatSolve_ScaLAPACK,
                                       MatSolveAdd_ScaLAPACK,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_ScaLAPACK,
                                       MatCholeskyFactor_ScaLAPACK,
                                       0,
                                       MatTranspose_ScaLAPACK,
                                       /*15*/ MatGetInfo_ScaLAPACK,
                                       0,
                                       MatGetDiagonal_ScaLAPACK,
                                       MatDiagonalScale_ScaLAPACK,
                                       MatNorm_ScaLAPACK,
                                       /*20*/ MatAssemblyBegin_ScaLAPACK,
                                       MatAssemblyEnd_ScaLAPACK,
                                       MatSetOption_ScaLAPACK,
                                       MatZeroEntries_ScaLAPACK,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_ScaLAPACK,
                                       MatLUFactorNumeric_ScaLAPACK,
                                       MatCholeskyFactorSymbolic_ScaLAPACK,
                                       MatCholeskyFactorNumeric_ScaLAPACK,
                                       /*29*/ MatSetUp_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatCopy_ScaLAPACK,
                                       /*44*/ 0,
                                       MatScale_ScaLAPACK,
                                       MatShift_ScaLAPACK,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_ScaLAPACK,
                                       MatView_ScaLAPACK,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_ScaLAPACK_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_ScaLAPACK,
                                       /*84*/ 0,
0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_ScaLAPACK,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_ScaLAPACK,
                                       0,
                                       /*99*/ MatProductSetFromOptions_ScaLAPACK,
                                       0,
                                       0,
                                       MatConjugate_ScaLAPACK,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_ScaLAPACK,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_ScaLAPACK,
                                       0,
                                       0,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0};

/* Custom stash scatter: unlike the default (which routes entries by row
   ownership in the PETSc layout), this routes each stashed (row,col,value)
   to the BLACS process that owns it in the 2d block-cyclic distribution,
   found via infog2l + Cblacs_pnum (continues in the next chunks). */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners) {
  PetscInt           *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt            size = stash->size, nsends;
  PetscInt            count, *sindices, **rindices, i, j, l;
  PetscScalar       **rvalues, *svalues;
  MPI_Comm            comm = stash->comm;
  MPI_Request        *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt        *sizes, *nlengths, nreceives;
  PetscInt           *sp_idx, *sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace  space, space_next;
  PetscBLASInt        gridx, gcidx, lridx, lcidx, rsrc, csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK *)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache
*/ 1422 } 1423 1424 bs2 = stash->bs * stash->bs; 1425 1426 /* first count number of contributors to each processor */ 1427 PetscCall(PetscCalloc1(size, &nlengths)); 1428 PetscCall(PetscMalloc1(stash->n + 1, &owner)); 1429 1430 i = j = 0; 1431 space = stash->space_head; 1432 while (space) { 1433 space_next = space->next; 1434 for (l = 0; l < space->local_used; l++) { 1435 PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx)); 1436 PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx)); 1437 PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc)); 1438 j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc); 1439 nlengths[j]++; 1440 owner[i] = j; 1441 i++; 1442 } 1443 space = space_next; 1444 } 1445 1446 /* Now check what procs get messages - and compute nsends. */ 1447 PetscCall(PetscCalloc1(size, &sizes)); 1448 for (i = 0, nsends = 0; i < size; i++) { 1449 if (nlengths[i]) { 1450 sizes[i] = 1; 1451 nsends++; 1452 } 1453 } 1454 1455 { 1456 PetscMPIInt *onodes, *olengths; 1457 /* Determine the number of messages to expect, their lengths, from from-ids */ 1458 PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 1459 PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 1460 /* since clubbing row,col - lengths are multiplied by 2 */ 1461 for (i = 0; i < nreceives; i++) olengths[i] *= 2; 1462 PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 1463 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1464 for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 1465 PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 1466 PetscCall(PetscFree(onodes)); 1467 PetscCall(PetscFree(olengths)); 1468 } 1469 1470 /* do sends: 1471 1) starts[i] gives the starting index in svalues for stuff going to 
1472 the ith processor 1473 */ 1474 PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 1475 PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 1476 PetscCall(PetscMalloc2(size, &startv, size, &starti)); 1477 /* use 2 sends the first with all_a, the next with all_i and all_j */ 1478 startv[0] = 0; 1479 starti[0] = 0; 1480 for (i = 1; i < size; i++) { 1481 startv[i] = startv[i - 1] + nlengths[i - 1]; 1482 starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 1483 } 1484 1485 i = 0; 1486 space = stash->space_head; 1487 while (space) { 1488 space_next = space->next; 1489 sp_idx = space->idx; 1490 sp_idy = space->idy; 1491 sp_val = space->val; 1492 for (l = 0; l < space->local_used; l++) { 1493 j = owner[i]; 1494 if (bs2 == 1) { 1495 svalues[startv[j]] = sp_val[l]; 1496 } else { 1497 PetscInt k; 1498 PetscScalar *buf1, *buf2; 1499 buf1 = svalues + bs2 * startv[j]; 1500 buf2 = space->val + bs2 * l; 1501 for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 1502 } 1503 sindices[starti[j]] = sp_idx[l]; 1504 sindices[starti[j] + nlengths[j]] = sp_idy[l]; 1505 startv[j]++; 1506 starti[j]++; 1507 i++; 1508 } 1509 space = space_next; 1510 } 1511 startv[0] = 0; 1512 for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 1513 1514 for (i = 0, count = 0; i < size; i++) { 1515 if (sizes[i]) { 1516 PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 1517 PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 1518 } 1519 } 1520 #if defined(PETSC_USE_INFO) 1521 PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends)); 1522 for (i = 0; i < size; i++) { 1523 if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 1524 } 1525 #endif 1526 PetscCall(PetscFree(nlengths)); 1527 
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  /* interleave the index and value receive requests: [idx0,val0,idx1,val1,...] */
  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  /* hand ownership of the send/receive buffers and requests to the stash;
     they are freed later by the scatter-end path */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

/* Implementation of MatScaLAPACKSetBlockSizes() for MATSCALAPACK: validates and stores
   the 2D block-cyclic distribution block sizes. Must be called before MatSetUp(). */
static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
  /* PETSC_DECIDE row size falls back to the default; PETSC_DECIDE column size copies the row size */
  PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
  PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the `MATSCALAPACK` matrix

   Logically Collective on A

   Input Parameters:
+  A  - a `MATSCALAPACK` matrix
.  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(A, mb, 2);
  PetscValidLogicalCollectiveInt(A, nb, 3);
  /* PetscTryMethod: silently a no-op for non-MATSCALAPACK matrices */
  PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
  PetscFunctionReturn(0);
}

/* Implementation of MatScaLAPACKGetBlockSizes() for MATSCALAPACK: either output
   pointer may be NULL if that value is not wanted. */
static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb) {
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the `MATSCALAPACK` matrix

   Not collective

   Input Parameter:
. 
A - a `MATSCALAPACK` matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if A is not a MATSCALAPACK matrix */
  PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
  PetscFunctionReturn(0);
}

/* reference (non-BTS) stash scatter routines, defined in the MatStash implementation */
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to `MATSCALAPACK` during a call to `MatSetFromOptions()`
.  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
.  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank. 
   Level: beginner

.seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
M*/

/* Type constructor for MATSCALAPACK: installs the function table, sets up the
   (communicator-shared) BLACS process grid, and processes block-size options. */
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) {
  Mat_ScaLAPACK      *a;
  PetscBool           flg, flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm            icomm;
  PetscBLASInt        nprow, npcol, myrow, mycol;
  PetscInt            optv1, k = 2, array[2] = {0, 0};
  PetscMPIInt         size;

  PetscFunctionBegin;
  PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  /* off-rank MatSetValues() entries are routed through a stash using the reference scatter */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNewLog(A, &a));
  A->data = (void *)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
  }
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
  if (!flg) { /* first MATSCALAPACK on this communicator: create and cache the grid */
    PetscCall(PetscNewLog(A, &grid));

    PetscCallMPI(MPI_Comm_size(icomm, &size));
    /* default grid height: floor(sqrt(size)), with a small epsilon to guard against
       PetscSqrtReal returning just under the exact root for perfect squares */
    grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);

    PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
    if (flg1) {
      /* NOTE(review): optv1 is only checked for divisibility; a non-positive value would
         make size % optv1 undefined — presumably relies on users passing a positive height */
      PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
      grid->nprow = optv1;
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size / grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
    PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
    /* create the main 2D BLACS context (row-major process ordering) and query this
       process's coordinates in it */
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
    Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));

  } else grid->grid_refct++; /* reuse the cached grid */
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
  if (flg) {
    /* NOTE(review): option values are not range-checked here (unlike
       MatScaLAPACKSetBlockSizes_ScaLAPACK); a zero/negative size would be stored as-is */
    a->mb = array[0];
    a->nb = (k > 1) ? array[1] : a->mb; /* one value given: use it for both dimensions */
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
   (2D block cyclic distribution) for a `MATSCALAPACK` matrix

   Collective

   Input Parameters:
+  comm - MPI communicator
.  mb   - row block size (or `PETSC_DECIDE` to have it set)
.  nb   - column block size (or `PETSC_DECIDE` to have it set)
.  M    - number of global rows
.  N    - number of global columns
.  rsrc - coordinate of process that owns the first row of the distributed matrix
-  csrc - coordinate of process that owns the first column of the distributed matrix

   Output Parameter:
.  A - the matrix

   Options Database Key:
.  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   Note:
   If `PETSC_DECIDE` is used for the block sizes, then an appropriate value
   is chosen.

   Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
   configured with ScaLAPACK. In particular, PETSc's local sizes lose
   significance and are thus ignored. 
The block sizes refer to the values 1765 used for the distributed matrix, not the same meaning as in `MATBAIJ`. 1766 1767 Level: intermediate 1768 1769 .seealso: `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1770 @*/ 1771 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A) { 1772 Mat_ScaLAPACK *a; 1773 PetscInt m, n; 1774 1775 PetscFunctionBegin; 1776 PetscCall(MatCreate(comm, A)); 1777 PetscCall(MatSetType(*A, MATSCALAPACK)); 1778 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions"); 1779 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1780 m = PETSC_DECIDE; 1781 PetscCall(PetscSplitOwnershipEqual(comm, &m, &M)); 1782 n = PETSC_DECIDE; 1783 PetscCall(PetscSplitOwnershipEqual(comm, &n, &N)); 1784 PetscCall(MatSetSizes(*A, m, n, M, N)); 1785 a = (Mat_ScaLAPACK *)(*A)->data; 1786 PetscCall(PetscBLASIntCast(M, &a->M)); 1787 PetscCall(PetscBLASIntCast(N, &a->N)); 1788 PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb)); 1789 PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb)); 1790 PetscCall(PetscBLASIntCast(rsrc, &a->rsrc)); 1791 PetscCall(PetscBLASIntCast(csrc, &a->csrc)); 1792 PetscCall(MatSetUp(*A)); 1793 PetscFunctionReturn(0); 1794 } 1795