/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the sequential or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
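/*
   Illustrative usage sketch for MatDenseGetLocalMatrix() (added commentary, not part of the
   library below). It assumes `A` is an already preallocated and assembled MATMPIDENSE matrix:

     Mat Aloc;

     PetscCall(MatDenseGetLocalMatrix(A, &Aloc)); // Aloc is this rank's MATSEQDENSE block
     PetscCall(MatScale(Aloc, 2.0));              // operate directly on the locally owned rows
*/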
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if defined(PETSC_HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif defined(PETSC_HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if defined(PETSC_HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif defined(PETSC_HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}
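/*
   Note (added commentary, not library documentation): the four Mult variants share one
   communication pattern. MatMult/MatMultAdd above broadcast the needed entries of the input
   vector into the local work vector lvec through the Mvctx PetscSF and then apply the local
   MATSEQDENSE block; the transpose variants below apply the local transpose multiply into
   lvec first and then sum the partial results into the output vector with a PetscSF reduce.
*/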
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, m = A->rmap->n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
"MatDenseRestoreArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 605 #if defined(PETSC_HAVE_ELEMENTAL) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 607 #endif 608 #if defined(PETSC_HAVE_SCALAPACK) 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 610 #endif 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 614 #if defined(PETSC_HAVE_CUDA) 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 632 #endif 633 #if defined(PETSC_HAVE_HIP) 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 635 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 651 #endif 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 662 663 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 664 PetscFunctionReturn(0); 665 } 666 667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 668 669 #include <petscdraw.h> 670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 671 { 672 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 673 PetscMPIInt rank; 674 PetscViewerType vtype; 675 PetscBool iascii, isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 679 PetscFunctionBegin; 680 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 681 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 682 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 683 if (iascii) { 684 PetscCall(PetscViewerGetType(viewer, &vtype)); 685 PetscCall(PetscViewerGetFormat(viewer, &format)); 686 if 
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;
    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
"MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1318 mat->ops->shift = MatShift_MPIDenseCUDA; 1319 } else { 1320 PetscCall(PetscFree(mat->defaultrandtype)); 1321 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1323 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1324 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1327 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1328 mat->ops->shift = MatShift_MPIDense; 1329 } 1330 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1335 { 1336 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1337 PetscBool iscuda; 1338 1339 PetscFunctionBegin; 1340 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1341 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 1342 if (!iscuda) PetscFunctionReturn(0); 1343 PetscCall(PetscLayoutSetUp(A->rmap)); 1344 PetscCall(PetscLayoutSetUp(A->cmap)); 1345 if (!d->A) { 1346 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1347 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1348 } 1349 PetscCall(MatSetType(d->A, MATSEQDENSECUDA)); 1350 PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data)); 1351 A->preallocated = PETSC_TRUE; 1352 A->assembled = PETSC_TRUE; 1353 PetscFunctionReturn(0); 1354 } 1355 #endif 1356 1357 #if defined(PETSC_HAVE_HIP) 1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha) 1359 { 1360 PetscScalar *da; 1361 PetscInt lda; 1362 1363 PetscFunctionBegin; 1364 PetscCall(MatDenseHIPGetArray(A, &da)); 1365 PetscCall(MatDenseGetLDA(A, &lda)); 1366 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1367 PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1368 PetscCall(MatDenseHIPRestoreArray(A, &da)); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1373 { 1374 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1375 PetscInt lda; 1376 1377 PetscFunctionBegin; 1378 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1379 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1380 if (!a->cvec) { 1381 PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1382 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1383 } 1384 a->vecinuse = col + 1; 1385 
PetscCall(MatDenseGetLDA(a->A, &lda)); 1386 PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1387 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1388 *v = a->cvec; 1389 PetscFunctionReturn(0); 1390 } 1391 1392 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1393 { 1394 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1395 1396 PetscFunctionBegin; 1397 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1398 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1399 a->vecinuse = 0; 1400 PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1401 PetscCall(VecHIPResetArray(a->cvec)); 1402 if (v) *v = NULL; 1403 PetscFunctionReturn(0); 1404 } 1405 1406 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1407 { 1408 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1409 PetscInt lda; 1410 1411 PetscFunctionBegin; 1412 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1413 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1414 if (!a->cvec) { 1415 PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1416 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1417 } 1418 a->vecinuse = col + 1; 1419 PetscCall(MatDenseGetLDA(a->A, &lda)); 1420 PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse)); 1421 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1422 PetscCall(VecLockReadPush(a->cvec)); 1423 *v = a->cvec; 1424 PetscFunctionReturn(0); 1425 } 1426 1427 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1428 { 1429 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1430 1431 PetscFunctionBegin; 1432 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1433 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1434 a->vecinuse = 0; 1435 PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse)); 1436 PetscCall(VecLockReadPop(a->cvec)); 1437 PetscCall(VecHIPResetArray(a->cvec)); 1438 if (v) *v = NULL; 1439 PetscFunctionReturn(0); 1440 } 1441 1442 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1443 { 1444 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1445 PetscInt lda; 1446 1447 PetscFunctionBegin; 1448 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1449 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1450 if (!a->cvec) { 1451 PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1452 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1453 } 1454 a->vecinuse = col + 1; 1455 PetscCall(MatDenseGetLDA(a->A, &lda)); 1456 PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1457 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1458 *v = a->cvec; 1459 PetscFunctionReturn(0); 1460 } 1461 1462 static PetscErrorCode 
MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1463 { 1464 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1465 1466 PetscFunctionBegin; 1467 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1468 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1469 a->vecinuse = 0; 1470 PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1471 PetscCall(VecHIPResetArray(a->cvec)); 1472 if (v) *v = NULL; 1473 PetscFunctionReturn(0); 1474 } 1475 1476 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1477 { 1478 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1479 1480 PetscFunctionBegin; 1481 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1482 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1483 PetscCall(MatDenseHIPPlaceArray(l->A, a)); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A) 1488 { 1489 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1490 1491 PetscFunctionBegin; 1492 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1493 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1494 PetscCall(MatDenseHIPResetArray(l->A)); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1499 { 1500 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1501 1502 PetscFunctionBegin; 1503 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1504 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1505 PetscCall(MatDenseHIPReplaceArray(l->A, a)); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1510 { 1511 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1512 1513 PetscFunctionBegin; 1514 PetscCall(MatDenseHIPGetArrayWrite(l->A, a)); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1519 { 1520 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1521 1522 PetscFunctionBegin; 1523 PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a)); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1528 { 1529 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1530 1531 PetscFunctionBegin; 1532 PetscCall(MatDenseHIPGetArrayRead(l->A, a)); 1533 PetscFunctionReturn(0); 1534 } 1535 1536 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1537 { 1538 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1539 1540 PetscFunctionBegin; 1541 PetscCall(MatDenseHIPRestoreArrayRead(l->A, a)); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a) 1546 { 1547 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1548 1549 PetscFunctionBegin; 1550 PetscCall(MatDenseHIPGetArray(l->A, a)); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 static PetscErrorCode 
MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a) 1555 { 1556 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1557 1558 PetscFunctionBegin; 1559 PetscCall(MatDenseHIPRestoreArray(l->A, a)); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1564 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1565 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1566 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1567 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1568 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1569 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 1570 1571 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind) 1572 { 1573 Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1574 1575 PetscFunctionBegin; 1576 PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1577 PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1578 if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1579 mat->boundtocpu = bind; 1580 if (!bind) { 1581 PetscBool iscuda; 1582 1583 PetscCall(PetscFree(mat->defaultrandtype)); 1584 PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype)); 1585 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iscuda)); 1586 if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 1587 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iscuda)); 1588 if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP)); 1595 mat->ops->shift = MatShift_MPIDenseHIP; 1596 } else { 1597 PetscCall(PetscFree(mat->defaultrandtype)); 1598 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1605 mat->ops->shift = 
MatShift_MPIDense; 1606 } 1607 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1608 PetscFunctionReturn(0); 1609 } 1610 1611 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data) 1612 { 1613 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1614 PetscBool iscuda; 1615 1616 PetscFunctionBegin; 1617 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1618 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iscuda)); 1619 if (!iscuda) PetscFunctionReturn(0); 1620 PetscCall(PetscLayoutSetUp(A->rmap)); 1621 PetscCall(PetscLayoutSetUp(A->cmap)); 1622 if (!d->A) { 1623 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1624 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A)); 1625 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1626 } 1627 PetscCall(MatSetType(d->A, MATSEQDENSEHIP)); 1628 PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data)); 1629 A->preallocated = PETSC_TRUE; 1630 A->assembled = PETSC_TRUE; 1631 PetscFunctionReturn(0); 1632 } 1633 #endif 1634 1635 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1636 { 1637 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1638 1639 PetscFunctionBegin; 1640 PetscCall(MatSetRandom(d->A, rctx)); 1641 #if defined(PETSC_HAVE_DEVICE) 1642 x->offloadmask = d->A->offloadmask; 1643 #endif 1644 PetscFunctionReturn(0); 1645 } 1646 1647 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1648 { 1649 PetscFunctionBegin; 1650 *missing = PETSC_FALSE; 1651 PetscFunctionReturn(0); 1652 } 1653 1654 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1655 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1656 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1657 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1658 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1659 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1660 1661 /* -------------------------------------------------------------------*/ 1662 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1663 MatGetRow_MPIDense, 1664 MatRestoreRow_MPIDense, 1665 MatMult_MPIDense, 1666 /* 4*/ MatMultAdd_MPIDense, 1667 MatMultTranspose_MPIDense, 1668 MatMultTransposeAdd_MPIDense, 1669 NULL, 1670 NULL, 1671 NULL, 1672 /* 10*/ NULL, 1673 NULL, 1674 NULL, 1675 NULL, 1676 MatTranspose_MPIDense, 1677 /* 15*/ MatGetInfo_MPIDense, 1678 MatEqual_MPIDense, 1679 MatGetDiagonal_MPIDense, 1680 MatDiagonalScale_MPIDense, 1681 MatNorm_MPIDense, 1682 /* 20*/ MatAssemblyBegin_MPIDense, 1683 MatAssemblyEnd_MPIDense, 1684 MatSetOption_MPIDense, 1685 MatZeroEntries_MPIDense, 1686 /* 24*/ MatZeroRows_MPIDense, 1687 NULL, 1688 NULL, 1689 NULL, 1690 NULL, 1691 /* 29*/ MatSetUp_MPIDense, 1692 NULL, 1693 NULL, 1694 MatGetDiagonalBlock_MPIDense, 1695 NULL, 1696 /* 34*/ MatDuplicate_MPIDense, 1697 NULL, 1698 NULL, 1699 NULL, 1700 NULL, 1701 /* 39*/ MatAXPY_MPIDense, 1702 MatCreateSubMatrices_MPIDense, 1703 NULL, 1704 MatGetValues_MPIDense, 1705 MatCopy_MPIDense, 1706 /* 44*/ NULL, 1707 MatScale_MPIDense, 1708 MatShift_MPIDense, 1709 NULL, 1710 NULL, 1711 /* 49*/ MatSetRandom_MPIDense, 1712 NULL, 1713 NULL, 1714 NULL, 1715 NULL, 1716 /* 54*/ NULL, 1717 NULL, 1718 NULL, 1719 NULL, 1720 NULL, 1721 /* 59*/ MatCreateSubMatrix_MPIDense, 1722 MatDestroy_MPIDense, 1723 MatView_MPIDense, 1724 NULL, 1725 NULL, 1726 /* 64*/ NULL, 1727 NULL, 1728 
NULL, 1729 NULL, 1730 NULL, 1731 /* 69*/ NULL, 1732 NULL, 1733 NULL, 1734 NULL, 1735 NULL, 1736 /* 74*/ NULL, 1737 NULL, 1738 NULL, 1739 NULL, 1740 NULL, 1741 /* 79*/ NULL, 1742 NULL, 1743 NULL, 1744 NULL, 1745 /* 83*/ MatLoad_MPIDense, 1746 NULL, 1747 NULL, 1748 NULL, 1749 NULL, 1750 NULL, 1751 /* 89*/ NULL, 1752 NULL, 1753 NULL, 1754 NULL, 1755 NULL, 1756 /* 94*/ NULL, 1757 NULL, 1758 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1759 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1760 NULL, 1761 /* 99*/ MatProductSetFromOptions_MPIDense, 1762 NULL, 1763 NULL, 1764 MatConjugate_MPIDense, 1765 NULL, 1766 /*104*/ NULL, 1767 MatRealPart_MPIDense, 1768 MatImaginaryPart_MPIDense, 1769 NULL, 1770 NULL, 1771 /*109*/ NULL, 1772 NULL, 1773 NULL, 1774 MatGetColumnVector_MPIDense, 1775 MatMissingDiagonal_MPIDense, 1776 /*114*/ NULL, 1777 NULL, 1778 NULL, 1779 NULL, 1780 NULL, 1781 /*119*/ NULL, 1782 NULL, 1783 NULL, 1784 NULL, 1785 NULL, 1786 /*124*/ NULL, 1787 MatGetColumnReductions_MPIDense, 1788 NULL, 1789 NULL, 1790 NULL, 1791 /*129*/ NULL, 1792 NULL, 1793 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1794 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1795 NULL, 1796 /*134*/ NULL, 1797 NULL, 1798 NULL, 1799 NULL, 1800 NULL, 1801 /*139*/ NULL, 1802 NULL, 1803 NULL, 1804 NULL, 1805 NULL, 1806 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1807 /*145*/ NULL, 1808 NULL, 1809 NULL, 1810 NULL, 1811 NULL, 1812 /*150*/ NULL, 1813 NULL}; 1814 1815 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1816 { 1817 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1818 MatType mtype = MATSEQDENSE; 1819 1820 PetscFunctionBegin; 1821 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1822 PetscCall(PetscLayoutSetUp(mat->rmap)); 1823 PetscCall(PetscLayoutSetUp(mat->cmap)); 1824 if (!a->A) { 1825 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1826 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1827 } 1828 #if defined(PETSC_HAVE_CUDA) 1829 PetscBool iscuda; 1830 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1831 if (iscuda) mtype = MATSEQDENSECUDA; 1832 #endif 1833 #if defined(PETSC_HAVE_HIP) 1834 PetscBool iship; 1835 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1836 if (iship) mtype = MATSEQDENSEHIP; 1837 #endif 1838 PetscCall(MatSetType(a->A, mtype)); 1839 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1840 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1841 mat->offloadmask = a->A->offloadmask; 1842 #endif 1843 mat->preallocated = PETSC_TRUE; 1844 mat->assembled = PETSC_TRUE; 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1849 { 1850 Mat B, C; 1851 1852 PetscFunctionBegin; 1853 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1854 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1855 PetscCall(MatDestroy(&C)); 1856 if (reuse == MAT_REUSE_MATRIX) { 1857 C = *newmat; 1858 } else C = NULL; 1859 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1860 PetscCall(MatDestroy(&B)); 1861 if (reuse == MAT_INPLACE_MATRIX) { 1862 PetscCall(MatHeaderReplace(A, &C)); 1863 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1864 PetscFunctionReturn(0); 1865 } 1866 1867 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1868 { 1869 Mat B, C; 1870 1871 PetscFunctionBegin; 1872 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1873 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1874 if (reuse == MAT_REUSE_MATRIX) { 1875 C = *newmat; 1876 } else C = NULL; 1877 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1878 PetscCall(MatDestroy(&B)); 1879 if (reuse == MAT_INPLACE_MATRIX) { 1880 PetscCall(MatHeaderReplace(A, &C)); 1881 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #if defined(PETSC_HAVE_ELEMENTAL) 1886 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1887 { 1888 Mat mat_elemental; 1889 PetscScalar *v; 1890 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) { 1894 mat_elemental = *newmat; 1895 PetscCall(MatZeroEntries(*newmat)); 1896 } else { 1897 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1898 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1899 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1900 PetscCall(MatSetUp(mat_elemental)); 1901 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1902 } 1903 1904 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1905 for (i = 0; i < N; i++) cols[i] = i; 1906 for (i = 0; i < m; i++) rows[i] = rstart + i; 1907 1908 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1909 PetscCall(MatDenseGetArray(A, &v)); 1910 PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1911 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1912 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1913 PetscCall(MatDenseRestoreArray(A, &v)); 1914 PetscCall(PetscFree2(rows, cols)); 1915 1916 if (reuse == MAT_INPLACE_MATRIX) { 1917 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1918 } else { 1919 *newmat = mat_elemental; 1920 } 1921 PetscFunctionReturn(0); 1922 } 1923 #endif 1924 1925 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1926 { 1927 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1928 1929 PetscFunctionBegin; 1930 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1935 { 1936 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1937 1938 PetscFunctionBegin; 1939 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1944 { 1945 Mat_MPIDense *mat; 1946 PetscInt m, nloc, N; 1947 1948 PetscFunctionBegin; 1949 PetscCall(MatGetSize(inmat, &m, &N)); 1950 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1951 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1952 PetscInt sum; 1953 1954 if (n == PETSC_DECIDE) 
PetscCall(PetscSplitOwnership(comm, &n, &N)); 1955 /* Check sum(n) = N */ 1956 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1957 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1958 1959 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1960 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1961 } 1962 1963 /* numeric phase */ 1964 mat = (Mat_MPIDense *)(*outmat)->data; 1965 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #if defined(PETSC_HAVE_CUDA) 1970 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 1971 { 1972 Mat B; 1973 Mat_MPIDense *m; 1974 1975 PetscFunctionBegin; 1976 if (reuse == MAT_INITIAL_MATRIX) { 1977 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1978 } else if (reuse == MAT_REUSE_MATRIX) { 1979 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1980 } 1981 1982 B = *newmat; 1983 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE)); 1984 PetscCall(PetscFree(B->defaultvectype)); 1985 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 1986 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 1987 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL)); 1988 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 1989 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 1990 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 1991 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 1992 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL)); 1993 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL)); 1994 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL)); 1995 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL)); 1996 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL)); 1997 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL)); 1998 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL)); 1999 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL)); 2000 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL)); 2001 m = (Mat_MPIDense *)(B)->data; 2002 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 2003 B->ops->bindtocpu = NULL; 2004 B->offloadmask = PETSC_OFFLOAD_CPU; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2009 { 2010 Mat B; 2011 Mat_MPIDense *m; 2012 2013 PetscFunctionBegin; 2014 if (reuse == MAT_INITIAL_MATRIX) { 2015 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2016 } else if (reuse == MAT_REUSE_MATRIX) { 2017 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2018 } 2019 2020 B = *newmat; 2021 PetscCall(PetscFree(B->defaultvectype)); 2022 PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 2023 
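  /* flip the type name to MATMPIDENSECUDA and register the CUDA-specific conversion, product, and device-array methods on B */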
PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 2024 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 2025 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2027 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2028 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2029 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 2030 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 2031 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 2032 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 2033 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 2034 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 2035 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 2036 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 2037 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 2038 m = (Mat_MPIDense *)(B->data); 2039 if (m->A) { 2040 PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 2041 B->offloadmask = PETSC_OFFLOAD_BOTH; 2042 } else { 2043 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2044 } 2045 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 2046 2047 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 2048 PetscFunctionReturn(0); 2049 } 2050 #endif 2051 2052 #if defined(PETSC_HAVE_HIP) 2053 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2054 { 2055 Mat B; 2056 Mat_MPIDense *m; 2057 2058 PetscFunctionBegin; 2059 if (reuse == MAT_INITIAL_MATRIX) { 2060 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2061 } else if (reuse == MAT_REUSE_MATRIX) { 2062 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2063 } 2064 2065 B = *newmat; 2066 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE)); 2067 PetscCall(PetscFree(B->defaultvectype)); 2068 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 2069 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 2070 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL)); 2071 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 2072 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 2073 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 2074 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 2075 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL)); 2076 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL)); 2077 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL)); 2078 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL)); 2079 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL)); 2080 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL)); 2081 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL)); 2082 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL)); 2083 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL)); 2084 m = (Mat_MPIDense *)(B)->data; 2085 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 2086 B->ops->bindtocpu = NULL; 2087 B->offloadmask = PETSC_OFFLOAD_CPU; 2088 PetscFunctionReturn(0); 2089 } 2090 2091 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2092 { 2093 Mat B; 2094 Mat_MPIDense *m; 2095 2096 PetscFunctionBegin; 2097 if (reuse == MAT_INITIAL_MATRIX) { 2098 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2099 } else if (reuse == MAT_REUSE_MATRIX) { 2100 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2101 } 2102 2103 B = *newmat; 2104 PetscCall(PetscFree(B->defaultvectype)); 2105 PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype)); 2106 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP)); 2107 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense)); 2108 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2109 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2110 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2111 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2112 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP)); 2113 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP)); 2114 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP)); 2115 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP)); 2116 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP)); 2117 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP)); 2118 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP)); 2119 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", 
MatDenseHIPResetArray_MPIDenseHIP)); 2120 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP)); 2121 m = (Mat_MPIDense *)(B->data); 2122 if (m->A) { 2123 PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A)); 2124 B->offloadmask = PETSC_OFFLOAD_BOTH; 2125 } else { 2126 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2127 } 2128 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE)); 2129 2130 B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP; 2131 PetscFunctionReturn(0); 2132 } 2133 #endif 2134 2135 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2136 { 2137 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2138 PetscInt lda; 2139 2140 PetscFunctionBegin; 2141 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2142 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2143 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2144 a->vecinuse = col + 1; 2145 PetscCall(MatDenseGetLDA(a->A, &lda)); 2146 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 2147 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2148 *v = a->cvec; 2149 PetscFunctionReturn(0); 2150 } 2151 2152 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2153 { 2154 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2155 2156 PetscFunctionBegin; 2157 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2158 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 2159 a->vecinuse = 0; 2160 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 2161 PetscCall(VecResetArray(a->cvec)); 2162 if (v) *v = NULL; 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2167 { 2168 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2169 PetscInt lda; 2170 2171 PetscFunctionBegin; 2172 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2173 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2174 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2175 a->vecinuse = col + 1; 2176 PetscCall(MatDenseGetLDA(a->A, &lda)); 2177 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 2178 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2179 PetscCall(VecLockReadPush(a->cvec)); 2180 *v = a->cvec; 2181 PetscFunctionReturn(0); 2182 } 2183 2184 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2185 { 2186 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2187 2188 PetscFunctionBegin; 2189 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2190 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2191 a->vecinuse = 0; 2192 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 2193 PetscCall(VecLockReadPop(a->cvec)); 2194 PetscCall(VecResetArray(a->cvec)); 2195 if (v) *v = NULL; 2196 PetscFunctionReturn(0); 2197 } 2198 2199 
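/*
   A minimal usage sketch of the column-vector access protocol implemented by the
   MatDenseGetColumnVec*() / MatDenseRestoreColumnVec*() routines in this file
   (illustrative only; assumes an assembled MATMPIDENSE matrix X and a vector y with
   a compatible row layout):

     Vec         col;
     PetscScalar dot;
     PetscCall(MatDenseGetColumnVecRead(X, 0, &col));     // borrow column 0, read-only
     PetscCall(VecDot(col, y, &dot));                     // use it like any other Vec
     PetscCall(MatDenseRestoreColumnVecRead(X, 0, &col)); // restore before the next access

   Only one column may be borrowed at a time; the vecinuse checks in these routines
   enforce that a matching restore happens before another get.
*/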
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2200 { 2201 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2202 PetscInt lda; 2203 2204 PetscFunctionBegin; 2205 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2206 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2207 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2208 a->vecinuse = col + 1; 2209 PetscCall(MatDenseGetLDA(a->A, &lda)); 2210 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2211 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2212 *v = a->cvec; 2213 PetscFunctionReturn(0); 2214 } 2215 2216 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2217 { 2218 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2219 2220 PetscFunctionBegin; 2221 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2222 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2223 a->vecinuse = 0; 2224 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2225 PetscCall(VecResetArray(a->cvec)); 2226 if (v) *v = NULL; 2227 PetscFunctionReturn(0); 2228 } 2229 2230 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 2231 { 2232 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2233 Mat_MPIDense *c; 2234 MPI_Comm comm; 2235 PetscInt pbegin, pend; 2236 2237 PetscFunctionBegin; 2238 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2239 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2240 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2241 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 2242 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 2243 if (!a->cmat) { 2244 PetscCall(MatCreate(comm, &a->cmat)); 2245 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 2246 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 2247 else { 2248 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2249 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2250 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2251 } 2252 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2253 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2254 } else { 2255 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 2256 if (same && a->cmat->rmap->N != A->rmap->N) { 2257 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 2258 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2259 } 2260 if (!same) { 2261 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 2262 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 2263 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2264 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2265 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2266 } 2267 if (cend - cbegin != a->cmat->cmap->N) { 2268 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 2269 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 2270 
PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2271 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2272 } 2273 } 2274 c = (Mat_MPIDense *)a->cmat->data; 2275 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2276 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 2277 2278 a->cmat->preallocated = PETSC_TRUE; 2279 a->cmat->assembled = PETSC_TRUE; 2280 #if defined(PETSC_HAVE_DEVICE) 2281 a->cmat->offloadmask = c->A->offloadmask; 2282 #endif 2283 a->matinuse = cbegin + 1; 2284 *v = a->cmat; 2285 PetscFunctionReturn(0); 2286 } 2287 2288 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 2289 { 2290 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2291 Mat_MPIDense *c; 2292 2293 PetscFunctionBegin; 2294 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 2295 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 2296 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 2297 a->matinuse = 0; 2298 c = (Mat_MPIDense *)a->cmat->data; 2299 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 2300 if (v) *v = NULL; 2301 #if defined(PETSC_HAVE_DEVICE) 2302 A->offloadmask = a->A->offloadmask; 2303 #endif 2304 PetscFunctionReturn(0); 2305 } 2306 2307 /*MC 2308 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 2309 2310 Options Database Keys: 2311 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 2312 2313 Level: beginner 2314 2315 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 2316 M*/ 2317 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 2318 { 2319 Mat_MPIDense *a; 2320 2321 PetscFunctionBegin; 2322 PetscCall(PetscNew(&a)); 2323 mat->data = (void *)a; 2324 PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps))); 2325 2326 mat->insertmode = NOT_SET_VALUES; 2327 2328 /* build cache for off array entries formed */ 2329 a->donotstash = PETSC_FALSE; 2330 2331 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 2332 2333 /* stuff used for matrix vector multiply */ 2334 a->lvec = NULL; 2335 a->Mvctx = NULL; 2336 a->roworiented = PETSC_TRUE; 2337 2338 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 2339 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 2340 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 2341 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 2342 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 2343 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 2344 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 2345 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 2346 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 2347 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 
2348 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 2349 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 2350 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 2351 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 2352 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 2353 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 2354 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 2355 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 2356 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 2357 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 2358 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 2359 #if defined(PETSC_HAVE_ELEMENTAL) 2360 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 2361 #endif 2362 #if defined(PETSC_HAVE_SCALAPACK) 2363 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 2364 #endif 2365 #if defined(PETSC_HAVE_CUDA) 2366 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 2367 #endif 2368 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 2369 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2370 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2371 #if defined(PETSC_HAVE_CUDA) 2372 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2373 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2374 #endif 2375 #if defined(PETSC_HAVE_HIP) 2376 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 2377 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2378 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2379 #endif 2380 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 2381 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 2382 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 2383 PetscFunctionReturn(0); 2384 } 
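/*
   Typical creation sequence for a MATMPIDENSE matrix (illustrative sketch; the
   communicator and the global sizes M and N are placeholders):

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A, NULL)); // NULL: PETSc allocates the local array
     // ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual ...

   MatCreateDense() performs the same steps in a single call.
*/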
2385 2386 /*MC 2387 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 2388 2389 Options Database Keys: 2390 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()` 2391 2392 Level: beginner 2393 2394 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP` 2395 M*/ 2396 #if defined(PETSC_HAVE_CUDA) 2397 #include <petsc/private/deviceimpl.h> 2398 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 2399 { 2400 PetscFunctionBegin; 2401 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 2402 PetscCall(MatCreate_MPIDense(B)); 2403 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B)); 2404 PetscFunctionReturn(0); 2405 } 2406 #endif 2407 2408 /*MC 2409 MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs. 2410 2411 Options Database Keys: 2412 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()` 2413 2414 Level: beginner 2415 2416 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP` 2417 M*/ 2418 #if defined(PETSC_HAVE_HIP) 2419 #include <petsc/private/deviceimpl.h> 2420 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B) 2421 { 2422 PetscFunctionBegin; 2423 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP)); 2424 PetscCall(MatCreate_MPIDense(B)); 2425 PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B)); 2426 PetscFunctionReturn(0); 2427 } 2428 #endif 2429 2430 /*MC 2431 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 2432 2433 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 2434 and `MATMPIDENSE` otherwise. 2435 2436 Options Database Keys: 2437 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 2438 2439 Level: beginner 2440 2441 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 2442 M*/ 2443 2444 /*MC 2445 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 2446 2447 2448 2449 This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator, 2450 and `MATMPIDENSECUDA` otherwise. 2451 2452 Options Database Keys: 2453 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()` 2454 2455 Level: beginner 2456 2457 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2458 M*/ 2459 2460 /*MC 2461 MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs. 2462 2463 This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator, 2464 and `MATMPIDENSEHIP` otherwise. 2465 2466 Options Database Keys: 2467 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()` 2468 2469 Level: beginner 2470 2471 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2472 M*/ 2473 2474 /*@C 2475 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 2476 2477 Collective 2478 2479 Input Parameters: 2480 + B - the matrix 2481 - data - optional location of matrix data. Set data=NULL for PETSc 2482 to control all matrix memory allocation.
2483 2484 Notes: 2485 The dense format is fully compatible with standard Fortran 77 2486 storage by columns. 2487 2488 The data input variable is intended primarily for Fortran programmers 2489 who wish to allocate their own matrix memory space. Most users should 2490 set data=NULL. 2491 2492 Level: intermediate 2493 2494 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2495 @*/ 2496 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 2497 { 2498 PetscFunctionBegin; 2499 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2500 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /*@ 2505 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 2506 array provided by the user. This is useful to avoid copying an array 2507 into a matrix 2508 2509 Not Collective 2510 2511 Input Parameters: 2512 + mat - the matrix 2513 - array - the array in column major order 2514 2515 Note: 2516 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 2517 freed when the matrix is destroyed. 2518 2519 Level: developer 2520 2521 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 2522 `MatDenseReplaceArray()` 2523 @*/ 2524 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 2525 { 2526 PetscFunctionBegin; 2527 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2528 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2529 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2530 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2531 mat->offloadmask = PETSC_OFFLOAD_CPU; 2532 #endif 2533 PetscFunctionReturn(0); 2534 } 2535 2536 /*@ 2537 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 2538 2539 Not Collective 2540 2541 Input Parameters: 2542 . mat - the matrix 2543 2544 Note: 2545 You can only call this after a call to `MatDensePlaceArray()` 2546 2547 Level: developer 2548 2549 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2550 @*/ 2551 PetscErrorCode MatDenseResetArray(Mat mat) 2552 { 2553 PetscFunctionBegin; 2554 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2555 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 2556 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 /*@ 2561 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2562 array provided by the user. This is useful to avoid copying an array 2563 into a matrix 2564 2565 Not Collective 2566 2567 Input Parameters: 2568 + mat - the matrix 2569 - array - the array in column major order 2570 2571 Note: 2572 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 2573 freed by the user. It will be freed when the matrix is destroyed. 
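   A sketch of the intended use (the array length here is illustrative; it must match
   the local leading dimension times the number of columns of the matrix):

     PetscScalar *newarr;
     PetscCall(PetscMalloc1(lda * ncols, &newarr)); // must come from the PetscMalloc() family
     // ... fill newarr in column major order ...
     PetscCall(MatDenseReplaceArray(mat, newarr));  // the matrix now owns newarr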
2574 2575 Level: developer 2576 2577 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 2578 @*/ 2579 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 2580 { 2581 PetscFunctionBegin; 2582 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2583 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2584 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2585 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2586 mat->offloadmask = PETSC_OFFLOAD_CPU; 2587 #endif 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #if defined(PETSC_HAVE_CUDA) 2592 /*@C 2593 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2594 array provided by the user. This is useful to avoid copying an array 2595 into a matrix 2596 2597 Not Collective 2598 2599 Input Parameters: 2600 + mat - the matrix 2601 - array - the array in column major order 2602 2603 Note: 2604 You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be 2605 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2606 2607 Level: developer 2608 2609 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()` 2610 @*/ 2611 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) 2612 { 2613 PetscFunctionBegin; 2614 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2615 PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2616 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2617 mat->offloadmask = PETSC_OFFLOAD_GPU; 2618 PetscFunctionReturn(0); 2619 } 2620 2621 /*@C 2622 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()` 2623 2624 Not Collective 2625 2626 Input Parameters: 2627 . mat - the matrix 2628 2629 Note: 2630 You can only call this after a call to `MatDenseCUDAPlaceArray()` 2631 2632 Level: developer 2633 2634 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2635 @*/ 2636 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2637 { 2638 PetscFunctionBegin; 2639 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2640 PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat)); 2641 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2642 PetscFunctionReturn(0); 2643 } 2644 2645 /*@C 2646 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2647 array provided by the user. This is useful to avoid copying an array 2648 into a matrix 2649 2650 Not Collective 2651 2652 Input Parameters: 2653 + mat - the matrix 2654 - array - the array in column major order 2655 2656 Note: 2657 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2658 The memory passed in CANNOT be freed by the user. It will be freed 2659 when the matrix is destroyed. The array should respect the matrix leading dimension. 
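   A sketch of the intended use (the device allocation and sizes are illustrative; error
   checking of the raw CUDA call is omitted):

     PetscScalar *d_new;
     cudaMalloc((void **)&d_new, (size_t)lda * ncols * sizeof(PetscScalar));
     // ... fill d_new on the device in column major order ...
     PetscCall(MatDenseCUDAReplaceArray(mat, d_new)); // the matrix takes ownership of d_new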
2660 2661 Level: developer 2662 2663 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2664 @*/ 2665 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) 2666 { 2667 PetscFunctionBegin; 2668 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2669 PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2670 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2671 mat->offloadmask = PETSC_OFFLOAD_GPU; 2672 PetscFunctionReturn(0); 2673 } 2674 2675 /*@C 2676 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix. 2677 2678 Not Collective 2679 2680 Input Parameters: 2681 . A - the matrix 2682 2683 Output Parameter: 2684 . array - the GPU array in column major order 2685 2686 Notes: 2687 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`. 2688 2689 The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed. 2690 2691 Level: developer 2692 2693 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2694 @*/ 2695 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2696 { 2697 PetscFunctionBegin; 2698 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2699 PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2700 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2701 PetscFunctionReturn(0); 2702 } 2703 2704 /*@C 2705 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 2706 2707 Not Collective 2708 2709 Input Parameters: 2710 + A - the matrix 2711 - array - the GPU array in column major order 2712 2713 Level: developer 2714 2715 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2716 @*/ 2717 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2718 { 2719 PetscFunctionBegin; 2720 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2721 PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2722 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2723 A->offloadmask = PETSC_OFFLOAD_GPU; 2724 PetscFunctionReturn(0); 2725 } 2726 2727 /*@C 2728 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed. 2729 2730 Not Collective 2731 2732 Input Parameters: 2733 . A - the matrix 2734 2735 Output Parameter: 2736 . array - the GPU array in column major order 2737 2738 Note: 2739 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
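   A typical read-only access pattern (sketch; the user kernel launch is a placeholder):

     const PetscScalar *d_a;
     PetscCall(MatDenseCUDAGetArrayRead(A, &d_a));
     // ... launch a kernel that only reads d_a ...
     PetscCall(MatDenseCUDARestoreArrayRead(A, &d_a));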
2740 2741 Level: developer 2742 2743 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2744 @*/ 2745 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2746 { 2747 PetscFunctionBegin; 2748 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2749 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2750 PetscFunctionReturn(0); 2751 } 2752 2753 /*@C 2754 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2755 2756 Not Collective 2757 2758 Input Parameters: 2759 + A - the matrix 2760 - array - the GPU array in column major order 2761 2762 Note: 2763 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2764 2765 Level: developer 2766 2767 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2768 @*/ 2769 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2770 { 2771 PetscFunctionBegin; 2772 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 /*@C 2777 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 2778 2779 Not Collective 2780 2781 Input Parameters: 2782 . A - the matrix 2783 2784 Output Parameters 2785 . array - the GPU array in column major order 2786 2787 Note: 2788 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`. 2789 2790 Level: developer 2791 2792 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2793 @*/ 2794 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2795 { 2796 PetscFunctionBegin; 2797 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2798 PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a)); 2799 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2800 PetscFunctionReturn(0); 2801 } 2802 2803 /*@C 2804 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`. 
2805 2806 Not Collective 2807 2808 Input Parameters: 2809 + A - the matrix 2810 - array - the GPU array in column major order 2811 2812 Level: developer 2813 2814 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2815 @*/ 2816 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2817 { 2818 PetscFunctionBegin; 2819 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2820 PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a)); 2821 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2822 A->offloadmask = PETSC_OFFLOAD_GPU; 2823 PetscFunctionReturn(0); 2824 } 2825 #endif 2826 2827 #if defined(PETSC_HAVE_HIP) 2828 /*@C 2829 MatDenseHIPPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2830 array provided by the user. This is useful to avoid copying an array 2831 into a matrix 2832 2833 Not Collective 2834 2835 Input Parameters: 2836 + mat - the matrix 2837 - array - the array in column major order 2838 2839 Notes: 2840 You can return to the original array with a call to MatDenseHIPResetArray(). The user is responsible for freeing this array; it will not be 2841 freed when the matrix is destroyed. The array must have been allocated with hipMalloc(). 2842 2843 Level: developer 2844 2845 .seealso: MatDenseHIPGetArray(), MatDenseHIPResetArray() 2846 @*/ 2847 PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array) 2848 { 2849 PetscFunctionBegin; 2850 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2851 PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2852 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2853 mat->offloadmask = PETSC_OFFLOAD_GPU; 2854 PetscFunctionReturn(0); 2855 } 2856 2857 /*@C 2858 MatDenseHIPResetArray - Resets the matrix array to that it previously had before the call to MatDenseHIPPlaceArray() 2859 2860 Not Collective 2861 2862 Input Parameters: 2863 . mat - the matrix 2864 2865 Notes: 2866 You can only call this after a call to MatDenseHIPPlaceArray() 2867 2868 Level: developer 2869 2870 .seealso: MatDenseHIPGetArray(), MatDenseHIPPlaceArray() 2871 2872 @*/ 2873 PetscErrorCode MatDenseHIPResetArray(Mat mat) 2874 { 2875 PetscFunctionBegin; 2876 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2877 PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat)); 2878 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2879 PetscFunctionReturn(0); 2880 } 2881 2882 /*@C 2883 MatDenseHIPReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2884 array provided by the user. This is useful to avoid copying an array 2885 into a matrix 2886 2887 Not Collective 2888 2889 Input Parameters: 2890 + mat - the matrix 2891 - array - the array in column major order 2892 2893 Notes: 2894 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2895 The memory passed in CANNOT be freed by the user. It will be freed 2896 when the matrix is destroyed. The array should respect the matrix leading dimension. 
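   For comparison, a temporary override that leaves ownership with the caller uses MatDenseHIPPlaceArray()/MatDenseHIPResetArray() instead (a minimal sketch; d_tmp is assumed to have been allocated with hipMalloc() and to respect the matrix leading dimension):
.vb
  PetscScalar *d_tmp; // obtained earlier from hipMalloc(), sized lda*N

  PetscCall(MatDenseHIPPlaceArray(mat, d_tmp));  // mat now wraps d_tmp
  // ... operate on mat; all accesses go through d_tmp ...
  PetscCall(MatDenseHIPResetArray(mat));         // restore the original GPU array
  // the caller frees d_tmp with hipFree() when it is no longer needed
.ve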
2897 2898 Level: developer 2899 2900 .seealso: MatDenseHIPGetArray(), MatDenseHIPPlaceArray(), MatDenseHIPResetArray() 2901 @*/ 2902 PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array) 2903 { 2904 PetscFunctionBegin; 2905 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2906 PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2907 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2908 mat->offloadmask = PETSC_OFFLOAD_GPU; 2909 PetscFunctionReturn(0); 2910 } 2911 2912 /*@C 2913 MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a dense matrix. 2914 2915 Not Collective 2916 2917 Input Parameters: 2918 . A - the matrix 2919 2920 Output Parameters 2921 . array - the GPU array in column major order 2922 2923 Notes: 2924 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseHIPGetArray(). The array must be restored with MatDenseHIPRestoreArrayWrite() when no longer needed. 2925 2926 Level: developer 2927 2928 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArrayRead() 2929 @*/ 2930 PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a) 2931 { 2932 PetscFunctionBegin; 2933 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2934 PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2935 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2936 PetscFunctionReturn(0); 2937 } 2938 2939 /*@C 2940 MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a dense matrix previously obtained with MatDenseHIPGetArrayWrite(). 2941 2942 Not Collective 2943 2944 Input Parameters: 2945 + A - the matrix 2946 - array - the GPU array in column major order 2947 2948 Notes: 2949 2950 Level: developer 2951 2952 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead() 2953 @*/ 2954 PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a) 2955 { 2956 PetscFunctionBegin; 2957 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2958 PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2959 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2960 A->offloadmask = PETSC_OFFLOAD_GPU; 2961 PetscFunctionReturn(0); 2962 } 2963 2964 /*@C 2965 MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArrayRead() when no longer needed. 2966 2967 Not Collective 2968 2969 Input Parameters: 2970 . A - the matrix 2971 2972 Output Parameters 2973 . array - the GPU array in column major order 2974 2975 Notes: 2976 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite(). 
2977 2978 Level: developer 2979 2980 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead() 2981 @*/ 2982 PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a) 2983 { 2984 PetscFunctionBegin; 2985 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2986 PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 /*@C 2991 MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a dense matrix previously obtained with a call to MatDenseHIPGetArrayRead(). 2992 2993 Not Collective 2994 2995 Input Parameters: 2996 + A - the matrix 2997 - array - the GPU array in column major order 2998 2999 Notes: 3000 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite(). 3001 3002 Level: developer 3003 3004 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPGetArrayRead() 3005 @*/ 3006 PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a) 3007 { 3008 PetscFunctionBegin; 3009 PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 /*@C 3014 MatDenseHIPGetArray - Provides access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArray() when no longer needed. 3015 3016 Not Collective 3017 3018 Input Parameters: 3019 . A - the matrix 3020 3021 Output Parameters 3022 . array - the GPU array in column major order 3023 3024 Notes: 3025 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite(). For read-only access, use MatDenseHIPGetArrayRead(). 3026 3027 Level: developer 3028 3029 .seealso: MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead() 3030 @*/ 3031 PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a) 3032 { 3033 PetscFunctionBegin; 3034 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3035 PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a)); 3036 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 /*@C 3041 MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a dense matrix previously obtained with MatDenseHIPGetArray(). 3042 3043 Not Collective 3044 3045 Input Parameters: 3046 + A - the matrix 3047 - array - the GPU array in column major order 3048 3049 Notes: 3050 3051 Level: developer 3052 3053 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead() 3054 @*/ 3055 PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a) 3056 { 3057 PetscFunctionBegin; 3058 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3059 PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a)); 3060 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 3061 A->offloadmask = PETSC_OFFLOAD_GPU; 3062 PetscFunctionReturn(0); 3063 } 3064 #endif 3065 3066 /*@C 3067 MatCreateDense - Creates a matrix in `MATDENSE` format. 3068 3069 Collective 3070 3071 Input Parameters: 3072 + comm - MPI communicator 3073 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3074 . 
n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3075 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 3076 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 3077 - data - optional location of matrix data. Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 3078 to control all matrix memory allocation. 3079 3080 Output Parameter: 3081 . A - the matrix 3082 3083 Notes: 3084 The dense format is fully compatible with standard Fortran 77 3085 storage by columns. 3086 3087 Although local portions of the matrix are stored in column-major 3088 order, the matrix is partitioned across MPI ranks by row. 3089 3090 The data input variable is intended primarily for Fortran programmers 3091 who wish to allocate their own matrix memory space. Most users should 3092 set data=NULL (`PETSC_NULL_SCALAR` for Fortran users). 3093 3094 The user MUST specify either the local or global matrix dimensions 3095 (possibly both). 3096 3097 Level: intermediate 3098 3099 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 3100 @*/ 3101 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3102 { 3103 PetscFunctionBegin; 3104 PetscCall(MatCreate(comm, A)); 3105 PetscCall(MatSetSizes(*A, m, n, M, N)); 3106 PetscCall(MatSetType(*A, MATDENSE)); 3107 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3108 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 3109 PetscFunctionReturn(0); 3110 } 3111 3112 #if defined(PETSC_HAVE_CUDA) 3113 /*@C 3114 MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA. 3115 3116 Collective 3117 3118 Input Parameters: 3119 + comm - MPI communicator 3120 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3121 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3122 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 3123 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 3124 - data - optional location of GPU matrix data. Set data=NULL for PETSc 3125 to control matrix memory allocation. 3126 3127 Output Parameter: 3128 . A - the matrix 3129 3130 Level: intermediate 3131 3132 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()` 3133 @*/ 3134 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3135 { 3136 PetscFunctionBegin; 3137 PetscCall(MatCreate(comm, A)); 3138 PetscValidLogicalCollectiveBool(*A, !!data, 6); 3139 PetscCall(MatSetSizes(*A, m, n, M, N)); 3140 PetscCall(MatSetType(*A, MATDENSECUDA)); 3141 PetscCall(MatSeqDenseCUDASetPreallocation(*A, data)); 3142 PetscCall(MatMPIDenseCUDASetPreallocation(*A, data)); 3143 PetscFunctionReturn(0); 3144 } 3145 #endif 3146 3147 #if defined(PETSC_HAVE_HIP) 3148 /*@C 3149 MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP. 3150 3151 Collective 3152 3153 Input Parameters: 3154 + comm - MPI communicator 3155 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3156 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3157 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 3158 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 3159 - data - optional location of GPU matrix data. 
Set data=NULL for PETSc 3160 to control matrix memory allocation. 3161 3162 Output Parameter: 3163 . A - the matrix 3164 3165 Level: intermediate 3166 3167 .seealso: `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()` 3168 @*/ 3169 PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3170 { 3171 PetscFunctionBegin; 3172 PetscCall(MatCreate(comm, A)); 3173 PetscValidLogicalCollectiveBool(*A, !!data, 6); 3174 PetscCall(MatSetSizes(*A, m, n, M, N)); 3175 PetscCall(MatSetType(*A, MATDENSEHIP)); 3176 PetscCall(MatSeqDenseHIPSetPreallocation(*A, data)); 3177 PetscCall(MatMPIDenseHIPSetPreallocation(*A, data)); 3178 PetscFunctionReturn(0); 3179 } 3180 #endif 3181 3182 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 3183 { 3184 Mat mat; 3185 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 3186 3187 PetscFunctionBegin; 3188 *newmat = NULL; 3189 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 3190 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3191 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 3192 a = (Mat_MPIDense *)mat->data; 3193 3194 mat->factortype = A->factortype; 3195 mat->assembled = PETSC_TRUE; 3196 mat->preallocated = PETSC_TRUE; 3197 3198 mat->insertmode = NOT_SET_VALUES; 3199 a->donotstash = oldmat->donotstash; 3200 3201 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 3202 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 3203 3204 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3205 3206 *newmat = mat; 3207 PetscFunctionReturn(0); 3208 } 3209 3210 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 3211 { 3212 PetscBool isbinary; 3213 #if defined(PETSC_HAVE_HDF5) 3214 PetscBool ishdf5; 3215 #endif 3216 3217 PetscFunctionBegin; 3218 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3219 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3220 /* force binary viewer to load .info file if it has not yet done so */ 3221 PetscCall(PetscViewerSetUp(viewer)); 3222 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3223 #if defined(PETSC_HAVE_HDF5) 3224 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3225 #endif 3226 if (isbinary) { 3227 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 3228 #if defined(PETSC_HAVE_HDF5) 3229 } else if (ishdf5) { 3230 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 3231 #endif 3232 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3233 PetscFunctionReturn(0); 3234 } 3235 3236 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 3237 { 3238 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 3239 Mat a, b; 3240 3241 PetscFunctionBegin; 3242 a = matA->A; 3243 b = matB->A; 3244 PetscCall(MatEqual(a, b, flag)); 3245 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 3246 PetscFunctionReturn(0); 3247 } 3248 3249 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 3250 { 3251 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 3252 3253 PetscFunctionBegin; 3254 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 3255 PetscCall(MatDestroy(&atb->atb)); 3256 PetscCall(PetscFree(atb)); 3257 PetscFunctionReturn(0); 3258 } 3259 3260 
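/*
   Example usage (a minimal sketch, not an actual PETSc test): create a parallel dense matrix
   with MatCreateDense(), fill its locally owned rows, then exercise the MatDuplicate() and
   MatEqual() implementations above.  PetscInitialize()/PetscFinalize() bracketing is assumed.

     Mat       A, B;
     PetscInt  i, rstart, rend, N = 4;
     PetscBool eq;

     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, N, N, NULL, &A));
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (i = rstart; i < rend; i++) {
       PetscScalar v = (PetscScalar)(i + 1);
       PetscCall(MatSetValues(A, 1, &i, 1, &i, &v, INSERT_VALUES)); // local diagonal entries
     }
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); // deep copy; layouts are shared by reference
     PetscCall(MatEqual(A, B, &eq));                  // collective; eq is PETSC_TRUE on all ranks
     PetscCall(MatDestroy(&A));
     PetscCall(MatDestroy(&B));
*/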
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 3261 { 3262 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 3263 3264 PetscFunctionBegin; 3265 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 3266 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 3267 PetscCall(PetscFree(abt)); 3268 PetscFunctionReturn(0); 3269 } 3270 3271 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3272 { 3273 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3274 Mat_TransMatMultDense *atb; 3275 MPI_Comm comm; 3276 PetscMPIInt size, *recvcounts; 3277 PetscScalar *carray, *sendbuf; 3278 const PetscScalar *atbarray; 3279 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 3280 const PetscInt *ranges; 3281 3282 PetscFunctionBegin; 3283 MatCheckProduct(C, 3); 3284 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3285 atb = (Mat_TransMatMultDense *)C->product->data; 3286 recvcounts = atb->recvcounts; 3287 sendbuf = atb->sendbuf; 3288 3289 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3290 PetscCallMPI(MPI_Comm_size(comm, &size)); 3291 3292 /* compute atbarray = aseq^T * bseq */ 3293 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 3294 3295 PetscCall(MatGetOwnershipRanges(C, &ranges)); 3296 3297 /* arrange atbarray into sendbuf */ 3298 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 3299 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 3300 for (proc = 0, k = 0; proc < size; proc++) { 3301 for (j = 0; j < cN; j++) { 3302 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 3303 } 3304 } 3305 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 3306 3307 /* sum all atbarray to local values of C */ 3308 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 3309 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 3310 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 3311 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3312 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3313 PetscFunctionReturn(0); 3314 } 3315 3316 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3317 { 3318 MPI_Comm comm; 3319 PetscMPIInt size; 3320 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 3321 Mat_TransMatMultDense *atb; 3322 PetscBool cisdense = PETSC_FALSE; 3323 PetscInt i; 3324 const PetscInt *ranges; 3325 3326 PetscFunctionBegin; 3327 MatCheckProduct(C, 4); 3328 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3329 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3330 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 3331 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3332 } 3333 3334 /* create matrix product C */ 3335 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 3336 #if defined(PETSC_HAVE_CUDA) 3337 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 3338 #endif 3339 #if defined(PETSC_HAVE_HIP) 3340 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 3341 #endif 3342 if 
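  /*
     Outline of the "cyclic" algorithm implemented below for C = A * B^T, with A, B and C all
     row-distributed dense matrices:
       - every process needs all rows of B to form its local rows of C, so the local blocks of B
         are passed around a ring (send to rank-1, receive from rank+1), ping-ponging between
         abt->buf[0] and abt->buf[1];
       - at each step the block currently held originates from process recvisfrom, and one GEMM
         fills the matching column block C_loc(:, ranges[recvisfrom] .. ranges[recvisfrom+1]-1);
       - the nonblocking send/receive of the next block is posted before that GEMM, so
         communication overlaps with computation.
  */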
(!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 3343 PetscCall(MatSetUp(C)); 3344 3345 /* create data structure for reuse C */ 3346 PetscCallMPI(MPI_Comm_size(comm, &size)); 3347 PetscCall(PetscNew(&atb)); 3348 cM = C->rmap->N; 3349 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 3350 PetscCall(MatGetOwnershipRanges(C, &ranges)); 3351 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 3352 3353 C->product->data = atb; 3354 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 3355 PetscFunctionReturn(0); 3356 } 3357 3358 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3359 { 3360 MPI_Comm comm; 3361 PetscMPIInt i, size; 3362 PetscInt maxRows, bufsiz; 3363 PetscMPIInt tag; 3364 PetscInt alg; 3365 Mat_MatTransMultDense *abt; 3366 Mat_Product *product = C->product; 3367 PetscBool flg; 3368 3369 PetscFunctionBegin; 3370 MatCheckProduct(C, 4); 3371 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3372 /* check local size of A and B */ 3373 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 3374 3375 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 3376 alg = flg ? 0 : 1; 3377 3378 /* setup matrix product C */ 3379 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 3380 PetscCall(MatSetType(C, MATMPIDENSE)); 3381 PetscCall(MatSetUp(C)); 3382 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 3383 3384 /* create data structure for reuse C */ 3385 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 3386 PetscCallMPI(MPI_Comm_size(comm, &size)); 3387 PetscCall(PetscNew(&abt)); 3388 abt->tag = tag; 3389 abt->alg = alg; 3390 switch (alg) { 3391 case 1: /* alg: "cyclic" */ 3392 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 3393 bufsiz = A->cmap->N * maxRows; 3394 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 3395 break; 3396 default: /* alg: "allgatherv" */ 3397 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 3398 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 3399 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 3400 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 3401 break; 3402 } 3403 3404 C->product->data = abt; 3405 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 3406 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 3407 PetscFunctionReturn(0); 3408 } 3409 3410 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 3411 { 3412 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3413 Mat_MatTransMultDense *abt; 3414 MPI_Comm comm; 3415 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 3416 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 3417 PetscInt i, cK = A->cmap->N, k, j, bn; 3418 PetscScalar _DOne = 1.0, _DZero = 0.0; 3419 const PetscScalar *av, *bv; 3420 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 3421 MPI_Request reqs[2]; 3422 const PetscInt *ranges; 3423 3424 PetscFunctionBegin; 3425 
MatCheckProduct(C, 3); 3426 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3427 abt = (Mat_MatTransMultDense *)C->product->data; 3428 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 3429 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3430 PetscCallMPI(MPI_Comm_size(comm, &size)); 3431 PetscCall(MatDenseGetArrayRead(a->A, &av)); 3432 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 3433 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 3434 PetscCall(MatDenseGetLDA(a->A, &i)); 3435 PetscCall(PetscBLASIntCast(i, &alda)); 3436 PetscCall(MatDenseGetLDA(b->A, &i)); 3437 PetscCall(PetscBLASIntCast(i, &blda)); 3438 PetscCall(MatDenseGetLDA(c->A, &i)); 3439 PetscCall(PetscBLASIntCast(i, &clda)); 3440 PetscCall(MatGetOwnershipRanges(B, &ranges)); 3441 bn = B->rmap->n; 3442 if (blda == bn) { 3443 sendbuf = (PetscScalar *)bv; 3444 } else { 3445 sendbuf = abt->buf[0]; 3446 for (k = 0, i = 0; i < cK; i++) { 3447 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 3448 } 3449 } 3450 if (size > 1) { 3451 sendto = (rank + size - 1) % size; 3452 recvfrom = (rank + size + 1) % size; 3453 } else { 3454 sendto = recvfrom = 0; 3455 } 3456 PetscCall(PetscBLASIntCast(cK, &ck)); 3457 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 3458 recvisfrom = rank; 3459 for (i = 0; i < size; i++) { 3460 /* we have finished receiving in sending, bufs can be read/modified */ 3461 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 3462 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 3463 3464 if (nextrecvisfrom != rank) { 3465 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 3466 sendsiz = cK * bn; 3467 recvsiz = cK * nextbn; 3468 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av, *bv;
  PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt           cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
  }
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
  if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data,
PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3547 abt = (Mat_MatTransMultDense *)C->product->data; 3548 switch (abt->alg) { 3549 case 1: 3550 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 3551 break; 3552 default: 3553 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 3554 break; 3555 } 3556 PetscFunctionReturn(0); 3557 } 3558 3559 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 3560 { 3561 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 3562 3563 PetscFunctionBegin; 3564 PetscCall(MatDestroy(&ab->Ce)); 3565 PetscCall(MatDestroy(&ab->Ae)); 3566 PetscCall(MatDestroy(&ab->Be)); 3567 PetscCall(PetscFree(ab)); 3568 PetscFunctionReturn(0); 3569 } 3570 3571 #if defined(PETSC_HAVE_ELEMENTAL) 3572 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3573 { 3574 Mat_MatMultDense *ab; 3575 3576 PetscFunctionBegin; 3577 MatCheckProduct(C, 3); 3578 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 3579 ab = (Mat_MatMultDense *)C->product->data; 3580 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 3581 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 3582 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 3583 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 3584 PetscFunctionReturn(0); 3585 } 3586 3587 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3588 { 3589 Mat Ae, Be, Ce; 3590 Mat_MatMultDense *ab; 3591 3592 PetscFunctionBegin; 3593 MatCheckProduct(C, 4); 3594 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3595 /* check local size of A and B */ 3596 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 3597 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3598 } 3599 3600 /* create elemental matrices Ae and Be */ 3601 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 3602 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 3603 PetscCall(MatSetType(Ae, MATELEMENTAL)); 3604 PetscCall(MatSetUp(Ae)); 3605 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 3606 3607 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 3608 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 3609 PetscCall(MatSetType(Be, MATELEMENTAL)); 3610 PetscCall(MatSetUp(Be)); 3611 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 3612 3613 /* compute symbolic Ce = Ae*Be */ 3614 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 3615 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 3616 3617 /* setup C */ 3618 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3619 PetscCall(MatSetType(C, MATDENSE)); 3620 PetscCall(MatSetUp(C)); 3621 3622 /* create data structure for reuse Cdense */ 3623 PetscCall(PetscNew(&ab)); 3624 ab->Ae = Ae; 3625 ab->Be = Be; 3626 ab->Ce = Ce; 3627 3628 C->product->data = ab; 3629 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 3630 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 3631 PetscFunctionReturn(0); 3632 } 3633 #endif 3634 /* 
----------------------------------------------- */ 3635 #if defined(PETSC_HAVE_ELEMENTAL) 3636 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 3637 { 3638 PetscFunctionBegin; 3639 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 3640 C->ops->productsymbolic = MatProductSymbolic_AB; 3641 PetscFunctionReturn(0); 3642 } 3643 #endif 3644 3645 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 3646 { 3647 Mat_Product *product = C->product; 3648 Mat A = product->A, B = product->B; 3649 3650 PetscFunctionBegin; 3651 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 3652 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3653 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 3654 C->ops->productsymbolic = MatProductSymbolic_AtB; 3655 PetscFunctionReturn(0); 3656 } 3657 3658 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 3659 { 3660 Mat_Product *product = C->product; 3661 const char *algTypes[2] = {"allgatherv", "cyclic"}; 3662 PetscInt alg, nalg = 2; 3663 PetscBool flg = PETSC_FALSE; 3664 3665 PetscFunctionBegin; 3666 /* Set default algorithm */ 3667 alg = 0; /* default is allgatherv */ 3668 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 3669 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 3670 3671 /* Get runtime option */ 3672 if (product->api_user) { 3673 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 3674 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 3675 PetscOptionsEnd(); 3676 } else { 3677 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 3678 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 3679 PetscOptionsEnd(); 3680 } 3681 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 3682 3683 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3684 C->ops->productsymbolic = MatProductSymbolic_ABt; 3685 PetscFunctionReturn(0); 3686 } 3687 3688 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3689 { 3690 Mat_Product *product = C->product; 3691 3692 PetscFunctionBegin; 3693 switch (product->type) { 3694 #if defined(PETSC_HAVE_ELEMENTAL) 3695 case MATPRODUCT_AB: 3696 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 3697 break; 3698 #endif 3699 case MATPRODUCT_AtB: 3700 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 3701 break; 3702 case MATPRODUCT_ABt: 3703 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 3704 break; 3705 default: 3706 break; 3707 } 3708 PetscFunctionReturn(0); 3709 } 3710
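/*
   Example usage (a minimal sketch, not an actual PETSc test): forming C = A*B^T between two
   assembled MATMPIDENSE matrices goes through the MatProduct machinery configured above.  The
   algorithm selected in MatProductSetFromOptions_MPIDense_ABt() can also be chosen at run time,
   e.g. with -matmattransmult_mpidense_mpidense_via cyclic (MatMatTransposeMult() interface) or
   -mat_product_algorithm cyclic (MatProduct interface).

     Mat C;

     // one-shot convenience interface
     PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));

     // equivalent explicit MatProduct sequence (use one or the other), selecting "cyclic" directly
     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C, "cyclic")); // default is "allgatherv"
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
*/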