/*
  Basic functions for parallel dense matrices.
  Portions of this code are under:
  Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI dense matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
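/*
  A minimal usage sketch for MatDenseGetLocalMatrix() (hypothetical caller; A is
  assumed to be an already created MATMPIDENSE or MATSEQDENSE matrix). The
  returned matrix is borrowed, not a copy, and must not be destroyed by the
  caller:

     Mat      Aloc;
     PetscInt lda;

     PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
     PetscCall(MatDenseGetLDA(Aloc, &lda));

  lda is then the leading dimension of the local column-major storage.
*/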
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if defined(PETSC_HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif defined(PETSC_HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
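/*
  Sketch of the insertion pattern MatSetValues_MPIDense() supports (hypothetical
  caller; i and j may be any global indices). Locally owned rows are written
  directly into the sequential block; rows owned by other ranks are stashed and
  communicated during assembly:

     PetscInt    i = 0, j = 0;
     PetscScalar val = 1.0;

     PetscCall(MatSetValues(A, 1, &i, 1, &j, &val, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/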
PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if defined(PETSC_HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif defined(PETSC_HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}
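/*
  Sketch of the place/reset discipline implemented by MatDensePlaceArray_MPIDense()
  and MatDenseResetArray_MPIDense() above (hypothetical caller; buf must be a
  column-major array large enough for the local block and must outlive the
  placement):

     PetscCall(MatDensePlaceArray(A, buf));   A temporarily wraps buf
     ... use A ...
     PetscCall(MatDenseResetArray(A));        A points back at its own storage
*/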
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}
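/*
  MatAssemblyEnd_MPIDense() below drains the stash: each received message packs
  (row, column, value) triples with entries for the same row stored
  contiguously, so the loop locates every contiguous same-row run and
  re-inserts it with a single MatSetValues_MPIDense() call.
*/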
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}
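/*
  The transpose products below are the communication adjoints of
  MatMult_MPIDense()/MatMultAdd_MPIDense(): instead of broadcasting x into the
  work vector lvec with PetscSFBcast, the local transpose product is computed
  first and its contributions are accumulated into the distributed result with
  PetscSFReduce and MPIU_SUM.
*/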
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len = PetscMin(a->A->rmap->n, a->A->cmap->n);
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  radd = A->rmap->rstart * lda; /* offset of the first local diagonal entry; use lda (not the local row count) so padded storage is handled */
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(0);
}
"MatDenseRestoreArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 605 #if defined(PETSC_HAVE_ELEMENTAL) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 607 #endif 608 #if defined(PETSC_HAVE_SCALAPACK) 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 610 #endif 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 614 #if defined(PETSC_HAVE_CUDA) 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 632 #endif 633 #if defined(PETSC_HAVE_HIP) 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 635 
PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(0);
}
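/*
  MatGetInfo_MPIDense() reports the statistics of the local block unchanged for
  MAT_LOCAL and combines them across ranks with MPI_MAX or MPI_SUM reductions
  for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM, respectively.
*/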
PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
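/*
  In MatDiagonalScale_MPIDense() the left vector ll conforms to the row layout
  and is applied to the local rows directly, while the right vector rr is
  distributed across ranks and must first be broadcast into the column work
  vector lvec before the columns of the local block are scaled.
*/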
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}
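/*
  MatTranspose_MPIDense() below forms the transpose by inserting each local
  column of A as one global row of B with MatSetValues() and assembling the
  result; for MAT_INPLACE_MATRIX the assembled B then replaces A via
  MatHeaderMerge().
*/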
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
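/*
  Column reductions are computed on the sequential block and then combined
  across ranks: NORM_INFINITY uses an MPI_MAX reduction, all other types an
  MPI_SUM. For NORM_2 the local column norms are squared before the sum and the
  square root is taken afterwards; the REDUCTION_MEAN_* variants are reduced as
  sums and divided by the global number of rows at the end.
*/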
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}
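/*
  The routines below implement the device-side column borrowing protocol:
  MatDenseGetColumnVec*() places the device pointer of column col (at offset
  col*lda) into the cached vector a->cvec and records the checkout in
  a->vecinuse; the matching restore resets the array and clears the flag. Only
  one column vector may be outstanding at a time.
*/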
MatDenseGetColumnVec() first"); 1159 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1160 a->vecinuse = 0; 1161 PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse)); 1162 PetscCall(VecLockReadPop(a->cvec)); 1163 PetscCall(VecCUDAResetArray(a->cvec)); 1164 if (v) *v = NULL; 1165 PetscFunctionReturn(0); 1166 } 1167 1168 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1169 { 1170 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1171 PetscInt lda; 1172 1173 PetscFunctionBegin; 1174 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1175 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1176 if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1177 a->vecinuse = col + 1; 1178 PetscCall(MatDenseGetLDA(a->A, &lda)); 1179 PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1180 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1181 *v = a->cvec; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1186 { 1187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1188 1189 PetscFunctionBegin; 1190 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1191 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1192 a->vecinuse = 0; 1193 PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1194 PetscCall(VecCUDAResetArray(a->cvec)); 1195 if (v) *v = NULL; 1196 PetscFunctionReturn(0); 1197 } 1198 1199 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1200 { 1201 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1202 1203 PetscFunctionBegin; 1204 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1205 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1206 PetscCall(MatDenseCUDAPlaceArray(l->A, a)); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1211 { 1212 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1213 1214 PetscFunctionBegin; 1215 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1216 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1217 PetscCall(MatDenseCUDAResetArray(l->A)); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1222 { 1223 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1224 1225 PetscFunctionBegin; 1226 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1227 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1228 PetscCall(MatDenseCUDAReplaceArray(l->A, a)); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, 
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}
"MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1318 mat->ops->shift = MatShift_MPIDenseCUDA; 1319 } else { 1320 PetscCall(PetscFree(mat->defaultrandtype)); 1321 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1323 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1324 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1327 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1328 mat->ops->shift = MatShift_MPIDense; 1329 } 1330 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1335 { 1336 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1337 PetscBool iscuda; 1338 1339 PetscFunctionBegin; 1340 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1341 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 1342 if (!iscuda) PetscFunctionReturn(0); 1343 PetscCall(PetscLayoutSetUp(A->rmap)); 1344 PetscCall(PetscLayoutSetUp(A->cmap)); 1345 if (!d->A) { 1346 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1347 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1348 } 1349 PetscCall(MatSetType(d->A, MATSEQDENSECUDA)); 1350 PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data)); 1351 A->preallocated = PETSC_TRUE; 1352 A->assembled = PETSC_TRUE; 1353 PetscFunctionReturn(0); 1354 } 1355 #endif 1356 1357 #if defined(PETSC_HAVE_HIP) 1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha) 1359 { 1360 PetscScalar *da; 1361 PetscInt lda; 1362 1363 PetscFunctionBegin; 1364 PetscCall(MatDenseHIPGetArray(A, &da)); 1365 PetscCall(MatDenseGetLDA(A, &lda)); 1366 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1367 PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1368 PetscCall(MatDenseHIPRestoreArray(A, &da)); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1373 { 1374 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1375 PetscInt lda; 1376 1377 PetscFunctionBegin; 1378 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1379 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1380 if (!a->cvec) { 1381 PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1382 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1383 } 1384 a->vecinuse = col + 1; 1385 
PetscCall(MatDenseGetLDA(a->A, &lda)); 1386 PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1387 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1388 *v = a->cvec; 1389 PetscFunctionReturn(0); 1390 } 1391 1392 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1393 { 1394 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1395 1396 PetscFunctionBegin; 1397 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1398 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1399 a->vecinuse = 0; 1400 PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1401 PetscCall(VecHIPResetArray(a->cvec)); 1402 if (v) *v = NULL; 1403 PetscFunctionReturn(0); 1404 } 1405 1406 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1407 { 1408 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1409 PetscInt lda; 1410 1411 PetscFunctionBegin; 1412 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1413 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1414 if (!a->cvec) { 1415 PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1416 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1417 } 1418 a->vecinuse = col + 1; 1419 PetscCall(MatDenseGetLDA(a->A, &lda)); 1420 PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse)); 1421 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1422 PetscCall(VecLockReadPush(a->cvec)); 1423 *v = a->cvec; 1424 PetscFunctionReturn(0); 1425 } 1426 1427 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1428 { 1429 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1430 1431 PetscFunctionBegin; 1432 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1433 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1434 a->vecinuse = 0; 1435 PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse)); 1436 PetscCall(VecLockReadPop(a->cvec)); 1437 PetscCall(VecHIPResetArray(a->cvec)); 1438 if (v) *v = NULL; 1439 PetscFunctionReturn(0); 1440 } 1441 1442 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1443 { 1444 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1445 PetscInt lda; 1446 1447 PetscFunctionBegin; 1448 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1449 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1450 if (!a->cvec) { 1451 PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1452 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1453 } 1454 a->vecinuse = col + 1; 1455 PetscCall(MatDenseGetLDA(a->A, &lda)); 1456 PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1457 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1458 *v = a->cvec; 1459 PetscFunctionReturn(0); 1460 } 1461 1462 static PetscErrorCode 
MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1463 { 1464 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1465 1466 PetscFunctionBegin; 1467 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1468 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1469 a->vecinuse = 0; 1470 PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1471 PetscCall(VecHIPResetArray(a->cvec)); 1472 if (v) *v = NULL; 1473 PetscFunctionReturn(0); 1474 } 1475 1476 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1477 { 1478 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1479 1480 PetscFunctionBegin; 1481 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1482 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1483 PetscCall(MatDenseHIPPlaceArray(l->A, a)); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A) 1488 { 1489 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1490 1491 PetscFunctionBegin; 1492 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1493 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1494 PetscCall(MatDenseHIPResetArray(l->A)); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1499 { 1500 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1501 1502 PetscFunctionBegin; 1503 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1504 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1505 PetscCall(MatDenseHIPReplaceArray(l->A, a)); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1510 { 1511 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1512 1513 PetscFunctionBegin; 1514 PetscCall(MatDenseHIPGetArrayWrite(l->A, a)); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1519 { 1520 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1521 1522 PetscFunctionBegin; 1523 PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a)); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1528 { 1529 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1530 1531 PetscFunctionBegin; 1532 PetscCall(MatDenseHIPGetArrayRead(l->A, a)); 1533 PetscFunctionReturn(0); 1534 } 1535 1536 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1537 { 1538 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1539 1540 PetscFunctionBegin; 1541 PetscCall(MatDenseHIPRestoreArrayRead(l->A, a)); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a) 1546 { 1547 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1548 1549 PetscFunctionBegin; 1550 PetscCall(MatDenseHIPGetArray(l->A, a)); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 static PetscErrorCode 
MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a) 1555 { 1556 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1557 1558 PetscFunctionBegin; 1559 PetscCall(MatDenseHIPRestoreArray(l->A, a)); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1564 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1565 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1566 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1567 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1568 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1569 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 1570 1571 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind) 1572 { 1573 Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1574 1575 PetscFunctionBegin; 1576 PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1577 PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1578 if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1579 mat->boundtocpu = bind; 1580 if (!bind) { 1581 PetscBool iscuda; 1582 1583 PetscCall(PetscFree(mat->defaultrandtype)); 1584 PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype)); 1585 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iscuda)); 1586 if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 1587 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iscuda)); 1588 if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP)); 1595 mat->ops->shift = MatShift_MPIDenseHIP; 1596 } else { 1597 PetscCall(PetscFree(mat->defaultrandtype)); 1598 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1605 mat->ops->shift = 
MatShift_MPIDense; 1606 } 1607 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1608 PetscFunctionReturn(0); 1609 } 1610 1611 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data) 1612 { 1613 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1614 PetscBool iscuda; 1615 1616 PetscFunctionBegin; 1617 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1618 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iscuda)); 1619 if (!iscuda) PetscFunctionReturn(0); 1620 PetscCall(PetscLayoutSetUp(A->rmap)); 1621 PetscCall(PetscLayoutSetUp(A->cmap)); 1622 if (!d->A) { 1623 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1624 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A)); 1625 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1626 } 1627 PetscCall(MatSetType(d->A, MATSEQDENSEHIP)); 1628 PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data)); 1629 A->preallocated = PETSC_TRUE; 1630 A->assembled = PETSC_TRUE; 1631 PetscFunctionReturn(0); 1632 } 1633 #endif 1634 1635 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1636 { 1637 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1638 1639 PetscFunctionBegin; 1640 PetscCall(MatSetRandom(d->A, rctx)); 1641 #if defined(PETSC_HAVE_DEVICE) 1642 x->offloadmask = d->A->offloadmask; 1643 #endif 1644 PetscFunctionReturn(0); 1645 } 1646 1647 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1648 { 1649 PetscFunctionBegin; 1650 *missing = PETSC_FALSE; 1651 PetscFunctionReturn(0); 1652 } 1653 1654 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1655 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1656 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1657 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1658 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1659 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1660 1661 /* -------------------------------------------------------------------*/ 1662 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1663 MatGetRow_MPIDense, 1664 MatRestoreRow_MPIDense, 1665 MatMult_MPIDense, 1666 /* 4*/ MatMultAdd_MPIDense, 1667 MatMultTranspose_MPIDense, 1668 MatMultTransposeAdd_MPIDense, 1669 NULL, 1670 NULL, 1671 NULL, 1672 /* 10*/ NULL, 1673 NULL, 1674 NULL, 1675 NULL, 1676 MatTranspose_MPIDense, 1677 /* 15*/ MatGetInfo_MPIDense, 1678 MatEqual_MPIDense, 1679 MatGetDiagonal_MPIDense, 1680 MatDiagonalScale_MPIDense, 1681 MatNorm_MPIDense, 1682 /* 20*/ MatAssemblyBegin_MPIDense, 1683 MatAssemblyEnd_MPIDense, 1684 MatSetOption_MPIDense, 1685 MatZeroEntries_MPIDense, 1686 /* 24*/ MatZeroRows_MPIDense, 1687 NULL, 1688 NULL, 1689 NULL, 1690 NULL, 1691 /* 29*/ MatSetUp_MPIDense, 1692 NULL, 1693 NULL, 1694 MatGetDiagonalBlock_MPIDense, 1695 NULL, 1696 /* 34*/ MatDuplicate_MPIDense, 1697 NULL, 1698 NULL, 1699 NULL, 1700 NULL, 1701 /* 39*/ MatAXPY_MPIDense, 1702 MatCreateSubMatrices_MPIDense, 1703 NULL, 1704 MatGetValues_MPIDense, 1705 MatCopy_MPIDense, 1706 /* 44*/ NULL, 1707 MatScale_MPIDense, 1708 MatShift_MPIDense, 1709 NULL, 1710 NULL, 1711 /* 49*/ MatSetRandom_MPIDense, 1712 NULL, 1713 NULL, 1714 NULL, 1715 NULL, 1716 /* 54*/ NULL, 1717 NULL, 1718 NULL, 1719 NULL, 1720 NULL, 1721 /* 59*/ MatCreateSubMatrix_MPIDense, 1722 MatDestroy_MPIDense, 1723 MatView_MPIDense, 1724 NULL, 1725 NULL, 1726 /* 64*/ NULL, 1727 NULL, 1728 
NULL, 1729 NULL, 1730 NULL, 1731 /* 69*/ NULL, 1732 NULL, 1733 NULL, 1734 NULL, 1735 NULL, 1736 /* 74*/ NULL, 1737 NULL, 1738 NULL, 1739 NULL, 1740 NULL, 1741 /* 79*/ NULL, 1742 NULL, 1743 NULL, 1744 NULL, 1745 /* 83*/ MatLoad_MPIDense, 1746 NULL, 1747 NULL, 1748 NULL, 1749 NULL, 1750 NULL, 1751 /* 89*/ NULL, 1752 NULL, 1753 NULL, 1754 NULL, 1755 NULL, 1756 /* 94*/ NULL, 1757 NULL, 1758 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1759 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1760 NULL, 1761 /* 99*/ MatProductSetFromOptions_MPIDense, 1762 NULL, 1763 NULL, 1764 MatConjugate_MPIDense, 1765 NULL, 1766 /*104*/ NULL, 1767 MatRealPart_MPIDense, 1768 MatImaginaryPart_MPIDense, 1769 NULL, 1770 NULL, 1771 /*109*/ NULL, 1772 NULL, 1773 NULL, 1774 MatGetColumnVector_MPIDense, 1775 MatMissingDiagonal_MPIDense, 1776 /*114*/ NULL, 1777 NULL, 1778 NULL, 1779 NULL, 1780 NULL, 1781 /*119*/ NULL, 1782 NULL, 1783 NULL, 1784 NULL, 1785 NULL, 1786 /*124*/ NULL, 1787 MatGetColumnReductions_MPIDense, 1788 NULL, 1789 NULL, 1790 NULL, 1791 /*129*/ NULL, 1792 NULL, 1793 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1794 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1795 NULL, 1796 /*134*/ NULL, 1797 NULL, 1798 NULL, 1799 NULL, 1800 NULL, 1801 /*139*/ NULL, 1802 NULL, 1803 NULL, 1804 NULL, 1805 NULL, 1806 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1807 /*145*/ NULL, 1808 NULL, 1809 NULL, 1810 NULL, 1811 NULL, 1812 /*150*/ NULL}; 1813 1814 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1815 { 1816 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1817 MatType mtype = MATSEQDENSE; 1818 1819 PetscFunctionBegin; 1820 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1821 PetscCall(PetscLayoutSetUp(mat->rmap)); 1822 PetscCall(PetscLayoutSetUp(mat->cmap)); 1823 if (!a->A) { 1824 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1825 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1826 } 1827 #if defined(PETSC_HAVE_CUDA) 1828 PetscBool iscuda; 1829 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1830 if (iscuda) mtype = MATSEQDENSECUDA; 1831 #endif 1832 #if defined(PETSC_HAVE_HIP) 1833 PetscBool iship; 1834 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1835 if (iship) mtype = MATSEQDENSEHIP; 1836 #endif 1837 PetscCall(MatSetType(a->A, mtype)); 1838 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1839 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1840 mat->offloadmask = a->A->offloadmask; 1841 #endif 1842 mat->preallocated = PETSC_TRUE; 1843 mat->assembled = PETSC_TRUE; 1844 PetscFunctionReturn(0); 1845 } 1846 1847 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1848 { 1849 Mat B, C; 1850 1851 PetscFunctionBegin; 1852 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1853 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1854 PetscCall(MatDestroy(&C)); 1855 if (reuse == MAT_REUSE_MATRIX) { 1856 C = *newmat; 1857 } else C = NULL; 1858 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1859 PetscCall(MatDestroy(&B)); 1860 if (reuse == MAT_INPLACE_MATRIX) { 1861 PetscCall(MatHeaderReplace(A, &C)); 1862 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1863 PetscFunctionReturn(0); 1864 } 1865 1866 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1867 { 1868 Mat B, C; 1869 1870 PetscFunctionBegin; 1871 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1872 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1873 if (reuse == MAT_REUSE_MATRIX) { 1874 C = *newmat; 1875 } else C = NULL; 1876 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1877 PetscCall(MatDestroy(&B)); 1878 if (reuse == MAT_INPLACE_MATRIX) { 1879 PetscCall(MatHeaderReplace(A, &C)); 1880 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #if defined(PETSC_HAVE_ELEMENTAL) 1885 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1886 { 1887 Mat mat_elemental; 1888 PetscScalar *v; 1889 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 1890 1891 PetscFunctionBegin; 1892 if (reuse == MAT_REUSE_MATRIX) { 1893 mat_elemental = *newmat; 1894 PetscCall(MatZeroEntries(*newmat)); 1895 } else { 1896 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1897 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1898 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1899 PetscCall(MatSetUp(mat_elemental)); 1900 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1901 } 1902 1903 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1904 for (i = 0; i < N; i++) cols[i] = i; 1905 for (i = 0; i < m; i++) rows[i] = rstart + i; 1906 1907 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1908 PetscCall(MatDenseGetArray(A, &v)); 1909 PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1910 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1911 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1912 PetscCall(MatDenseRestoreArray(A, &v)); 1913 PetscCall(PetscFree2(rows, cols)); 1914 1915 if (reuse == MAT_INPLACE_MATRIX) { 1916 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1917 } else { 1918 *newmat = mat_elemental; 1919 } 1920 PetscFunctionReturn(0); 1921 } 1922 #endif 1923 1924 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1925 { 1926 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1927 1928 PetscFunctionBegin; 1929 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1934 { 1935 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1936 1937 PetscFunctionBegin; 1938 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1943 { 1944 Mat_MPIDense *mat; 1945 PetscInt m, nloc, N; 1946 1947 PetscFunctionBegin; 1948 PetscCall(MatGetSize(inmat, &m, &N)); 1949 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1950 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1951 PetscInt sum; 1952 1953 if (n == PETSC_DECIDE) 
PetscCall(PetscSplitOwnership(comm, &n, &N)); 1954 /* Check sum(n) = N */ 1955 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1956 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1957 1958 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1959 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1960 } 1961 1962 /* numeric phase */ 1963 mat = (Mat_MPIDense *)(*outmat)->data; 1964 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #if defined(PETSC_HAVE_CUDA) 1969 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 1970 { 1971 Mat B; 1972 Mat_MPIDense *m; 1973 1974 PetscFunctionBegin; 1975 if (reuse == MAT_INITIAL_MATRIX) { 1976 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1977 } else if (reuse == MAT_REUSE_MATRIX) { 1978 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1979 } 1980 1981 B = *newmat; 1982 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE)); 1983 PetscCall(PetscFree(B->defaultvectype)); 1984 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 1985 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 1986 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL)); 1987 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 1988 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 1989 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 1990 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 1991 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL)); 1992 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL)); 1993 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL)); 1994 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL)); 1995 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL)); 1996 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL)); 1997 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL)); 1998 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL)); 1999 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL)); 2000 m = (Mat_MPIDense *)(B)->data; 2001 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 2002 B->ops->bindtocpu = NULL; 2003 B->offloadmask = PETSC_OFFLOAD_CPU; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2008 { 2009 Mat B; 2010 Mat_MPIDense *m; 2011 2012 PetscFunctionBegin; 2013 if (reuse == MAT_INITIAL_MATRIX) { 2014 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2015 } else if (reuse == MAT_REUSE_MATRIX) { 2016 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2017 } 2018 2019 B = *newmat; 2020 PetscCall(PetscFree(B->defaultvectype)); 2021 PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 2022 
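/* the default vector type is now CUDA; the type name change and method compositions below swap in the device implementations */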
PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 2023 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 2024 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2025 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2027 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2028 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 2029 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 2030 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 2031 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 2032 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 2033 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 2034 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 2035 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 2036 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 2037 m = (Mat_MPIDense *)(B->data); 2038 if (m->A) { 2039 PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 2040 B->offloadmask = PETSC_OFFLOAD_BOTH; 2041 } else { 2042 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2043 } 2044 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 2045 2046 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 2047 PetscFunctionReturn(0); 2048 } 2049 #endif 2050 2051 #if defined(PETSC_HAVE_HIP) 2052 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2053 { 2054 Mat B; 2055 Mat_MPIDense *m; 2056 2057 PetscFunctionBegin; 2058 if (reuse == MAT_INITIAL_MATRIX) { 2059 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2060 } else if (reuse == MAT_REUSE_MATRIX) { 2061 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2062 } 2063 2064 B = *newmat; 2065 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE)); 2066 PetscCall(PetscFree(B->defaultvectype)); 2067 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 2068 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 2069 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL)); 2070 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 2071 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 2072 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 2073 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 2074 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL)); 2075 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL)); 2076 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL)); 2077 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL)); 2078 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL)); 2079 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL)); 2080 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL)); 2081 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL)); 2082 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL)); 2083 m = (Mat_MPIDense *)(B)->data; 2084 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 2085 B->ops->bindtocpu = NULL; 2086 B->offloadmask = PETSC_OFFLOAD_CPU; 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2091 { 2092 Mat B; 2093 Mat_MPIDense *m; 2094 2095 PetscFunctionBegin; 2096 if (reuse == MAT_INITIAL_MATRIX) { 2097 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2098 } else if (reuse == MAT_REUSE_MATRIX) { 2099 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2100 } 2101 2102 B = *newmat; 2103 PetscCall(PetscFree(B->defaultvectype)); 2104 PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype)); 2105 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP)); 2106 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense)); 2107 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2108 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2109 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2110 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2111 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP)); 2112 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP)); 2113 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP)); 2114 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP)); 2115 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP)); 2116 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP)); 2117 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP)); 2118 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", 
MatDenseHIPResetArray_MPIDenseHIP)); 2119 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP)); 2120 m = (Mat_MPIDense *)(B->data); 2121 if (m->A) { 2122 PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A)); 2123 B->offloadmask = PETSC_OFFLOAD_BOTH; 2124 } else { 2125 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2126 } 2127 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE)); 2128 2129 B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP; 2130 PetscFunctionReturn(0); 2131 } 2132 #endif 2133 2134 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2135 { 2136 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2137 PetscInt lda; 2138 2139 PetscFunctionBegin; 2140 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2141 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2142 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2143 a->vecinuse = col + 1; 2144 PetscCall(MatDenseGetLDA(a->A, &lda)); 2145 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 2146 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2147 *v = a->cvec; 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2152 { 2153 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2154 2155 PetscFunctionBegin; 2156 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2157 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 2158 a->vecinuse = 0; 2159 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 2160 PetscCall(VecResetArray(a->cvec)); 2161 if (v) *v = NULL; 2162 PetscFunctionReturn(0); 2163 } 2164 2165 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2166 { 2167 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2168 PetscInt lda; 2169 2170 PetscFunctionBegin; 2171 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2172 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2173 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2174 a->vecinuse = col + 1; 2175 PetscCall(MatDenseGetLDA(a->A, &lda)); 2176 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 2177 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2178 PetscCall(VecLockReadPush(a->cvec)); 2179 *v = a->cvec; 2180 PetscFunctionReturn(0); 2181 } 2182 2183 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2184 { 2185 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2186 2187 PetscFunctionBegin; 2188 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2189 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2190 a->vecinuse = 0; 2191 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 2192 PetscCall(VecLockReadPop(a->cvec)); 2193 PetscCall(VecResetArray(a->cvec)); 2194 if (v) *v = NULL; 2195 PetscFunctionReturn(0); 2196 } 2197 2198 
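/*
   A minimal usage sketch of the column-vector accessors implemented above (the
   matrix A and column index col here are hypothetical; callers reach these
   _MPIDense routines through the type-independent interface, which dispatches
   via the composed "_C" methods):

     Vec       c;
     PetscReal nrm;
     PetscCall(MatDenseGetColumnVecRead(A, col, &c));
     PetscCall(VecNorm(c, NORM_2, &nrm));
     PetscCall(MatDenseRestoreColumnVecRead(A, col, &c));

   The restore is mandatory: vecinuse is set on get, and any further column or
   submatrix access before the restore fails with PETSC_ERR_ORDER.
*/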
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2199 { 2200 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2201 PetscInt lda; 2202 2203 PetscFunctionBegin; 2204 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2205 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2206 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2207 a->vecinuse = col + 1; 2208 PetscCall(MatDenseGetLDA(a->A, &lda)); 2209 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2210 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2211 *v = a->cvec; 2212 PetscFunctionReturn(0); 2213 } 2214 2215 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2216 { 2217 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2218 2219 PetscFunctionBegin; 2220 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2221 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2222 a->vecinuse = 0; 2223 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2224 PetscCall(VecResetArray(a->cvec)); 2225 if (v) *v = NULL; 2226 PetscFunctionReturn(0); 2227 } 2228 2229 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 2230 { 2231 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2232 Mat_MPIDense *c; 2233 MPI_Comm comm; 2234 PetscInt pbegin, pend; 2235 2236 PetscFunctionBegin; 2237 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2238 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2239 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2240 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 2241 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 2242 if (!a->cmat) { 2243 PetscCall(MatCreate(comm, &a->cmat)); 2244 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 2245 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 2246 else { 2247 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2248 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2249 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2250 } 2251 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2252 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2253 } else { 2254 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 2255 if (same && a->cmat->rmap->N != A->rmap->N) { 2256 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 2257 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2258 } 2259 if (!same) { 2260 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 2261 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 2262 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2263 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2264 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2265 } 2266 if (cend - cbegin != a->cmat->cmap->N) { 2267 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 2268 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 2269 
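/* the recreated column layout is sized below to match the requested range [cbegin, cend) */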
PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2270 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2271 } 2272 } 2273 c = (Mat_MPIDense *)a->cmat->data; 2274 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2275 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 2276 2277 a->cmat->preallocated = PETSC_TRUE; 2278 a->cmat->assembled = PETSC_TRUE; 2279 #if defined(PETSC_HAVE_DEVICE) 2280 a->cmat->offloadmask = c->A->offloadmask; 2281 #endif 2282 a->matinuse = cbegin + 1; 2283 *v = a->cmat; 2284 PetscFunctionReturn(0); 2285 } 2286 2287 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 2288 { 2289 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2290 Mat_MPIDense *c; 2291 2292 PetscFunctionBegin; 2293 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 2294 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 2295 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 2296 a->matinuse = 0; 2297 c = (Mat_MPIDense *)a->cmat->data; 2298 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 2299 if (v) *v = NULL; 2300 #if defined(PETSC_HAVE_DEVICE) 2301 A->offloadmask = a->A->offloadmask; 2302 #endif 2303 PetscFunctionReturn(0); 2304 } 2305 2306 /*MC 2307 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 2308 2309 Options Database Keys: 2310 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 2311 2312 Level: beginner 2313 2314 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 2315 M*/ 2316 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 2317 { 2318 Mat_MPIDense *a; 2319 2320 PetscFunctionBegin; 2321 PetscCall(PetscNew(&a)); 2322 mat->data = (void *)a; 2323 PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps))); 2324 2325 mat->insertmode = NOT_SET_VALUES; 2326 2327 /* build cache for off array entries formed */ 2328 a->donotstash = PETSC_FALSE; 2329 2330 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 2331 2332 /* stuff used for matrix vector multiply */ 2333 a->lvec = NULL; 2334 a->Mvctx = NULL; 2335 a->roworiented = PETSC_TRUE; 2336 2337 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 2338 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 2339 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 2340 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 2341 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 2342 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 2343 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 2344 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 2345 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 2346 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 
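/* remaining compositions expose the replace-array, column-vector, and submatrix accessors plus type conversions, preallocation, and matrix-product hooks */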
2347 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 2348 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 2349 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 2350 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 2351 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 2352 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 2353 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 2354 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 2355 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 2356 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 2357 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 2358 #if defined(PETSC_HAVE_ELEMENTAL) 2359 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 2360 #endif 2361 #if defined(PETSC_HAVE_SCALAPACK) 2362 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 2363 #endif 2364 #if defined(PETSC_HAVE_CUDA) 2365 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 2366 #endif 2367 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 2368 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2369 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2370 #if defined(PETSC_HAVE_CUDA) 2371 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2372 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2373 #endif 2374 #if defined(PETSC_HAVE_HIP) 2375 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 2376 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2377 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2378 #endif 2379 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 2380 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 2381 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 2382 PetscFunctionReturn(0); 2383 } 
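/*
   A minimal creation sketch (the sizes m and N are hypothetical; storage is
   left to PETSc by passing data = NULL) showing how the constructor above is
   typically reached through the public API:

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, m, PETSC_DECIDE, PETSC_DETERMINE, N));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A, NULL));

   MatSetType() dispatches to MatCreate_MPIDense(); the preallocation call then
   builds the local MATSEQDENSE block via MatMPIDenseSetPreallocation_MPIDense().
*/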
2384 2385 /*MC 2386 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 2387 2388 Options Database Keys: 2389 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()` 2390 2391 Level: beginner 2392 2393 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP` 2394 M*/ 2395 #if defined(PETSC_HAVE_CUDA) 2396 #include <petsc/private/deviceimpl.h> 2397 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 2398 { 2399 PetscFunctionBegin; 2400 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 2401 PetscCall(MatCreate_MPIDense(B)); 2402 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B)); 2403 PetscFunctionReturn(0); 2404 } 2405 #endif 2406 2407 /*MC 2408 MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs. 2409 2410 Options Database Keys: 2411 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()` 2412 2413 Level: beginner 2414 2415 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSEHIP`, `MATMPIDENSECUDA` 2416 M*/ 2417 #if defined(PETSC_HAVE_HIP) 2418 #include <petsc/private/deviceimpl.h> 2419 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B) 2420 { 2421 PetscFunctionBegin; 2422 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP)); 2423 PetscCall(MatCreate_MPIDense(B)); 2424 PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B)); 2425 PetscFunctionReturn(0); 2426 } 2427 #endif 2428 2429 /*MC 2430 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 2431 2432 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 2433 and `MATMPIDENSE` otherwise. 2434 2435 Options Database Keys: 2436 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 2437 2438 Level: beginner 2439 2440 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 2441 M*/ 2442 2443 /*MC 2444 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 2445 2446 2447 2448 This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator, 2449 and `MATMPIDENSECUDA` otherwise. 2450 2451 Options Database Keys: 2452 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()` 2453 2454 Level: beginner 2455 2456 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2457 M*/ 2458 2459 /*MC 2460 MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs. 2461 2462 This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator, 2463 and `MATMPIDENSEHIP` otherwise. 2464 2465 Options Database Keys: 2466 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()` 2467 2468 Level: beginner 2469 2470 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2471 M*/ 2472 2473 /*@C 2474 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 2475 2476 Collective 2477 2478 Input Parameters: 2479 + B - the matrix 2480 - data - optional location of matrix data. Set data=NULL for PETSc 2481 to control all matrix memory allocation.
2482 2483 Notes: 2484 The dense format is fully compatible with standard Fortran 77 2485 storage by columns. 2486 2487 The data input variable is intended primarily for Fortran programmers 2488 who wish to allocate their own matrix memory space. Most users should 2489 set data=NULL. 2490 2491 Level: intermediate 2492 2493 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2494 @*/ 2495 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 2496 { 2497 PetscFunctionBegin; 2498 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2499 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 /*@ 2504 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 2505 array provided by the user. This is useful to avoid copying an array 2506 into a matrix. 2507 2508 Not Collective 2509 2510 Input Parameters: 2511 + mat - the matrix 2512 - array - the array in column major order 2513 2514 Note: 2515 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 2516 freed when the matrix is destroyed. 2517 2518 Level: developer 2519 2520 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 2521 `MatDenseReplaceArray()` 2522 @*/ 2523 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 2524 { 2525 PetscFunctionBegin; 2526 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2527 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2528 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2529 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2530 mat->offloadmask = PETSC_OFFLOAD_CPU; 2531 #endif 2532 PetscFunctionReturn(0); 2533 } 2534 2535 /*@ 2536 MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()` 2537 2538 Not Collective 2539 2540 Input Parameter: 2541 . mat - the matrix 2542 2543 Note: 2544 You can only call this after a call to `MatDensePlaceArray()` 2545 2546 Level: developer 2547 2548 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2549 @*/ 2550 PetscErrorCode MatDenseResetArray(Mat mat) 2551 { 2552 PetscFunctionBegin; 2553 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2554 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 2555 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2556 PetscFunctionReturn(0); 2557 } 2558 2559 /*@ 2560 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2561 array provided by the user. This is useful to avoid copying an array 2562 into a matrix. 2563 2564 Not Collective 2565 2566 Input Parameters: 2567 + mat - the matrix 2568 - array - the array in column major order 2569 2570 Note: 2571 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 2572 freed by the user. It will be freed when the matrix is destroyed.
2573 2574 Level: developer 2575 2576 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 2577 @*/ 2578 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 2579 { 2580 PetscFunctionBegin; 2581 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2582 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2583 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2584 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2585 mat->offloadmask = PETSC_OFFLOAD_CPU; 2586 #endif 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #if defined(PETSC_HAVE_CUDA) 2591 /*@C 2592 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2593 array provided by the user. This is useful to avoid copying an array 2594 into a matrix. 2595 2596 Not Collective 2597 2598 Input Parameters: 2599 + mat - the matrix 2600 - array - the array in column major order 2601 2602 Note: 2603 You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be 2604 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2605 2606 Level: developer 2607 2608 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()` 2609 @*/ 2610 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) 2611 { 2612 PetscFunctionBegin; 2613 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2614 PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2615 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2616 mat->offloadmask = PETSC_OFFLOAD_GPU; 2617 PetscFunctionReturn(0); 2618 } 2619 2620 /*@C 2621 MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()` 2622 2623 Not Collective 2624 2625 Input Parameter: 2626 . mat - the matrix 2627 2628 Note: 2629 You can only call this after a call to `MatDenseCUDAPlaceArray()` 2630 2631 Level: developer 2632 2633 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2634 @*/ 2635 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2636 { 2637 PetscFunctionBegin; 2638 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2639 PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat)); 2640 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2641 PetscFunctionReturn(0); 2642 } 2643 2644 /*@C 2645 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2646 array provided by the user. This is useful to avoid copying an array 2647 into a matrix. 2648 2649 Not Collective 2650 2651 Input Parameters: 2652 + mat - the matrix 2653 - array - the array in column major order 2654 2655 Note: 2656 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2657 The memory passed in CANNOT be freed by the user. It will be freed 2658 when the matrix is destroyed. The array should respect the matrix leading dimension.
2659 2660 Level: developer 2661 2662 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2663 @*/ 2664 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) 2665 { 2666 PetscFunctionBegin; 2667 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2668 PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2669 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2670 mat->offloadmask = PETSC_OFFLOAD_GPU; 2671 PetscFunctionReturn(0); 2672 } 2673 2674 /*@C 2675 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix. 2676 2677 Not Collective 2678 2679 Input Parameter: 2680 . A - the matrix 2681 2682 Output Parameter: 2683 . array - the GPU array in column major order 2684 2685 Notes: 2686 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`. 2687 2688 The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed. 2689 2690 Level: developer 2691 2692 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2693 @*/ 2694 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2695 { 2696 PetscFunctionBegin; 2697 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2698 PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2699 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2700 PetscFunctionReturn(0); 2701 } 2702 2703 /*@C 2704 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 2705 2706 Not Collective 2707 2708 Input Parameters: 2709 + A - the matrix 2710 - array - the GPU array in column major order 2711 2712 Level: developer 2713 2714 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2715 @*/ 2716 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2717 { 2718 PetscFunctionBegin; 2719 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2720 PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2721 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2722 A->offloadmask = PETSC_OFFLOAD_GPU; 2723 PetscFunctionReturn(0); 2724 } 2725 2726 /*@C 2727 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed. 2728 2729 Not Collective 2730 2731 Input Parameter: 2732 . A - the matrix 2733 2734 Output Parameter: 2735 . array - the GPU array in column major order 2736 2737 Note: 2738 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- array - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. array - the GPU array in column major order

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
  Not Collective

  Input Parameters:
+ A - the matrix
- array - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

#if defined(PETSC_HAVE_HIP)
/*@C
  MatDenseHIPPlaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
  array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat - the matrix
- array - the array in column major order

  Note:
  You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed. The array must have been allocated with hipMalloc().

  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPResetArray()`
@*/
PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseHIPPlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Note:
  You can only call this after a call to `MatDenseHIPPlaceArray()`

  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`
@*/
PetscErrorCode MatDenseHIPResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPReplaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
  array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat - the matrix
- array - the array in column major order

  Note:
  This permanently replaces the GPU array and frees the memory associated with the old GPU array.
  The memory passed in CANNOT be freed by the user. It will be freed
  when the matrix is destroyed. The array should respect the matrix leading dimension.
  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`, `MatDenseHIPResetArray()`
@*/
PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a `MATDENSEHIP` matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. array - the GPU array in column major order

  Notes:
  The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`.

  The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed.

  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArrayRead()`
@*/
PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArrayWrite()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- array - the GPU array in column major order

  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
@*/
PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a `MATDENSEHIP` matrix. The array must be restored with `MatDenseHIPRestoreArrayRead()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. array - the GPU array in column major order

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`.
  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
@*/
PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with a call to `MatDenseHIPGetArrayRead()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- array - the GPU array in column major order

  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPGetArrayRead()`
@*/
PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPGetArray - Provides access to the HIP buffer inside a `MATDENSEHIP` matrix. The array must be restored with `MatDenseHIPRestoreArray()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. array - the GPU array in column major order

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`. For read-only access, use `MatDenseHIPGetArrayRead()`.

  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
@*/
PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
  MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArray()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- array - the GPU array in column major order

  Level: developer

.seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
@*/
PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
. n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
. M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
. N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
- data - optional location of matrix data. Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
  to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Notes:
  The dense format is fully compatible with standard Fortran 77
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set data to NULL (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  Level: intermediate

.seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
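
/*
   A minimal MatCreateDense() usage sketch (not from the original source; the global
   sizes 100 x 50 are placeholders). PETSc chooses the row distribution and allocates
   the storage because data is NULL:

     Mat A;
     MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A);
     ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ...
     MatDestroy(&A);
*/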

#if defined(PETSC_HAVE_CUDA)
/*@C
  MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
. n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
. M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
. N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
- data - optional location of GPU matrix data. Set data to NULL for PETSc
  to control matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
#endif

#if defined(PETSC_HAVE_HIP)
/*@C
  MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
. n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
. M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
. N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
- data - optional location of GPU matrix data. Set data to NULL for PETSc
  to control matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

.seealso: `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSEHIP));
  PetscCall(MatSeqDenseHIPSetPreallocation(*A, data));
  PetscCall(MatMPIDenseHIPSetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}
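
/*
   The routines below implement the MPI-dense matrix products C = A^T*B (AtB, via an
   MPI_Reduce_scatter of the local A^T*B contributions) and C = A*B^T (ABt, via either
   an allgatherv of B^T or a cyclic ring exchange). The communication buffers are
   allocated in the symbolic phase and cached in C->product->data for reuse.
*/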
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum the contributions in sendbuf into the local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  /* accumulate the type checks so a later compare cannot overwrite an earlier match */
  PetscCall(PetscObjectTypeCompare((PetscObject)C, MATMPIDENSE, &cisdense));
#if defined(PETSC_HAVE_CUDA)
  if (!cisdense) PetscCall(PetscObjectTypeCompare((PetscObject)C, MATMPIDENSECUDA, &cisdense));
#endif
#if defined(PETSC_HAVE_HIP)
  if (!cisdense) PetscCall(PetscObjectTypeCompare((PetscObject)C, MATMPIDENSEHIP, &cisdense));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse of C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse of C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
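  /* Ring ("cyclic") algorithm: the locally transposed block of B is passed around the
     ranks with nonblocking sends/receives while each incoming block is multiplied
     against the local rows of A with GEMM, overlapping communication and computation. */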
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, so the buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av, *bv;
  PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt           cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
  }
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
  if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  switch (abt->alg) {
  case 1:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
    break;
  default:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
    break;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat               Ae, Be, Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse of C */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif

/* ----------------------------------------------- */
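
/*
   The MatProductSetFromOptions_MPIDense_* routines below wire the symbolic and numeric
   kernels above into the MatProduct interface for the AB (Elemental only), AtB, and ABt
   product types; MatProductSetFromOptions_MPIDense() is the type-level dispatch.
*/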
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
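
/*
   Example (a sketch, not from the original source): selecting the ABt algorithm at
   runtime for a product C = A*B^T of two MATMPIDENSE matrices, assuming an executable
   ./app that reaches MatProductSetFromOptions_MPIDense_ABt():

     ./app -mat_product_algorithm cyclic                    (MatProduct API)
     ./app -matmattransmult_mpidense_mpidense_via cyclic    (MatMatTransposeMult() API)

   The default is "allgatherv"; "cyclic" replaces the MPI_Allgatherv of B^T with
   ring-pipelined point-to-point messages that overlap with the local GEMMs.
*/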