/*
   Basic functions for parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` matrix, returns the sequential matrix that
  represents the locally owned portion of the operator. For a `MATSEQDENSE` matrix, returns
  the matrix itself.

  Input Parameter:
. A - the sequential or MPI dense matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
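  /* The diagonal block is only well defined when the row and column layouts are congruent
     (the same ownership ranges in both dimensions); only then do the locally owned rows
     line up with columns rstart..rstart+m-1, which is the square block extracted below. */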
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if defined(PETSC_HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif defined(PETSC_HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
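        /* Fetch the entry from the local sequential block; the output array v is filled in
           row-major order, hence the i * n + j offset used below. */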
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if defined(PETSC_HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif defined(PETSC_HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A),
             PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes!
     */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i,
                                        mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
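  /* y = A^T x: each process applies the transpose of its local block (local rows by global
     columns) to its owned part of x, producing a full-length partial result in lvec; the
     partial results are then summed onto the owners of y through the PetscSF reduction below. */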
Mat_MPIDense *a = (Mat_MPIDense *)A->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 519 PetscCall(VecSet(yy, 0.0)); 520 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 521 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 522 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 523 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 524 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 525 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 526 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 533 const PetscScalar *ax; 534 PetscScalar *ay; 535 PetscMemType axmtype, aymtype; 536 537 PetscFunctionBegin; 538 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 539 PetscCall(VecCopy(yy, zz)); 540 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 541 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 542 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 543 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 544 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 545 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 546 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 553 PetscInt lda, len, i, n, m = A->rmap->n, radd; 554 PetscScalar *x, zero = 0.0; 555 const PetscScalar *av; 556 557 PetscFunctionBegin; 558 PetscCall(VecSet(v, zero)); 559 PetscCall(VecGetArray(v, &x)); 560 PetscCall(VecGetSize(v, &n)); 561 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 562 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 563 radd = A->rmap->rstart * m; 564 PetscCall(MatDenseGetArrayRead(a->A, &av)); 565 PetscCall(MatDenseGetLDA(a->A, &lda)); 566 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 567 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 568 PetscCall(VecRestoreArray(v, &x)); 569 PetscFunctionReturn(PETSC_SUCCESS); 570 } 571 572 PetscErrorCode MatDestroy_MPIDense(Mat mat) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 575 576 PetscFunctionBegin; 577 #if defined(PETSC_USE_LOG) 578 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 579 #endif 580 PetscCall(MatStashDestroy_Private(&mat->stash)); 581 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 582 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 583 PetscCall(MatDestroy(&mdn->A)); 584 PetscCall(VecDestroy(&mdn->lvec)); 585 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 586 PetscCall(VecDestroy(&mdn->cvec)); 587 PetscCall(MatDestroy(&mdn->cmat)); 588 589 PetscCall(PetscFree(mat->data)); 590 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 591 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", 
NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 605 #if defined(PETSC_HAVE_ELEMENTAL) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 607 #endif 608 #if defined(PETSC_HAVE_SCALAPACK) 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 610 #endif 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 614 #if defined(PETSC_HAVE_CUDA) 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 632 #endif 633 #if 
defined(PETSC_HAVE_HIP) 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 651 #endif 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 662 663 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 664 PetscFunctionReturn(PETSC_SUCCESS); 665 } 666 667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 668 669 #include <petscdraw.h> 670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 671 { 672 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 673 PetscMPIInt rank; 674 PetscViewerType vtype; 675 PetscBool iascii, isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 679 PetscFunctionBegin; 680 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 681 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 682 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 683 if (iascii) { 684 PetscCall(PetscViewerGetType(viewer, &vtype)); 685 PetscCall(PetscViewerGetFormat(viewer, &format)); 686 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 687 MatInfo info; 688 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 689 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 690 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 691 (PetscInt)info.memory)); 692 PetscCall(PetscViewerFlush(viewer)); 693 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 694 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } else if (format == PETSC_VIEWER_ASCII_INFO) { 697 PetscFunctionReturn(PETSC_SUCCESS); 698 } 699 } else if (isdraw) { 700 PetscDraw draw; 701 PetscBool isnull; 702 703 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 704 PetscCall(PetscDrawIsNull(draw, &isnull)); 705 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 706 } 707 708 { 709 /* assemble the entire matrix onto first processor. */ 710 Mat A; 711 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 712 PetscInt *cols; 713 PetscScalar *vals; 714 715 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 716 if (rank == 0) { 717 PetscCall(MatSetSizes(A, M, N, M, N)); 718 } else { 719 PetscCall(MatSetSizes(A, 0, 0, M, N)); 720 } 721 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 722 PetscCall(MatSetType(A, MATMPIDENSE)); 723 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 724 725 /* Copy the matrix ... 
This isn't the most efficient means, 726 but it's quick for now */ 727 A->insertmode = INSERT_VALUES; 728 729 row = mat->rmap->rstart; 730 m = mdn->A->rmap->n; 731 for (i = 0; i < m; i++) { 732 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 733 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 734 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 735 row++; 736 } 737 738 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 739 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 740 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 741 if (rank == 0) { 742 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 743 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 744 } 745 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 746 PetscCall(PetscViewerFlush(viewer)); 747 PetscCall(MatDestroy(&A)); 748 } 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 753 { 754 PetscBool iascii, isbinary, isdraw, issocket; 755 756 PetscFunctionBegin; 757 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 758 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 759 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 760 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 761 762 if (iascii || issocket || isdraw) { 763 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 764 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 765 PetscFunctionReturn(PETSC_SUCCESS); 766 } 767 768 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 769 { 770 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 771 Mat mdn = mat->A; 772 PetscLogDouble isend[5], irecv[5]; 773 774 PetscFunctionBegin; 775 info->block_size = 1.0; 776 777 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 778 779 isend[0] = info->nz_used; 780 isend[1] = info->nz_allocated; 781 isend[2] = info->nz_unneeded; 782 isend[3] = info->memory; 783 isend[4] = info->mallocs; 784 if (flag == MAT_LOCAL) { 785 info->nz_used = isend[0]; 786 info->nz_allocated = isend[1]; 787 info->nz_unneeded = isend[2]; 788 info->memory = isend[3]; 789 info->mallocs = isend[4]; 790 } else if (flag == MAT_GLOBAL_MAX) { 791 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 792 793 info->nz_used = irecv[0]; 794 info->nz_allocated = irecv[1]; 795 info->nz_unneeded = irecv[2]; 796 info->memory = irecv[3]; 797 info->mallocs = irecv[4]; 798 } else if (flag == MAT_GLOBAL_SUM) { 799 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 800 801 info->nz_used = irecv[0]; 802 info->nz_allocated = irecv[1]; 803 info->nz_unneeded = irecv[2]; 804 info->memory = irecv[3]; 805 info->mallocs = irecv[4]; 806 } 807 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 808 info->fill_ratio_needed = 0; 809 info->factor_mallocs = 0; 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 814 { 815 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 816 817 PetscFunctionBegin; 818 switch (op) { 819 case MAT_NEW_NONZERO_LOCATIONS: 820 case MAT_NEW_NONZERO_LOCATION_ERR: 821 case MAT_NEW_NONZERO_ALLOCATION_ERR: 822 MatCheckPreallocated(A, 1); 823 
PetscCall(MatSetOption(a->A, op, flg)); 824 break; 825 case MAT_ROW_ORIENTED: 826 MatCheckPreallocated(A, 1); 827 a->roworiented = flg; 828 PetscCall(MatSetOption(a->A, op, flg)); 829 break; 830 case MAT_FORCE_DIAGONAL_ENTRIES: 831 case MAT_KEEP_NONZERO_PATTERN: 832 case MAT_USE_HASH_TABLE: 833 case MAT_SORTED_FULL: 834 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 835 break; 836 case MAT_IGNORE_OFF_PROC_ENTRIES: 837 a->donotstash = flg; 838 break; 839 case MAT_SYMMETRIC: 840 case MAT_STRUCTURALLY_SYMMETRIC: 841 case MAT_HERMITIAN: 842 case MAT_SYMMETRY_ETERNAL: 843 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 844 case MAT_SPD: 845 case MAT_IGNORE_LOWER_TRIANGULAR: 846 case MAT_IGNORE_ZERO_ENTRIES: 847 case MAT_SPD_ETERNAL: 848 /* if the diagonal matrix is square it inherits some of the properties above */ 849 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 850 break; 851 default: 852 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 853 } 854 PetscFunctionReturn(PETSC_SUCCESS); 855 } 856 857 PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 858 { 859 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 860 const PetscScalar *l; 861 PetscScalar x, *v, *vv, *r; 862 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 863 864 PetscFunctionBegin; 865 PetscCall(MatDenseGetArray(mdn->A, &vv)); 866 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 867 PetscCall(MatGetLocalSize(A, &s2, &s3)); 868 if (ll) { 869 PetscCall(VecGetLocalSize(ll, &s2a)); 870 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 871 PetscCall(VecGetArrayRead(ll, &l)); 872 for (i = 0; i < m; i++) { 873 x = l[i]; 874 v = vv + i; 875 for (j = 0; j < n; j++) { 876 (*v) *= x; 877 v += lda; 878 } 879 } 880 PetscCall(VecRestoreArrayRead(ll, &l)); 881 PetscCall(PetscLogFlops(1.0 * n * m)); 882 } 883 if (rr) { 884 const PetscScalar *ar; 885 886 PetscCall(VecGetLocalSize(rr, &s3a)); 887 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 888 PetscCall(VecGetArrayRead(rr, &ar)); 889 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 890 PetscCall(VecGetArray(mdn->lvec, &r)); 891 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 892 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 893 PetscCall(VecRestoreArrayRead(rr, &ar)); 894 for (i = 0; i < n; i++) { 895 x = r[i]; 896 v = vv + i * lda; 897 for (j = 0; j < m; j++) (*v++) *= x; 898 } 899 PetscCall(VecRestoreArray(mdn->lvec, &r)); 900 PetscCall(PetscLogFlops(1.0 * n * m)); 901 } 902 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 903 PetscFunctionReturn(PETSC_SUCCESS); 904 } 905 906 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 907 { 908 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 909 PetscInt i, j; 910 PetscMPIInt size; 911 PetscReal sum = 0.0; 912 const PetscScalar *av, *v; 913 914 PetscFunctionBegin; 915 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 916 v = av; 917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 918 if (size == 1) { 919 PetscCall(MatNorm(mdn->A, type, nrm)); 920 } else { 921 if (type == NORM_FROBENIUS) { 922 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 923 sum += PetscRealPart(PetscConj(*v) * (*v)); 924 v++; 925 } 926 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, 
MPIU_SUM, PetscObjectComm((PetscObject)A))); 927 *nrm = PetscSqrtReal(*nrm); 928 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 929 } else if (type == NORM_1) { 930 PetscReal *tmp, *tmp2; 931 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 932 *nrm = 0.0; 933 v = av; 934 for (j = 0; j < mdn->A->cmap->n; j++) { 935 for (i = 0; i < mdn->A->rmap->n; i++) { 936 tmp[j] += PetscAbsScalar(*v); 937 v++; 938 } 939 } 940 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 941 for (j = 0; j < A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 PetscCall(PetscFree2(tmp, tmp2)); 945 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 PetscCall(MatNorm(mdn->A, type, &ntemp)); 949 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 950 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 951 } 952 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 953 PetscFunctionReturn(PETSC_SUCCESS); 954 } 955 956 PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959 Mat B; 960 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 961 PetscInt j, i, lda; 962 PetscScalar *v; 963 964 PetscFunctionBegin; 965 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 966 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 967 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 968 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 969 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 970 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 971 } else B = *matout; 972 973 m = a->A->rmap->n; 974 n = a->A->cmap->n; 975 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 976 PetscCall(MatDenseGetLDA(a->A, &lda)); 977 PetscCall(PetscMalloc1(m, &rwork)); 978 for (i = 0; i < m; i++) rwork[i] = rstart + i; 979 for (j = 0; j < n; j++) { 980 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 981 v += lda; 982 } 983 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 984 PetscCall(PetscFree(rwork)); 985 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 986 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 987 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 988 *matout = B; 989 } else { 990 PetscCall(MatHeaderMerge(A, &B)); 991 } 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 997 998 PetscErrorCode MatSetUp_MPIDense(Mat A) 999 { 1000 PetscFunctionBegin; 1001 PetscCall(PetscLayoutSetUp(A->rmap)); 1002 PetscCall(PetscLayoutSetUp(A->cmap)); 1003 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1008 { 1009 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1010 1011 PetscFunctionBegin; 1012 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1017 { 1018 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1019 1020 PetscFunctionBegin; 
1021 PetscCall(MatConjugate(a->A)); 1022 PetscFunctionReturn(PETSC_SUCCESS); 1023 } 1024 1025 PetscErrorCode MatRealPart_MPIDense(Mat A) 1026 { 1027 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1028 1029 PetscFunctionBegin; 1030 PetscCall(MatRealPart(a->A)); 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1035 { 1036 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1037 1038 PetscFunctionBegin; 1039 PetscCall(MatImaginaryPart(a->A)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1044 { 1045 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1046 1047 PetscFunctionBegin; 1048 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1049 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1050 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1055 1056 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1057 { 1058 PetscInt i, m, n; 1059 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1060 PetscReal *work; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatGetSize(A, &m, &n)); 1064 PetscCall(PetscMalloc1(n, &work)); 1065 if (type == REDUCTION_MEAN_REALPART) { 1066 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1067 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1068 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1069 } else { 1070 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1071 } 1072 if (type == NORM_2) { 1073 for (i = 0; i < n; i++) work[i] *= work[i]; 1074 } 1075 if (type == NORM_INFINITY) { 1076 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1077 } else { 1078 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1079 } 1080 PetscCall(PetscFree(work)); 1081 if (type == NORM_2) { 1082 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1083 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1084 for (i = 0; i < n; i++) reductions[i] /= m; 1085 } 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 #if defined(PETSC_HAVE_CUDA) 1090 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) 1091 { 1092 PetscScalar *da; 1093 PetscInt lda; 1094 1095 PetscFunctionBegin; 1096 PetscCall(MatDenseCUDAGetArray(A, &da)); 1097 PetscCall(MatDenseGetLDA(A, &lda)); 1098 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1099 PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1100 PetscCall(MatDenseCUDARestoreArray(A, &da)); 1101 PetscFunctionReturn(PETSC_SUCCESS); 1102 } 1103 1104 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1105 { 1106 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1107 PetscInt lda; 1108 1109 PetscFunctionBegin; 1110 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1111 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1112 if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), 
A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1113 a->vecinuse = col + 1; 1114 PetscCall(MatDenseGetLDA(a->A, &lda)); 1115 PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1116 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1117 *v = a->cvec; 1118 PetscFunctionReturn(PETSC_SUCCESS); 1119 } 1120 1121 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1122 { 1123 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1124 1125 PetscFunctionBegin; 1126 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1127 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1128 a->vecinuse = 0; 1129 PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1130 PetscCall(VecCUDAResetArray(a->cvec)); 1131 if (v) *v = NULL; 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1136 { 1137 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1138 PetscInt lda; 1139 1140 PetscFunctionBegin; 1141 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1142 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1143 if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1144 a->vecinuse = col + 1; 1145 PetscCall(MatDenseGetLDA(a->A, &lda)); 1146 PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse)); 1147 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1148 PetscCall(VecLockReadPush(a->cvec)); 1149 *v = a->cvec; 1150 PetscFunctionReturn(PETSC_SUCCESS); 1151 } 1152 1153 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1154 { 1155 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1156 1157 PetscFunctionBegin; 1158 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1159 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1160 a->vecinuse = 0; 1161 PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse)); 1162 PetscCall(VecLockReadPop(a->cvec)); 1163 PetscCall(VecCUDAResetArray(a->cvec)); 1164 if (v) *v = NULL; 1165 PetscFunctionReturn(PETSC_SUCCESS); 1166 } 1167 1168 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1169 { 1170 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1171 PetscInt lda; 1172 1173 PetscFunctionBegin; 1174 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1175 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1176 if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1177 a->vecinuse = col + 1; 1178 PetscCall(MatDenseGetLDA(a->A, &lda)); 1179 PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1180 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1181 *v = a->cvec; 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat 
A, PetscInt col, Vec *v) 1186 { 1187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1188 1189 PetscFunctionBegin; 1190 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1191 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1192 a->vecinuse = 0; 1193 PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1194 PetscCall(VecCUDAResetArray(a->cvec)); 1195 if (v) *v = NULL; 1196 PetscFunctionReturn(PETSC_SUCCESS); 1197 } 1198 1199 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1200 { 1201 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1202 1203 PetscFunctionBegin; 1204 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1205 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1206 PetscCall(MatDenseCUDAPlaceArray(l->A, a)); 1207 PetscFunctionReturn(PETSC_SUCCESS); 1208 } 1209 1210 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1211 { 1212 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1213 1214 PetscFunctionBegin; 1215 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1216 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1217 PetscCall(MatDenseCUDAResetArray(l->A)); 1218 PetscFunctionReturn(PETSC_SUCCESS); 1219 } 1220 1221 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1222 { 1223 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1224 1225 PetscFunctionBegin; 1226 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1227 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1228 PetscCall(MatDenseCUDAReplaceArray(l->A, a)); 1229 PetscFunctionReturn(PETSC_SUCCESS); 1230 } 1231 1232 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1233 { 1234 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1235 1236 PetscFunctionBegin; 1237 PetscCall(MatDenseCUDAGetArrayWrite(l->A, a)); 1238 PetscFunctionReturn(PETSC_SUCCESS); 1239 } 1240 1241 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1242 { 1243 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1244 1245 PetscFunctionBegin; 1246 PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a)); 1247 PetscFunctionReturn(PETSC_SUCCESS); 1248 } 1249 1250 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1251 { 1252 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1253 1254 PetscFunctionBegin; 1255 PetscCall(MatDenseCUDAGetArrayRead(l->A, a)); 1256 PetscFunctionReturn(PETSC_SUCCESS); 1257 } 1258 1259 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1260 { 1261 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1262 1263 PetscFunctionBegin; 1264 PetscCall(MatDenseCUDARestoreArrayRead(l->A, a)); 1265 PetscFunctionReturn(PETSC_SUCCESS); 1266 } 1267 1268 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1269 { 1270 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1271 1272 PetscFunctionBegin; 1273 PetscCall(MatDenseCUDAGetArray(l->A, a)); 1274 
PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1278 { 1279 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1280 1281 PetscFunctionBegin; 1282 PetscCall(MatDenseCUDARestoreArray(l->A, a)); 1283 PetscFunctionReturn(PETSC_SUCCESS); 1284 } 1285 1286 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1287 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1288 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1289 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1290 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1291 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1292 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 1293 1294 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) 1295 { 1296 Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1297 1298 PetscFunctionBegin; 1299 PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1300 PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1301 if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1302 mat->boundtocpu = bind; 1303 if (!bind) { 1304 PetscBool iscuda; 1305 1306 PetscCall(PetscFree(mat->defaultrandtype)); 1307 PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype)); 1308 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda)); 1309 if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 1310 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda)); 1311 if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 1312 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA)); 1313 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA)); 1314 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA)); 1315 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1318 mat->ops->shift = MatShift_MPIDenseCUDA; 1319 } else { 1320 PetscCall(PetscFree(mat->defaultrandtype)); 1321 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1323 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1324 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1327 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1328 mat->ops->shift = MatShift_MPIDense; 1329 } 1330 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1331 PetscFunctionReturn(PETSC_SUCCESS); 1332 } 1333 1334 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1335 { 1336 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1337 PetscBool iscuda; 1338 1339 PetscFunctionBegin; 1340 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1341 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 1342 if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS); 1343 PetscCall(PetscLayoutSetUp(A->rmap)); 1344 PetscCall(PetscLayoutSetUp(A->cmap)); 1345 if (!d->A) { 1346 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1347 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1348 } 1349 PetscCall(MatSetType(d->A, MATSEQDENSECUDA)); 1350 PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data)); 1351 A->preallocated = PETSC_TRUE; 1352 A->assembled = PETSC_TRUE; 1353 PetscFunctionReturn(PETSC_SUCCESS); 1354 } 1355 #endif 1356 1357 #if defined(PETSC_HAVE_HIP) 1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha) 1359 { 1360 PetscScalar *da; 1361 PetscInt lda; 1362 1363 PetscFunctionBegin; 1364 PetscCall(MatDenseHIPGetArray(A, &da)); 1365 PetscCall(MatDenseGetLDA(A, &lda)); 1366 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1367 PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1368 PetscCall(MatDenseHIPRestoreArray(A, &da)); 1369 PetscFunctionReturn(PETSC_SUCCESS); 1370 } 1371 1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1373 { 1374 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1375 PetscInt lda; 1376 1377 PetscFunctionBegin; 1378 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1379 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1380 if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1381 a->vecinuse = col + 1; 1382 PetscCall(MatDenseGetLDA(a->A, &lda)); 1383 PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1384 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1385 *v = a->cvec; 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1390 { 1391 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1392 1393 PetscFunctionBegin; 1394 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1395 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1396 a->vecinuse = 0; 1397 PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1398 PetscCall(VecHIPResetArray(a->cvec)); 1399 if (v) *v = NULL; 1400 PetscFunctionReturn(PETSC_SUCCESS); 1401 } 1402 1403 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1404 { 1405 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1406 PetscInt lda; 1407 1408 PetscFunctionBegin; 1409 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1410 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call 
MatDenseRestoreSubMatrix() first"); 1411 if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1412 a->vecinuse = col + 1; 1413 PetscCall(MatDenseGetLDA(a->A, &lda)); 1414 PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse)); 1415 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1416 PetscCall(VecLockReadPush(a->cvec)); 1417 *v = a->cvec; 1418 PetscFunctionReturn(PETSC_SUCCESS); 1419 } 1420 1421 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1422 { 1423 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1424 1425 PetscFunctionBegin; 1426 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1427 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1428 a->vecinuse = 0; 1429 PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse)); 1430 PetscCall(VecLockReadPop(a->cvec)); 1431 PetscCall(VecHIPResetArray(a->cvec)); 1432 if (v) *v = NULL; 1433 PetscFunctionReturn(PETSC_SUCCESS); 1434 } 1435 1436 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1437 { 1438 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1439 PetscInt lda; 1440 1441 PetscFunctionBegin; 1442 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1443 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1444 if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1445 a->vecinuse = col + 1; 1446 PetscCall(MatDenseGetLDA(a->A, &lda)); 1447 PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1448 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1449 *v = a->cvec; 1450 PetscFunctionReturn(PETSC_SUCCESS); 1451 } 1452 1453 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1454 { 1455 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1456 1457 PetscFunctionBegin; 1458 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1459 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1460 a->vecinuse = 0; 1461 PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1462 PetscCall(VecHIPResetArray(a->cvec)); 1463 if (v) *v = NULL; 1464 PetscFunctionReturn(PETSC_SUCCESS); 1465 } 1466 1467 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1468 { 1469 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1470 1471 PetscFunctionBegin; 1472 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1473 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1474 PetscCall(MatDenseHIPPlaceArray(l->A, a)); 1475 PetscFunctionReturn(PETSC_SUCCESS); 1476 } 1477 1478 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A) 1479 { 1480 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1481 1482 PetscFunctionBegin; 1483 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1484 
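/* Resetting the HIP device array is only legal once any borrowed column vector or submatrix
     view of this matrix has been restored; the surrounding PetscCheck() calls enforce that ordering */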
PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1485 PetscCall(MatDenseHIPResetArray(l->A)); 1486 PetscFunctionReturn(PETSC_SUCCESS); 1487 } 1488 1489 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1490 { 1491 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1492 1493 PetscFunctionBegin; 1494 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1495 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1496 PetscCall(MatDenseHIPReplaceArray(l->A, a)); 1497 PetscFunctionReturn(PETSC_SUCCESS); 1498 } 1499 1500 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1501 { 1502 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1503 1504 PetscFunctionBegin; 1505 PetscCall(MatDenseHIPGetArrayWrite(l->A, a)); 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1510 { 1511 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1512 1513 PetscFunctionBegin; 1514 PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a)); 1515 PetscFunctionReturn(PETSC_SUCCESS); 1516 } 1517 1518 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1519 { 1520 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1521 1522 PetscFunctionBegin; 1523 PetscCall(MatDenseHIPGetArrayRead(l->A, a)); 1524 PetscFunctionReturn(PETSC_SUCCESS); 1525 } 1526 1527 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1528 { 1529 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1530 1531 PetscFunctionBegin; 1532 PetscCall(MatDenseHIPRestoreArrayRead(l->A, a)); 1533 PetscFunctionReturn(PETSC_SUCCESS); 1534 } 1535 1536 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a) 1537 { 1538 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1539 1540 PetscFunctionBegin; 1541 PetscCall(MatDenseHIPGetArray(l->A, a)); 1542 PetscFunctionReturn(PETSC_SUCCESS); 1543 } 1544 1545 static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a) 1546 { 1547 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1548 1549 PetscFunctionBegin; 1550 PetscCall(MatDenseHIPRestoreArray(l->A, a)); 1551 PetscFunctionReturn(PETSC_SUCCESS); 1552 } 1553 1554 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1555 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1556 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1557 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1558 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1559 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1560 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 1561 1562 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind) 1563 { 1564 Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1565 1566 PetscFunctionBegin; 1567 PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1568 PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1569 if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1570 mat->boundtocpu = bind; 1571 if (!bind) { 
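/* Unbinding from the CPU: select the device random number generator and switch the
       column-vector accessors to their HIP implementations; a cached column vector or
       convenience submatrix of the wrong (host) type is destroyed so that it is recreated
       on demand with the proper device type */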
1572 PetscBool iscuda; 1573 1574 PetscCall(PetscFree(mat->defaultrandtype)); 1575 PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype)); 1576 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iscuda)); 1577 if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 1578 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iscuda)); 1579 if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 1580 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP)); 1581 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP)); 1582 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP)); 1583 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP)); 1584 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP)); 1585 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP)); 1586 mat->ops->shift = MatShift_MPIDenseHIP; 1587 } else { 1588 PetscCall(PetscFree(mat->defaultrandtype)); 1589 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1596 mat->ops->shift = MatShift_MPIDense; 1597 } 1598 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data) 1603 { 1604 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1605 PetscBool iscuda; 1606 1607 PetscFunctionBegin; 1608 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1609 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iscuda)); 1610 if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS); 1611 PetscCall(PetscLayoutSetUp(A->rmap)); 1612 PetscCall(PetscLayoutSetUp(A->cmap)); 1613 if (!d->A) { 1614 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1615 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1616 } 1617 PetscCall(MatSetType(d->A, MATSEQDENSEHIP)); 1618 PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data)); 1619 A->preallocated = PETSC_TRUE; 1620 A->assembled = PETSC_TRUE; 1621 PetscFunctionReturn(PETSC_SUCCESS); 1622 } 1623 #endif 1624 1625 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1626 { 1627 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1628 1629 PetscFunctionBegin; 1630 PetscCall(MatSetRandom(d->A, rctx)); 1631 #if defined(PETSC_HAVE_DEVICE) 1632 x->offloadmask = d->A->offloadmask; 1633 #endif 1634 PetscFunctionReturn(PETSC_SUCCESS); 1635 } 1636 1637 static PetscErrorCode 
MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1638 { 1639 PetscFunctionBegin; 1640 *missing = PETSC_FALSE; 1641 PetscFunctionReturn(PETSC_SUCCESS); 1642 } 1643 1644 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1645 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1646 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1647 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1648 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1649 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1650 1651 /* -------------------------------------------------------------------*/ 1652 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1653 MatGetRow_MPIDense, 1654 MatRestoreRow_MPIDense, 1655 MatMult_MPIDense, 1656 /* 4*/ MatMultAdd_MPIDense, 1657 MatMultTranspose_MPIDense, 1658 MatMultTransposeAdd_MPIDense, 1659 NULL, 1660 NULL, 1661 NULL, 1662 /* 10*/ NULL, 1663 NULL, 1664 NULL, 1665 NULL, 1666 MatTranspose_MPIDense, 1667 /* 15*/ MatGetInfo_MPIDense, 1668 MatEqual_MPIDense, 1669 MatGetDiagonal_MPIDense, 1670 MatDiagonalScale_MPIDense, 1671 MatNorm_MPIDense, 1672 /* 20*/ MatAssemblyBegin_MPIDense, 1673 MatAssemblyEnd_MPIDense, 1674 MatSetOption_MPIDense, 1675 MatZeroEntries_MPIDense, 1676 /* 24*/ MatZeroRows_MPIDense, 1677 NULL, 1678 NULL, 1679 NULL, 1680 NULL, 1681 /* 29*/ MatSetUp_MPIDense, 1682 NULL, 1683 NULL, 1684 MatGetDiagonalBlock_MPIDense, 1685 NULL, 1686 /* 34*/ MatDuplicate_MPIDense, 1687 NULL, 1688 NULL, 1689 NULL, 1690 NULL, 1691 /* 39*/ MatAXPY_MPIDense, 1692 MatCreateSubMatrices_MPIDense, 1693 NULL, 1694 MatGetValues_MPIDense, 1695 MatCopy_MPIDense, 1696 /* 44*/ NULL, 1697 MatScale_MPIDense, 1698 MatShift_MPIDense, 1699 NULL, 1700 NULL, 1701 /* 49*/ MatSetRandom_MPIDense, 1702 NULL, 1703 NULL, 1704 NULL, 1705 NULL, 1706 /* 54*/ NULL, 1707 NULL, 1708 NULL, 1709 NULL, 1710 NULL, 1711 /* 59*/ MatCreateSubMatrix_MPIDense, 1712 MatDestroy_MPIDense, 1713 MatView_MPIDense, 1714 NULL, 1715 NULL, 1716 /* 64*/ NULL, 1717 NULL, 1718 NULL, 1719 NULL, 1720 NULL, 1721 /* 69*/ NULL, 1722 NULL, 1723 NULL, 1724 NULL, 1725 NULL, 1726 /* 74*/ NULL, 1727 NULL, 1728 NULL, 1729 NULL, 1730 NULL, 1731 /* 79*/ NULL, 1732 NULL, 1733 NULL, 1734 NULL, 1735 /* 83*/ MatLoad_MPIDense, 1736 NULL, 1737 NULL, 1738 NULL, 1739 NULL, 1740 NULL, 1741 /* 89*/ NULL, 1742 NULL, 1743 NULL, 1744 NULL, 1745 NULL, 1746 /* 94*/ NULL, 1747 NULL, 1748 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1749 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1750 NULL, 1751 /* 99*/ MatProductSetFromOptions_MPIDense, 1752 NULL, 1753 NULL, 1754 MatConjugate_MPIDense, 1755 NULL, 1756 /*104*/ NULL, 1757 MatRealPart_MPIDense, 1758 MatImaginaryPart_MPIDense, 1759 NULL, 1760 NULL, 1761 /*109*/ NULL, 1762 NULL, 1763 NULL, 1764 MatGetColumnVector_MPIDense, 1765 MatMissingDiagonal_MPIDense, 1766 /*114*/ NULL, 1767 NULL, 1768 NULL, 1769 NULL, 1770 NULL, 1771 /*119*/ NULL, 1772 NULL, 1773 NULL, 1774 NULL, 1775 NULL, 1776 /*124*/ NULL, 1777 MatGetColumnReductions_MPIDense, 1778 NULL, 1779 NULL, 1780 NULL, 1781 /*129*/ NULL, 1782 NULL, 1783 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1784 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1785 NULL, 1786 /*134*/ NULL, 1787 NULL, 1788 NULL, 1789 NULL, 1790 NULL, 1791 /*139*/ NULL, 1792 NULL, 1793 NULL, 1794 NULL, 1795 NULL, 1796 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1797 /*145*/ NULL, 1798 NULL, 1799 
NULL, 1800 NULL, 1801 NULL, 1802 /*150*/ NULL, 1803 NULL}; 1804 1805 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1806 { 1807 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1808 MatType mtype = MATSEQDENSE; 1809 1810 PetscFunctionBegin; 1811 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1812 PetscCall(PetscLayoutSetUp(mat->rmap)); 1813 PetscCall(PetscLayoutSetUp(mat->cmap)); 1814 if (!a->A) { 1815 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1816 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1817 } 1818 #if defined(PETSC_HAVE_CUDA) 1819 PetscBool iscuda; 1820 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1821 if (iscuda) mtype = MATSEQDENSECUDA; 1822 #endif 1823 #if defined(PETSC_HAVE_HIP) 1824 PetscBool iship; 1825 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1826 if (iship) mtype = MATSEQDENSEHIP; 1827 #endif 1828 PetscCall(MatSetType(a->A, mtype)); 1829 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1830 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1831 mat->offloadmask = a->A->offloadmask; 1832 #endif 1833 mat->preallocated = PETSC_TRUE; 1834 mat->assembled = PETSC_TRUE; 1835 PetscFunctionReturn(PETSC_SUCCESS); 1836 } 1837 1838 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1839 { 1840 Mat B, C; 1841 1842 PetscFunctionBegin; 1843 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1844 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1845 PetscCall(MatDestroy(&C)); 1846 if (reuse == MAT_REUSE_MATRIX) { 1847 C = *newmat; 1848 } else C = NULL; 1849 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1850 PetscCall(MatDestroy(&B)); 1851 if (reuse == MAT_INPLACE_MATRIX) { 1852 PetscCall(MatHeaderReplace(A, &C)); 1853 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1854 PetscFunctionReturn(PETSC_SUCCESS); 1855 } 1856 1857 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1858 { 1859 Mat B, C; 1860 1861 PetscFunctionBegin; 1862 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1863 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1864 if (reuse == MAT_REUSE_MATRIX) { 1865 C = *newmat; 1866 } else C = NULL; 1867 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1868 PetscCall(MatDestroy(&B)); 1869 if (reuse == MAT_INPLACE_MATRIX) { 1870 PetscCall(MatHeaderReplace(A, &C)); 1871 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 1875 #if defined(PETSC_HAVE_ELEMENTAL) 1876 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1877 { 1878 Mat mat_elemental; 1879 PetscScalar *v; 1880 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 1881 1882 PetscFunctionBegin; 1883 if (reuse == MAT_REUSE_MATRIX) { 1884 mat_elemental = *newmat; 1885 PetscCall(MatZeroEntries(*newmat)); 1886 } else { 1887 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1888 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1889 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1890 PetscCall(MatSetUp(mat_elemental)); 1891 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1892 } 1893 1894 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1895 for (i = 0; i < N; i++) cols[i] = i; 1896 for (i = 0; i < m; i++) rows[i] = rstart + i; 1897 1898 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1899 PetscCall(MatDenseGetArray(A, &v)); 1900 PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1901 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1902 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1903 PetscCall(MatDenseRestoreArray(A, &v)); 1904 PetscCall(PetscFree2(rows, cols)); 1905 1906 if (reuse == MAT_INPLACE_MATRIX) { 1907 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1908 } else { 1909 *newmat = mat_elemental; 1910 } 1911 PetscFunctionReturn(PETSC_SUCCESS); 1912 } 1913 #endif 1914 1915 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1916 { 1917 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1918 1919 PetscFunctionBegin; 1920 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1921 PetscFunctionReturn(PETSC_SUCCESS); 1922 } 1923 1924 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1925 { 1926 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1927 1928 PetscFunctionBegin; 1929 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1930 PetscFunctionReturn(PETSC_SUCCESS); 1931 } 1932 1933 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1934 { 1935 Mat_MPIDense *mat; 1936 PetscInt m, nloc, N; 1937 1938 PetscFunctionBegin; 1939 PetscCall(MatGetSize(inmat, &m, &N)); 1940 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1941 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1942 PetscInt sum; 1943 1944 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1945 /* Check sum(n) = N */ 1946 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1947 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1948 1949 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1950 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1951 } 1952 1953 /* numeric phase */ 1954 mat = (Mat_MPIDense *)(*outmat)->data; 1955 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1956 PetscFunctionReturn(PETSC_SUCCESS); 1957 } 1958 1959 #if 
defined(PETSC_HAVE_CUDA) 1960 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 1961 { 1962 Mat B; 1963 Mat_MPIDense *m; 1964 1965 PetscFunctionBegin; 1966 if (reuse == MAT_INITIAL_MATRIX) { 1967 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1968 } else if (reuse == MAT_REUSE_MATRIX) { 1969 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1970 } 1971 1972 B = *newmat; 1973 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE)); 1974 PetscCall(PetscFree(B->defaultvectype)); 1975 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 1976 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 1977 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL)); 1978 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 1979 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 1980 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 1981 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 1982 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL)); 1983 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL)); 1984 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL)); 1985 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL)); 1986 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL)); 1987 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL)); 1988 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL)); 1989 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL)); 1990 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL)); 1991 m = (Mat_MPIDense *)(B)->data; 1992 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 1993 B->ops->bindtocpu = NULL; 1994 B->offloadmask = PETSC_OFFLOAD_CPU; 1995 PetscFunctionReturn(PETSC_SUCCESS); 1996 } 1997 1998 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) 1999 { 2000 Mat B; 2001 Mat_MPIDense *m; 2002 2003 PetscFunctionBegin; 2004 if (reuse == MAT_INITIAL_MATRIX) { 2005 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2006 } else if (reuse == MAT_REUSE_MATRIX) { 2007 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2008 } 2009 2010 B = *newmat; 2011 PetscCall(PetscFree(B->defaultvectype)); 2012 PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 2013 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 2014 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 2015 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2016 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2017 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 
2018 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2019 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 2020 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 2021 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 2022 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 2023 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 2024 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 2025 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 2026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 2027 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 2028 m = (Mat_MPIDense *)(B->data); 2029 if (m->A) { 2030 PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 2031 B->offloadmask = PETSC_OFFLOAD_BOTH; 2032 } else { 2033 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2034 } 2035 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 2036 2037 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 2038 PetscFunctionReturn(PETSC_SUCCESS); 2039 } 2040 #endif 2041 2042 #if defined(PETSC_HAVE_HIP) 2043 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2044 { 2045 Mat B; 2046 Mat_MPIDense *m; 2047 2048 PetscFunctionBegin; 2049 if (reuse == MAT_INITIAL_MATRIX) { 2050 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2051 } else if (reuse == MAT_REUSE_MATRIX) { 2052 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2053 } 2054 2055 B = *newmat; 2056 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE)); 2057 PetscCall(PetscFree(B->defaultvectype)); 2058 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 2059 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 2060 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL)); 2061 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 2062 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 2063 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 2064 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 2065 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL)); 2066 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL)); 2067 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL)); 2068 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL)); 2069 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL)); 2070 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatDenseHIPRestoreArrayWrite_C", NULL)); 2071 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL)); 2072 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL)); 2073 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL)); 2074 m = (Mat_MPIDense *)(B)->data; 2075 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 2076 B->ops->bindtocpu = NULL; 2077 B->offloadmask = PETSC_OFFLOAD_CPU; 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2082 { 2083 Mat B; 2084 Mat_MPIDense *m; 2085 2086 PetscFunctionBegin; 2087 if (reuse == MAT_INITIAL_MATRIX) { 2088 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2089 } else if (reuse == MAT_REUSE_MATRIX) { 2090 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2091 } 2092 2093 B = *newmat; 2094 PetscCall(PetscFree(B->defaultvectype)); 2095 PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype)); 2096 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP)); 2097 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense)); 2098 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2099 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2100 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2101 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2102 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP)); 2103 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP)); 2104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP)); 2105 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP)); 2106 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP)); 2107 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP)); 2108 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP)); 2109 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP)); 2110 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP)); 2111 m = (Mat_MPIDense *)(B->data); 2112 if (m->A) { 2113 PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A)); 2114 B->offloadmask = PETSC_OFFLOAD_BOTH; 2115 } else { 2116 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2117 } 2118 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE)); 2119 2120 B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP; 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 #endif 2124 2125 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2126 { 
2127 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2128 PetscInt lda; 2129 2130 PetscFunctionBegin; 2131 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2132 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2133 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2134 a->vecinuse = col + 1; 2135 PetscCall(MatDenseGetLDA(a->A, &lda)); 2136 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 2137 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2138 *v = a->cvec; 2139 PetscFunctionReturn(PETSC_SUCCESS); 2140 } 2141 2142 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2143 { 2144 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2145 2146 PetscFunctionBegin; 2147 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2148 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 2149 a->vecinuse = 0; 2150 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 2151 PetscCall(VecResetArray(a->cvec)); 2152 if (v) *v = NULL; 2153 PetscFunctionReturn(PETSC_SUCCESS); 2154 } 2155 2156 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2157 { 2158 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2159 PetscInt lda; 2160 2161 PetscFunctionBegin; 2162 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2163 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2164 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2165 a->vecinuse = col + 1; 2166 PetscCall(MatDenseGetLDA(a->A, &lda)); 2167 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 2168 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2169 PetscCall(VecLockReadPush(a->cvec)); 2170 *v = a->cvec; 2171 PetscFunctionReturn(PETSC_SUCCESS); 2172 } 2173 2174 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2175 { 2176 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2177 2178 PetscFunctionBegin; 2179 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2180 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2181 a->vecinuse = 0; 2182 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 2183 PetscCall(VecLockReadPop(a->cvec)); 2184 PetscCall(VecResetArray(a->cvec)); 2185 if (v) *v = NULL; 2186 PetscFunctionReturn(PETSC_SUCCESS); 2187 } 2188 2189 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2190 { 2191 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2192 PetscInt lda; 2193 2194 PetscFunctionBegin; 2195 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2196 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2197 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2198 a->vecinuse = col + 1; 2199 
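/* Same borrow protocol as MatDenseGetColumnVec_MPIDense() above: a lazily created VECMPI
     wrapper (owning no array of its own) is pointed at column `col` of the local dense storage,
     offset col*lda, and vecinuse records the outstanding borrow until the matching restore call.
     This write-only variant fetches the array with MatDenseGetArrayWrite(), so the existing
     contents of the column need not be valid before the caller overwrites them */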
PetscCall(MatDenseGetLDA(a->A, &lda)); 2200 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2201 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2202 *v = a->cvec; 2203 PetscFunctionReturn(PETSC_SUCCESS); 2204 } 2205 2206 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2207 { 2208 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2209 2210 PetscFunctionBegin; 2211 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2212 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2213 a->vecinuse = 0; 2214 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2215 PetscCall(VecResetArray(a->cvec)); 2216 if (v) *v = NULL; 2217 PetscFunctionReturn(PETSC_SUCCESS); 2218 } 2219 2220 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 2221 { 2222 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2223 Mat_MPIDense *c; 2224 MPI_Comm comm; 2225 PetscInt pbegin, pend; 2226 2227 PetscFunctionBegin; 2228 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2229 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2230 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2231 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 2232 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 2233 if (!a->cmat) { 2234 PetscCall(MatCreate(comm, &a->cmat)); 2235 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 2236 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 2237 else { 2238 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2239 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2240 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2241 } 2242 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2243 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2244 } else { 2245 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 2246 if (same && a->cmat->rmap->N != A->rmap->N) { 2247 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 2248 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2249 } 2250 if (!same) { 2251 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 2252 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 2253 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2254 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2255 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2256 } 2257 if (cend - cbegin != a->cmat->cmap->N) { 2258 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 2259 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 2260 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2261 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2262 } 2263 } 2264 c = (Mat_MPIDense *)a->cmat->data; 2265 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2266 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 2267 2268 a->cmat->preallocated = PETSC_TRUE; 2269 a->cmat->assembled = PETSC_TRUE; 2270 #if defined(PETSC_HAVE_DEVICE) 2271 a->cmat->offloadmask = c->A->offloadmask; 2272 #endif 2273 a->matinuse = cbegin + 1; 2274 *v = a->cmat; 2275 PetscFunctionReturn(PETSC_SUCCESS); 2276 } 2277 2278 
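/*
   Illustrative usage sketch (not part of the PETSc library): how a caller would typically use
   the MatDenseGetSubMatrix()/MatDenseRestoreSubMatrix() pair implemented here. The helper name,
   the trailing-column choice, and the scaling factor are hypothetical; the sketch assumes X is
   an assembled MATMPIDENSE (or MATMPIDENSECUDA/MATMPIDENSEHIP) matrix with at least nc columns.
*/
static inline PetscErrorCode ExampleScaleTrailingColumns_Sketch(Mat X, PetscInt nc, PetscScalar alpha)
{
  Mat      sub;
  PetscInt N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(X, NULL, &N));
  /* borrow a view of all rows and the last nc columns; no values are copied */
  PetscCall(MatDenseGetSubMatrix(X, PETSC_DECIDE, PETSC_DECIDE, N - nc, N, &sub));
  /* the view shares X's storage, so this scales the corresponding entries of X in place */
  PetscCall(MatScale(sub, alpha));
  /* the view must be returned before X's array is accessed through any other path */
  PetscCall(MatDenseRestoreSubMatrix(X, &sub));
  PetscFunctionReturn(PETSC_SUCCESS);
}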
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 2279 { 2280 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2281 Mat_MPIDense *c; 2282 2283 PetscFunctionBegin; 2284 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 2285 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 2286 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 2287 a->matinuse = 0; 2288 c = (Mat_MPIDense *)a->cmat->data; 2289 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 2290 if (v) *v = NULL; 2291 #if defined(PETSC_HAVE_DEVICE) 2292 A->offloadmask = a->A->offloadmask; 2293 #endif 2294 PetscFunctionReturn(PETSC_SUCCESS); 2295 } 2296 2297 /*MC 2298 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 2299 2300 Options Database Keys: 2301 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 2302 2303 Level: beginner 2304 2305 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 2306 M*/ 2307 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 2308 { 2309 Mat_MPIDense *a; 2310 2311 PetscFunctionBegin; 2312 PetscCall(PetscNew(&a)); 2313 mat->data = (void *)a; 2314 PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps))); 2315 2316 mat->insertmode = NOT_SET_VALUES; 2317 2318 /* build cache for off array entries formed */ 2319 a->donotstash = PETSC_FALSE; 2320 2321 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 2322 2323 /* stuff used for matrix vector multiply */ 2324 a->lvec = NULL; 2325 a->Mvctx = NULL; 2326 a->roworiented = PETSC_TRUE; 2327 2328 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 2329 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 2330 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 2331 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 2332 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 2333 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 2334 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 2335 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 2336 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 2337 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 2338 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 2339 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 2340 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 2341 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 2342 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", 
MatDenseRestoreColumnVecRead_MPIDense)); 2343 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 2344 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 2345 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 2346 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 2347 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 2348 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 2349 #if defined(PETSC_HAVE_ELEMENTAL) 2350 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 2351 #endif 2352 #if defined(PETSC_HAVE_SCALAPACK) 2353 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 2354 #endif 2355 #if defined(PETSC_HAVE_CUDA) 2356 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 2357 #endif 2358 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 2359 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2360 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2361 #if defined(PETSC_HAVE_CUDA) 2362 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2363 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2364 #endif 2365 #if defined(PETSC_HAVE_HIP) 2366 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 2367 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2368 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2369 #endif 2370 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 2371 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 2372 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 2373 PetscFunctionReturn(PETSC_SUCCESS); 2374 } 2375 2376 /*MC 2377 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 2378 2379 Options Database Keys: 2380 . 
-mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()` 2381 2382 Level: beginner 2383 2384 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP` 2385 M*/ 2386
#if defined(PETSC_HAVE_CUDA) 2387 #include <petsc/private/deviceimpl.h> 2388 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 2389 { 2390 PetscFunctionBegin; 2391 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 2392 PetscCall(MatCreate_MPIDense(B)); 2393 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B)); 2394 PetscFunctionReturn(PETSC_SUCCESS); 2395 } 2396 #endif 2397 2398
/*MC 2399 MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs. 2400 2401 Options Database Keys: 2402 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()` 2403 2404 Level: beginner 2405 2406 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATMPIDENSEHIP` 2407 M*/ 2408
#if defined(PETSC_HAVE_HIP) 2409 #include <petsc/private/deviceimpl.h> 2410 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B) 2411 { 2412 PetscFunctionBegin; 2413 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP)); 2414 PetscCall(MatCreate_MPIDense(B)); 2415 PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B)); 2416 PetscFunctionReturn(PETSC_SUCCESS); 2417 } 2418 #endif 2419 2420
/*MC 2421 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 2422 2423 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 2424 and `MATMPIDENSE` otherwise. 2425 2426 Options Database Keys: 2427 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 2428 2429 Level: beginner 2430 2431 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 2432 M*/ 2433 2434
/*MC 2435 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 2436 2437 2438 2439 This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator, 2440 and `MATMPIDENSECUDA` otherwise. 2441 2442 Options Database Keys: 2443 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()` 2444 2445 Level: beginner 2446 2447 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2448 M*/ 2449 2450
/*MC 2451 MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs. 2452 2453 This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator, 2454 and `MATMPIDENSEHIP` otherwise. 2455 2456 Options Database Keys: 2457 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()` 2458 2459 Level: beginner 2460 2461 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2462 M*/ 2463 2464
/*@C 2465 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 2466 2467 Collective 2468 2469 Input Parameters: 2470 + B - the matrix 2471 - data - optional location of matrix data. Set data=NULL for PETSc 2472 to control all matrix memory allocation. 2473 2474 Notes: 2475 The dense format is fully compatible with standard Fortran 77 2476 storage by columns.
2477 2478 The data input variable is intended primarily for Fortran programmers 2479 who wish to allocate their own matrix memory space. Most users should 2480 set data=NULL. 2481 2482 Level: intermediate 2483 2484 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2485 @*/ 2486 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 2487 { 2488 PetscFunctionBegin; 2489 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2490 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 2491 PetscFunctionReturn(PETSC_SUCCESS); 2492 } 2493 2494 /*@ 2495 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 2496 array provided by the user. This is useful to avoid copying an array 2497 into a matrix 2498 2499 Not Collective 2500 2501 Input Parameters: 2502 + mat - the matrix 2503 - array - the array in column major order 2504 2505 Note: 2506 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 2507 freed when the matrix is destroyed. 2508 2509 Level: developer 2510 2511 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 2512 `MatDenseReplaceArray()` 2513 @*/ 2514 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 2515 { 2516 PetscFunctionBegin; 2517 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2518 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2519 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2520 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2521 mat->offloadmask = PETSC_OFFLOAD_CPU; 2522 #endif 2523 PetscFunctionReturn(PETSC_SUCCESS); 2524 } 2525 2526 /*@ 2527 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 2528 2529 Not Collective 2530 2531 Input Parameters: 2532 . mat - the matrix 2533 2534 Note: 2535 You can only call this after a call to `MatDensePlaceArray()` 2536 2537 Level: developer 2538 2539 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2540 @*/ 2541 PetscErrorCode MatDenseResetArray(Mat mat) 2542 { 2543 PetscFunctionBegin; 2544 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2545 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 2546 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2547 PetscFunctionReturn(PETSC_SUCCESS); 2548 } 2549 2550 /*@ 2551 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2552 array provided by the user. This is useful to avoid copying an array 2553 into a matrix 2554 2555 Not Collective 2556 2557 Input Parameters: 2558 + mat - the matrix 2559 - array - the array in column major order 2560 2561 Note: 2562 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 2563 freed by the user. It will be freed when the matrix is destroyed. 
2564 2565 Level: developer 2566 2567 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 2568 @*/ 2569 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 2570 { 2571 PetscFunctionBegin; 2572 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2573 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2574 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2575 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2576 mat->offloadmask = PETSC_OFFLOAD_CPU; 2577 #endif 2578 PetscFunctionReturn(PETSC_SUCCESS); 2579 } 2580 2581 #if defined(PETSC_HAVE_CUDA) 2582 /*@C 2583 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2584 array provided by the user. This is useful to avoid copying an array 2585 into a matrix 2586 2587 Not Collective 2588 2589 Input Parameters: 2590 + mat - the matrix 2591 - array - the array in column major order 2592 2593 Note: 2594 You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be 2595 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2596 2597 Level: developer 2598 2599 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()` 2600 @*/ 2601 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) 2602 { 2603 PetscFunctionBegin; 2604 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2605 PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2606 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2607 mat->offloadmask = PETSC_OFFLOAD_GPU; 2608 PetscFunctionReturn(PETSC_SUCCESS); 2609 } 2610 2611 /*@C 2612 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()` 2613 2614 Not Collective 2615 2616 Input Parameters: 2617 . mat - the matrix 2618 2619 Note: 2620 You can only call this after a call to `MatDenseCUDAPlaceArray()` 2621 2622 Level: developer 2623 2624 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2625 @*/ 2626 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2627 { 2628 PetscFunctionBegin; 2629 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2630 PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat)); 2631 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2632 PetscFunctionReturn(PETSC_SUCCESS); 2633 } 2634 2635 /*@C 2636 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2637 array provided by the user. This is useful to avoid copying an array 2638 into a matrix 2639 2640 Not Collective 2641 2642 Input Parameters: 2643 + mat - the matrix 2644 - array - the array in column major order 2645 2646 Note: 2647 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2648 The memory passed in CANNOT be freed by the user. It will be freed 2649 when the matrix is destroyed. The array should respect the matrix leading dimension. 
2650 2651 Level: developer 2652 2653 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2654 @*/ 2655 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) 2656 { 2657 PetscFunctionBegin; 2658 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2659 PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2660 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2661 mat->offloadmask = PETSC_OFFLOAD_GPU; 2662 PetscFunctionReturn(PETSC_SUCCESS); 2663 } 2664 2665 /*@C 2666 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix. 2667 2668 Not Collective 2669 2670 Input Parameters: 2671 . A - the matrix 2672 2673 Output Parameters 2674 . array - the GPU array in column major order 2675 2676 Notes: 2677 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`. 2678 2679 The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed. 2680 2681 Level: developer 2682 2683 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2684 @*/ 2685 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2686 { 2687 PetscFunctionBegin; 2688 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2689 PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2690 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 /*@C 2695 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 2696 2697 Not Collective 2698 2699 Input Parameters: 2700 + A - the matrix 2701 - array - the GPU array in column major order 2702 2703 Level: developer 2704 2705 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2706 @*/ 2707 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2708 { 2709 PetscFunctionBegin; 2710 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2711 PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2712 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2713 A->offloadmask = PETSC_OFFLOAD_GPU; 2714 PetscFunctionReturn(PETSC_SUCCESS); 2715 } 2716 2717 /*@C 2718 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed. 2719 2720 Not Collective 2721 2722 Input Parameters: 2723 . A - the matrix 2724 2725 Output Parameters 2726 . array - the GPU array in column major order 2727 2728 Note: 2729 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 
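
   A minimal access-pattern sketch (the device-side use of the pointer is a placeholder):
.vb
   const PetscScalar *d_v;
   PetscInt           lda;

   PetscCall(MatDenseCUDAGetArrayRead(A, &d_v));
   PetscCall(MatDenseGetLDA(A, &lda));
   /* pass d_v (column major, leading dimension lda) to a CUDA kernel or cuBLAS call that only reads it */
   PetscCall(MatDenseCUDARestoreArrayRead(A, &d_v));
.ve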
2730 2731 Level: developer 2732 2733 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2734 @*/ 2735 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2736 { 2737 PetscFunctionBegin; 2738 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2739 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2740 PetscFunctionReturn(PETSC_SUCCESS); 2741 } 2742 2743 /*@C 2744 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2745 2746 Not Collective 2747 2748 Input Parameters: 2749 + A - the matrix 2750 - array - the GPU array in column major order 2751 2752 Note: 2753 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2754 2755 Level: developer 2756 2757 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2758 @*/ 2759 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2760 { 2761 PetscFunctionBegin; 2762 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 /*@C 2767 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 2768 2769 Not Collective 2770 2771 Input Parameters: 2772 . A - the matrix 2773 2774 Output Parameters 2775 . array - the GPU array in column major order 2776 2777 Note: 2778 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`. 2779 2780 Level: developer 2781 2782 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2783 @*/ 2784 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2785 { 2786 PetscFunctionBegin; 2787 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2788 PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a)); 2789 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2790 PetscFunctionReturn(PETSC_SUCCESS); 2791 } 2792 2793 /*@C 2794 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`. 
2795 2796 Not Collective 2797 2798 Input Parameters: 2799 + A - the matrix 2800 - array - the GPU array in column major order 2801 2802 Level: developer 2803 2804 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2805 @*/ 2806 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2807 { 2808 PetscFunctionBegin; 2809 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2810 PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a)); 2811 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2812 A->offloadmask = PETSC_OFFLOAD_GPU; 2813 PetscFunctionReturn(PETSC_SUCCESS); 2814 } 2815 #endif 2816 2817 #if defined(PETSC_HAVE_HIP) 2818 /*@C 2819 MatDenseHIPPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2820 array provided by the user. This is useful to avoid copying an array 2821 into a matrix 2822 2823 Not Collective 2824 2825 Input Parameters: 2826 + mat - the matrix 2827 - array - the array in column major order 2828 2829 Notes: 2830 You can return to the original array with a call to MatDenseHIPResetArray(). The user is responsible for freeing this array; it will not be 2831 freed when the matrix is destroyed. The array must have been allocated with hipMalloc(). 2832 2833 Level: developer 2834 2835 .seealso: MatDenseHIPGetArray(), MatDenseHIPResetArray() 2836 @*/ 2837 PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array) 2838 { 2839 PetscFunctionBegin; 2840 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2841 PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2842 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2843 mat->offloadmask = PETSC_OFFLOAD_GPU; 2844 PetscFunctionReturn(PETSC_SUCCESS); 2845 } 2846 2847 /*@C 2848 MatDenseHIPResetArray - Resets the matrix array to that it previously had before the call to MatDenseHIPPlaceArray() 2849 2850 Not Collective 2851 2852 Input Parameters: 2853 . mat - the matrix 2854 2855 Notes: 2856 You can only call this after a call to MatDenseHIPPlaceArray() 2857 2858 Level: developer 2859 2860 .seealso: MatDenseHIPGetArray(), MatDenseHIPPlaceArray() 2861 2862 @*/ 2863 PetscErrorCode MatDenseHIPResetArray(Mat mat) 2864 { 2865 PetscFunctionBegin; 2866 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2867 PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat)); 2868 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2869 PetscFunctionReturn(PETSC_SUCCESS); 2870 } 2871 2872 /*@C 2873 MatDenseHIPReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2874 array provided by the user. This is useful to avoid copying an array 2875 into a matrix 2876 2877 Not Collective 2878 2879 Input Parameters: 2880 + mat - the matrix 2881 - array - the array in column major order 2882 2883 Notes: 2884 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2885 The memory passed in CANNOT be freed by the user. It will be freed 2886 when the matrix is destroyed. The array should respect the matrix leading dimension. 
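
   A minimal usage sketch (error checking on the HIP call is omitted; the device buffer name d_array is illustrative):
.vb
   PetscScalar *d_array;
   PetscInt     lda, N;

   PetscCall(MatGetSize(A, NULL, &N));
   PetscCall(MatDenseGetLDA(A, &lda));
   hipMalloc((void **)&d_array, lda * N * sizeof(PetscScalar));
   /* ... fill d_array on the device in column-major order, leading dimension lda ... */
   PetscCall(MatDenseHIPReplaceArray(A, d_array)); /* the matrix now owns d_array; do not hipFree() it */
.ve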
2887 2888 Level: developer 2889 2890 .seealso: MatDenseHIPGetArray(), MatDenseHIPPlaceArray(), MatDenseHIPResetArray() 2891 @*/ 2892 PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array) 2893 { 2894 PetscFunctionBegin; 2895 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2896 PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2897 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2898 mat->offloadmask = PETSC_OFFLOAD_GPU; 2899 PetscFunctionReturn(PETSC_SUCCESS); 2900 } 2901 2902 /*@C 2903 MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a dense matrix. 2904 2905 Not Collective 2906 2907 Input Parameter: 2908 . A - the matrix 2909 2910 Output Parameter: 2911 . array - the GPU array in column major order 2912 2913 Notes: 2914 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseHIPGetArray(). The array must be restored with MatDenseHIPRestoreArrayWrite() when no longer needed. 2915 2916 Level: developer 2917 2918 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArrayRead() 2919 @*/ 2920 PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a) 2921 { 2922 PetscFunctionBegin; 2923 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2924 PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2925 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 /*@C 2930 MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a dense matrix previously obtained with MatDenseHIPGetArrayWrite(). 2931 2932 Not Collective 2933 2934 Input Parameters: 2935 + A - the matrix 2936 - array - the GPU array in column major order 2937 2938 2939 2940 Level: developer 2941 2942 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead() 2943 @*/ 2944 PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a) 2945 { 2946 PetscFunctionBegin; 2947 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2948 PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2949 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2950 A->offloadmask = PETSC_OFFLOAD_GPU; 2951 PetscFunctionReturn(PETSC_SUCCESS); 2952 } 2953 2954 /*@C 2955 MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArrayRead() when no longer needed. 2956 2957 Not Collective 2958 2959 Input Parameter: 2960 . A - the matrix 2961 2962 Output Parameter: 2963 . array - the GPU array in column major order 2964 2965 Notes: 2966 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite().
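
   A minimal access-pattern sketch (the device-side use of the pointer is a placeholder):
.vb
   const PetscScalar *d_v;

   PetscCall(MatDenseHIPGetArrayRead(A, &d_v));
   /* pass d_v (column major) to a HIP kernel or a rocBLAS/hipBLAS call that only reads it */
   PetscCall(MatDenseHIPRestoreArrayRead(A, &d_v));
.ve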
2967 2968 Level: developer 2969 2970 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead() 2971 @*/ 2972 PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a) 2973 { 2974 PetscFunctionBegin; 2975 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2976 PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2977 PetscFunctionReturn(PETSC_SUCCESS); 2978 } 2979 2980 /*@C 2981 MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a dense matrix previously obtained with a call to MatDenseHIPGetArrayRead(). 2982 2983 Not Collective 2984 2985 Input Parameters: 2986 + A - the matrix 2987 - array - the GPU array in column major order 2988 2989 Notes: 2990 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite(). 2991 2992 Level: developer 2993 2994 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPGetArrayRead() 2995 @*/ 2996 PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a) 2997 { 2998 PetscFunctionBegin; 2999 PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 3000 PetscFunctionReturn(PETSC_SUCCESS); 3001 } 3002 3003 /*@C 3004 MatDenseHIPGetArray - Provides access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArray() when no longer needed. 3005 3006 Not Collective 3007 3008 Input Parameter: 3009 . A - the matrix 3010 3011 Output Parameter: 3012 . array - the GPU array in column major order 3013 3014 Notes: 3015 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseHIPGetArrayWrite(). For read-only access, use MatDenseHIPGetArrayRead(). 3016 3017 Level: developer 3018 3019 .seealso: MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead() 3020 @*/ 3021 PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a) 3022 { 3023 PetscFunctionBegin; 3024 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3025 PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a)); 3026 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 3027 PetscFunctionReturn(PETSC_SUCCESS); 3028 } 3029 3030 /*@C 3031 MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a dense matrix previously obtained with MatDenseHIPGetArray(). 3032 3033 Not Collective 3034 3035 Input Parameters: 3036 + A - the matrix 3037 - array - the GPU array in column major order 3038 3039 3040 3041 Level: developer 3042 3043 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead() 3044 @*/ 3045 PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a) 3046 { 3047 PetscFunctionBegin; 3048 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3049 PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a)); 3050 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 3051 A->offloadmask = PETSC_OFFLOAD_GPU; 3052 PetscFunctionReturn(PETSC_SUCCESS); 3053 } 3054 #endif 3055 3056 /*@C 3057 MatCreateDense - Creates a matrix in `MATDENSE` format. 3058 3059 Collective 3060 3061 Input Parameters: 3062 + comm - MPI communicator 3063 .
m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3064 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3065 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 3066 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 3067 - data - optional location of matrix data. Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 3068 to control all matrix memory allocation. 3069 3070 Output Parameter: 3071 . A - the matrix 3072 3073 Notes: 3074 The dense format is fully compatible with standard Fortran 77 3075 storage by columns. 3076 3077 Although local portions of the matrix are stored in column-major 3078 order, the matrix is partitioned across MPI ranks by row. 3079 3080 The data input variable is intended primarily for Fortran programmers 3081 who wish to allocate their own matrix memory space. Most users should 3082 set data=NULL (`PETSC_NULL_SCALAR` for Fortran users). 3083 3084 The user MUST specify either the local or global matrix dimensions 3085 (possibly both). 3086 3087 Level: intermediate 3088 3089 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 3090 @*/ 3091 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3092 { 3093 PetscFunctionBegin; 3094 PetscCall(MatCreate(comm, A)); 3095 PetscCall(MatSetSizes(*A, m, n, M, N)); 3096 PetscCall(MatSetType(*A, MATDENSE)); 3097 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3098 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 3099 PetscFunctionReturn(PETSC_SUCCESS); 3100 } 3101 3102 #if defined(PETSC_HAVE_CUDA) 3103 /*@C 3104 MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA. 3105 3106 Collective 3107 3108 Input Parameters: 3109 + comm - MPI communicator 3110 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3111 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3112 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 3113 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 3114 - data - optional location of GPU matrix data. Set data=NULL for PETSc 3115 to control matrix memory allocation. 3116 3117 Output Parameter: 3118 . A - the matrix 3119 3120 Level: intermediate 3121 3122 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()` 3123 @*/ 3124 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3125 { 3126 PetscFunctionBegin; 3127 PetscCall(MatCreate(comm, A)); 3128 PetscValidLogicalCollectiveBool(*A, !!data, 6); 3129 PetscCall(MatSetSizes(*A, m, n, M, N)); 3130 PetscCall(MatSetType(*A, MATDENSECUDA)); 3131 PetscCall(MatSeqDenseCUDASetPreallocation(*A, data)); 3132 PetscCall(MatMPIDenseCUDASetPreallocation(*A, data)); 3133 PetscFunctionReturn(PETSC_SUCCESS); 3134 } 3135 #endif 3136 3137 #if defined(PETSC_HAVE_HIP) 3138 /*@C 3139 MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP. 3140 3141 Collective 3142 3143 Input Parameters: 3144 + comm - MPI communicator 3145 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3146 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3147 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 3148 . 
N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 3149 - data - optional location of GPU matrix data. Set data=NULL for PETSc 3150 to control matrix memory allocation. 3151 3152 Output Parameter: 3153 . A - the matrix 3154 3155 Level: intermediate 3156 3157 .seealso: `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()` 3158 @*/ 3159 PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3160 { 3161 PetscFunctionBegin; 3162 PetscCall(MatCreate(comm, A)); 3163 PetscValidLogicalCollectiveBool(*A, !!data, 6); 3164 PetscCall(MatSetSizes(*A, m, n, M, N)); 3165 PetscCall(MatSetType(*A, MATDENSEHIP)); 3166 PetscCall(MatSeqDenseHIPSetPreallocation(*A, data)); 3167 PetscCall(MatMPIDenseHIPSetPreallocation(*A, data)); 3168 PetscFunctionReturn(PETSC_SUCCESS); 3169 } 3170 #endif 3171 3172 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 3173 { 3174 Mat mat; 3175 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 3176 3177 PetscFunctionBegin; 3178 *newmat = NULL; 3179 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 3180 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3181 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 3182 a = (Mat_MPIDense *)mat->data; 3183 3184 mat->factortype = A->factortype; 3185 mat->assembled = PETSC_TRUE; 3186 mat->preallocated = PETSC_TRUE; 3187 3188 mat->insertmode = NOT_SET_VALUES; 3189 a->donotstash = oldmat->donotstash; 3190 3191 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 3192 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 3193 3194 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3195 3196 *newmat = mat; 3197 PetscFunctionReturn(PETSC_SUCCESS); 3198 } 3199 3200 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 3201 { 3202 PetscBool isbinary; 3203 #if defined(PETSC_HAVE_HDF5) 3204 PetscBool ishdf5; 3205 #endif 3206 3207 PetscFunctionBegin; 3208 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3209 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3210 /* force binary viewer to load .info file if it has not yet done so */ 3211 PetscCall(PetscViewerSetUp(viewer)); 3212 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3213 #if defined(PETSC_HAVE_HDF5) 3214 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3215 #endif 3216 if (isbinary) { 3217 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 3218 #if defined(PETSC_HAVE_HDF5) 3219 } else if (ishdf5) { 3220 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 3221 #endif 3222 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3223 PetscFunctionReturn(PETSC_SUCCESS); 3224 } 3225 3226 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 3227 { 3228 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 3229 Mat a, b; 3230 3231 PetscFunctionBegin; 3232 a = matA->A; 3233 b = matB->A; 3234 PetscCall(MatEqual(a, b, flag)); 3235 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 3236 PetscFunctionReturn(PETSC_SUCCESS); 3237 } 3238 3239 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 3240 { 3241 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 3242 3243 
PetscFunctionBegin; 3244 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 3245 PetscCall(MatDestroy(&atb->atb)); 3246 PetscCall(PetscFree(atb)); 3247 PetscFunctionReturn(PETSC_SUCCESS); 3248 } 3249 3250 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 3251 { 3252 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 3253 3254 PetscFunctionBegin; 3255 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 3256 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 3257 PetscCall(PetscFree(abt)); 3258 PetscFunctionReturn(PETSC_SUCCESS); 3259 } 3260 3261 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3262 { 3263 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3264 Mat_TransMatMultDense *atb; 3265 MPI_Comm comm; 3266 PetscMPIInt size, *recvcounts; 3267 PetscScalar *carray, *sendbuf; 3268 const PetscScalar *atbarray; 3269 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 3270 const PetscInt *ranges; 3271 3272 PetscFunctionBegin; 3273 MatCheckProduct(C, 3); 3274 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3275 atb = (Mat_TransMatMultDense *)C->product->data; 3276 recvcounts = atb->recvcounts; 3277 sendbuf = atb->sendbuf; 3278 3279 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3280 PetscCallMPI(MPI_Comm_size(comm, &size)); 3281 3282 /* compute atbarray = aseq^T * bseq */ 3283 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 3284 3285 PetscCall(MatGetOwnershipRanges(C, &ranges)); 3286 3287 /* arrange atbarray into sendbuf */ 3288 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 3289 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 3290 for (proc = 0, k = 0; proc < size; proc++) { 3291 for (j = 0; j < cN; j++) { 3292 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 3293 } 3294 } 3295 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 3296 3297 /* sum all atbarray to local values of C */ 3298 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 3299 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 3300 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 3301 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3302 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3303 PetscFunctionReturn(PETSC_SUCCESS); 3304 } 3305 3306 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3307 { 3308 MPI_Comm comm; 3309 PetscMPIInt size; 3310 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 3311 Mat_TransMatMultDense *atb; 3312 PetscBool cisdense = PETSC_FALSE; 3313 PetscInt i; 3314 const PetscInt *ranges; 3315 3316 PetscFunctionBegin; 3317 MatCheckProduct(C, 4); 3318 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3319 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3320 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 3321 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3322 } 3323 3324 /* create matrix product C */ 3325 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 3326 #if defined(PETSC_HAVE_CUDA) 3327 
PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 3328 #endif 3329 #if defined(PETSC_HAVE_HIP) 3330 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 3331 #endif 3332 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 3333 PetscCall(MatSetUp(C)); 3334 3335 /* create data structure for reuse C */ 3336 PetscCallMPI(MPI_Comm_size(comm, &size)); 3337 PetscCall(PetscNew(&atb)); 3338 cM = C->rmap->N; 3339 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 3340 PetscCall(MatGetOwnershipRanges(C, &ranges)); 3341 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 3342 3343 C->product->data = atb; 3344 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 3345 PetscFunctionReturn(PETSC_SUCCESS); 3346 } 3347 3348 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3349 { 3350 MPI_Comm comm; 3351 PetscMPIInt i, size; 3352 PetscInt maxRows, bufsiz; 3353 PetscMPIInt tag; 3354 PetscInt alg; 3355 Mat_MatTransMultDense *abt; 3356 Mat_Product *product = C->product; 3357 PetscBool flg; 3358 3359 PetscFunctionBegin; 3360 MatCheckProduct(C, 4); 3361 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3362 /* check local size of A and B */ 3363 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 3364 3365 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 3366 alg = flg ? 0 : 1; 3367 3368 /* setup matrix product C */ 3369 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 3370 PetscCall(MatSetType(C, MATMPIDENSE)); 3371 PetscCall(MatSetUp(C)); 3372 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 3373 3374 /* create data structure for reuse C */ 3375 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 3376 PetscCallMPI(MPI_Comm_size(comm, &size)); 3377 PetscCall(PetscNew(&abt)); 3378 abt->tag = tag; 3379 abt->alg = alg; 3380 switch (alg) { 3381 case 1: /* alg: "cyclic" */ 3382 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 3383 bufsiz = A->cmap->N * maxRows; 3384 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 3385 break; 3386 default: /* alg: "allgatherv" */ 3387 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 3388 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 3389 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 3390 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 3391 break; 3392 } 3393 3394 C->product->data = abt; 3395 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 3396 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 3397 PetscFunctionReturn(PETSC_SUCCESS); 3398 } 3399 3400 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 3401 { 3402 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3403 Mat_MatTransMultDense *abt; 3404 MPI_Comm comm; 3405 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 3406 PetscScalar 
*sendbuf, *recvbuf = NULL, *cv; 3407 PetscInt i, cK = A->cmap->N, k, j, bn; 3408 PetscScalar _DOne = 1.0, _DZero = 0.0; 3409 const PetscScalar *av, *bv; 3410 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 3411 MPI_Request reqs[2]; 3412 const PetscInt *ranges; 3413 3414 PetscFunctionBegin; 3415 MatCheckProduct(C, 3); 3416 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3417 abt = (Mat_MatTransMultDense *)C->product->data; 3418 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 3419 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3420 PetscCallMPI(MPI_Comm_size(comm, &size)); 3421 PetscCall(MatDenseGetArrayRead(a->A, &av)); 3422 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 3423 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 3424 PetscCall(MatDenseGetLDA(a->A, &i)); 3425 PetscCall(PetscBLASIntCast(i, &alda)); 3426 PetscCall(MatDenseGetLDA(b->A, &i)); 3427 PetscCall(PetscBLASIntCast(i, &blda)); 3428 PetscCall(MatDenseGetLDA(c->A, &i)); 3429 PetscCall(PetscBLASIntCast(i, &clda)); 3430 PetscCall(MatGetOwnershipRanges(B, &ranges)); 3431 bn = B->rmap->n; 3432 if (blda == bn) { 3433 sendbuf = (PetscScalar *)bv; 3434 } else { 3435 sendbuf = abt->buf[0]; 3436 for (k = 0, i = 0; i < cK; i++) { 3437 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 3438 } 3439 } 3440 if (size > 1) { 3441 sendto = (rank + size - 1) % size; 3442 recvfrom = (rank + size + 1) % size; 3443 } else { 3444 sendto = recvfrom = 0; 3445 } 3446 PetscCall(PetscBLASIntCast(cK, &ck)); 3447 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 3448 recvisfrom = rank; 3449 for (i = 0; i < size; i++) { 3450 /* we have finished receiving in sending, bufs can be read/modified */ 3451 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 3452 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 3453 3454 if (nextrecvisfrom != rank) { 3455 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 3456 sendsiz = cK * bn; 3457 recvsiz = cK * nextbn; 3458 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 3459 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 3460 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 3461 } 3462 3463 /* local aseq * sendbuf^T */ 3464 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 3465 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 3466 3467 if (nextrecvisfrom != rank) { 3468 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 3469 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 3470 } 3471 bn = nextbn; 3472 recvisfrom = nextrecvisfrom; 3473 sendbuf = recvbuf; 3474 } 3475 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 3476 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 3477 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 3478 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3479 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3480 PetscFunctionReturn(PETSC_SUCCESS); 3481 } 3482 3483 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 3484 { 3485 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3486 Mat_MatTransMultDense *abt; 3487 MPI_Comm comm; 3488 PetscMPIInt size; 3489 PetscScalar *cv, *sendbuf, *recvbuf; 3490 const PetscScalar *av, *bv; 3491 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 3492 PetscScalar _DOne = 1.0, _DZero = 0.0; 3493 PetscBLASInt cm, cn, ck, alda, clda; 3494 3495 PetscFunctionBegin; 3496 MatCheckProduct(C, 3); 3497 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3498 abt = (Mat_MatTransMultDense *)C->product->data; 3499 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3500 PetscCallMPI(MPI_Comm_size(comm, &size)); 3501 PetscCall(MatDenseGetArrayRead(a->A, &av)); 3502 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 3503 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 3504 PetscCall(MatDenseGetLDA(a->A, &i)); 3505 PetscCall(PetscBLASIntCast(i, &alda)); 3506 PetscCall(MatDenseGetLDA(b->A, &blda)); 3507 PetscCall(MatDenseGetLDA(c->A, &i)); 3508 PetscCall(PetscBLASIntCast(i, &clda)); 3509 /* copy transpose of B into buf[0] */ 3510 bn = B->rmap->n; 3511 sendbuf = abt->buf[0]; 3512 recvbuf = abt->buf[1]; 3513 for (k = 0, j = 0; j < bn; j++) { 3514 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 3515 } 3516 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 3517 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 3518 PetscCall(PetscBLASIntCast(cK, &ck)); 3519 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 3520 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 3521 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 3522 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 3523 3524 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 3525 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3526 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3527 PetscFunctionReturn(PETSC_SUCCESS); 3528 } 3529 3530 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3531 { 3532 Mat_MatTransMultDense *abt; 3533 3534 PetscFunctionBegin; 3535 MatCheckProduct(C, 3); 3536
PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3537 abt = (Mat_MatTransMultDense *)C->product->data; 3538 switch (abt->alg) { 3539 case 1: 3540 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 3541 break; 3542 default: 3543 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 3544 break; 3545 } 3546 PetscFunctionReturn(PETSC_SUCCESS); 3547 } 3548 3549 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 3550 { 3551 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 3552 3553 PetscFunctionBegin; 3554 PetscCall(MatDestroy(&ab->Ce)); 3555 PetscCall(MatDestroy(&ab->Ae)); 3556 PetscCall(MatDestroy(&ab->Be)); 3557 PetscCall(PetscFree(ab)); 3558 PetscFunctionReturn(PETSC_SUCCESS); 3559 } 3560 3561 #if defined(PETSC_HAVE_ELEMENTAL) 3562 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3563 { 3564 Mat_MatMultDense *ab; 3565 3566 PetscFunctionBegin; 3567 MatCheckProduct(C, 3); 3568 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 3569 ab = (Mat_MatMultDense *)C->product->data; 3570 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 3571 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 3572 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 3573 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 3574 PetscFunctionReturn(PETSC_SUCCESS); 3575 } 3576 3577 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3578 { 3579 Mat Ae, Be, Ce; 3580 Mat_MatMultDense *ab; 3581 3582 PetscFunctionBegin; 3583 MatCheckProduct(C, 4); 3584 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3585 /* check local size of A and B */ 3586 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 3587 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3588 } 3589 3590 /* create elemental matrices Ae and Be */ 3591 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 3592 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 3593 PetscCall(MatSetType(Ae, MATELEMENTAL)); 3594 PetscCall(MatSetUp(Ae)); 3595 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 3596 3597 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 3598 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 3599 PetscCall(MatSetType(Be, MATELEMENTAL)); 3600 PetscCall(MatSetUp(Be)); 3601 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 3602 3603 /* compute symbolic Ce = Ae*Be */ 3604 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 3605 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 3606 3607 /* setup C */ 3608 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3609 PetscCall(MatSetType(C, MATDENSE)); 3610 PetscCall(MatSetUp(C)); 3611 3612 /* create data structure for reuse Cdense */ 3613 PetscCall(PetscNew(&ab)); 3614 ab->Ae = Ae; 3615 ab->Be = Be; 3616 ab->Ce = Ce; 3617 3618 C->product->data = ab; 3619 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 3620 C->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIDense; 3621 PetscFunctionReturn(PETSC_SUCCESS); 3622 } 3623 #endif 3624 /* ----------------------------------------------- */ 3625 #if defined(PETSC_HAVE_ELEMENTAL) 3626 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 3627 { 3628 PetscFunctionBegin; 3629 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 3630 C->ops->productsymbolic = MatProductSymbolic_AB; 3631 PetscFunctionReturn(PETSC_SUCCESS); 3632 } 3633 #endif 3634 3635 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 3636 { 3637 Mat_Product *product = C->product; 3638 Mat A = product->A, B = product->B; 3639 3640 PetscFunctionBegin; 3641 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 3642 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3643 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 3644 C->ops->productsymbolic = MatProductSymbolic_AtB; 3645 PetscFunctionReturn(PETSC_SUCCESS); 3646 } 3647 3648 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 3649 { 3650 Mat_Product *product = C->product; 3651 const char *algTypes[2] = {"allgatherv", "cyclic"}; 3652 PetscInt alg, nalg = 2; 3653 PetscBool flg = PETSC_FALSE; 3654 3655 PetscFunctionBegin; 3656 /* Set default algorithm */ 3657 alg = 0; /* default is allgatherv */ 3658 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 3659 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 3660 3661 /* Get runtime option */ 3662 if (product->api_user) { 3663 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 3664 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 3665 PetscOptionsEnd(); 3666 } else { 3667 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 3668 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 3669 PetscOptionsEnd(); 3670 } 3671 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 3672 3673 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3674 C->ops->productsymbolic = MatProductSymbolic_ABt; 3675 PetscFunctionReturn(PETSC_SUCCESS); 3676 } 3677 3678 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3679 { 3680 Mat_Product *product = C->product; 3681 3682 PetscFunctionBegin; 3683 switch (product->type) { 3684 #if defined(PETSC_HAVE_ELEMENTAL) 3685 case MATPRODUCT_AB: 3686 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 3687 break; 3688 #endif 3689 case MATPRODUCT_AtB: 3690 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 3691 break; 3692 case MATPRODUCT_ABt: 3693 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 3694 break; 3695 default: 3696 break; 3697 } 3698 PetscFunctionReturn(PETSC_SUCCESS); 3699 } 3700
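
/*
   Usage sketch for the MATPRODUCT_ABt path handled above (not part of this file's API): A and B
   are assumed to be existing MATMPIDENSE matrices with compatible local column sizes, and the
   algorithm names correspond to algTypes[] in MatProductSetFromOptions_MPIDense_ABt().

     Mat C;

     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C, "cyclic"));   // or "allgatherv", the default
     PetscCall(MatProductSetFromOptions(C));           // -mat_product_algorithm may override this choice
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/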