1 2 /* 3 Basic functions for basic parallel dense matrices. 4 Portions of this code are under: 5 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 6 */ 7 8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 9 #include <../src/mat/impls/aij/mpi/mpiaij.h> 10 #include <petscblaslapack.h> 11 12 /*@ 13 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the sequential or MPI `MATDENSE` matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 25 @*/ 26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 27 { 28 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 33 PetscValidPointer(B, 2); 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 35 if (flg) *B = mat->A; 36 else { 37 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 38 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 45 { 46 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 47 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 48 49 PetscFunctionBegin; 50 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 51 PetscFunctionReturn(PETSC_SUCCESS); 52 } 53 54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 57 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 58 PetscScalar *v; 59 60 PetscFunctionBegin; 61 PetscCall(MatDenseGetArray(mat->A, &v)); 62 PetscCall(MatDenseGetLDA(mat->A, &lda)); 63 rend2 = PetscMin(rend, A->cmap->N); 64 if (rend2 > rstart) { 65 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 66 PetscCall(PetscLogFlops(rend2 - rstart)); 67 } 68 PetscCall(MatDenseRestoreArray(mat->A, &v)); 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 73 { 74 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 75 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 76 77 PetscFunctionBegin; 78 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 79 lrow = row - rstart; 80 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 85 { 86 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 87 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 88 89 PetscFunctionBegin; 90 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 91 lrow = row - rstart; 92 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 97 { 98 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 99 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 100 PetscScalar *array; 101 MPI_Comm comm; 102 PetscBool flg; 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatHasCongruentLayouts(A, &flg)); 107 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 108 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 109 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 110 #if defined(PETSC_HAVE_CUDA) 111 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 112 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 113 #elif (PETSC_HAVE_HIP) 114 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 115 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 116 #endif 117 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 118 PetscCall(MatCreate(comm, &B)); 119 PetscCall(MatSetSizes(B, m, m, m, m)); 120 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 121 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 122 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 123 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 125 *a = B; 126 PetscCall(MatDestroy(&B)); 127 } else *a = B; 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 132 { 133 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 134 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 135 PetscBool roworiented = A->roworiented; 136 137 PetscFunctionBegin; 138 for (i = 0; i < m; i++) { 139 if (idxm[i] < 0) continue; 140 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 141 if (idxm[i] >= rstart && idxm[i] < rend) { 142 row = idxm[i] - rstart; 143 if (roworiented) { 144 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 145 } else { 146 for (j = 0; j < n; j++) { 147 if (idxn[j] < 0) continue; 148 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 149 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 150 } 151 } 152 } else if (!A->donotstash) { 153 mat->assembled = PETSC_FALSE; 154 if (roworiented) { 155 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 156 } else { 157 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 158 } 159 } 160 } 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 165 { 166 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 167 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 168 169 PetscFunctionBegin; 170 for (i = 0; i < m; i++) { 171 if (idxm[i] < 0) continue; /* negative row */ 172 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 173 if (idxm[i] >= rstart && idxm[i] < rend) { 174 row = idxm[i] - rstart; 175 for (j = 0; j < n; j++) { 176 if (idxn[j] < 0) continue; /* negative column */ 177 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 178 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 179 } 180 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 181 } 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 188 189 PetscFunctionBegin; 190 PetscCall(MatDenseGetLDA(a->A, lda)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 195 { 196 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 197 MatType mtype = MATSEQDENSE; 198 199 PetscFunctionBegin; 200 if (!a->A) { 201 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 202 PetscCall(PetscLayoutSetUp(A->rmap)); 203 PetscCall(PetscLayoutSetUp(A->cmap)); 204 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 205 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 206 #if defined(PETSC_HAVE_CUDA) 207 PetscBool iscuda; 208 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 209 if (iscuda) mtype = MATSEQDENSECUDA; 210 #elif (PETSC_HAVE_HIP) 211 PetscBool iship; 212 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 213 if (iship) mtype = MATSEQDENSEHIP; 214 #endif 215 PetscCall(MatSetType(a->A, mtype)); 216 } 217 PetscCall(MatDenseSetLDA(a->A, lda)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 222 { 223 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 224 225 PetscFunctionBegin; 226 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 227 PetscCall(MatDenseGetArray(a->A, array)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) 232 { 233 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 234 235 PetscFunctionBegin; 236 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 237 PetscCall(MatDenseGetArrayRead(a->A, array)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 242 { 243 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 244 245 PetscFunctionBegin; 246 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 247 PetscCall(MatDenseGetArrayWrite(a->A, array)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 252 { 253 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 254 255 PetscFunctionBegin; 256 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 257 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 258 PetscCall(MatDensePlaceArray(a->A, array)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 263 { 264 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 265 266 PetscFunctionBegin; 267 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 268 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 269 PetscCall(MatDenseResetArray(a->A)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 276 277 PetscFunctionBegin; 278 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 279 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 280 PetscCall(MatDenseReplaceArray(a->A, array)); 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 285 { 286 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 287 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 288 const PetscInt *irow, *icol; 289 const PetscScalar *v; 290 PetscScalar *bv; 291 Mat newmat; 292 IS iscol_local; 293 MPI_Comm comm_is, comm_mat; 294 295 PetscFunctionBegin; 296 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 297 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 298 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 299 300 PetscCall(ISAllGather(iscol, &iscol_local)); 301 PetscCall(ISGetIndices(isrow, &irow)); 302 PetscCall(ISGetIndices(iscol_local, &icol)); 303 PetscCall(ISGetLocalSize(isrow, &nrows)); 304 PetscCall(ISGetLocalSize(iscol, &ncols)); 305 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 306 307 /* No parallel redistribution currently supported! Should really check each index set 308 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 309 original matrix! */ 310 311 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 312 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 313 314 /* Check submatrix call */ 315 if (scall == MAT_REUSE_MATRIX) { 316 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 317 /* Really need to test rows and column sizes! */ 318 newmat = *B; 319 } else { 320 /* Create and fill new matrix */ 321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 322 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 323 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 324 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 325 } 326 327 /* Now extract the data pointers and do the copy, column at a time */ 328 newmatd = (Mat_MPIDense *)newmat->data; 329 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 330 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 331 PetscCall(MatDenseGetLDA(mat->A, &lda)); 332 for (i = 0; i < Ncols; i++) { 333 const PetscScalar *av = v + lda * icol[i]; 334 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 335 } 336 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 337 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 338 339 /* Assemble the matrices so that the correct flags are set */ 340 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 341 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 342 343 /* Free work space */ 344 PetscCall(ISRestoreIndices(isrow, &irow)); 345 PetscCall(ISRestoreIndices(iscol_local, &icol)); 346 PetscCall(ISDestroy(&iscol_local)); 347 *B = newmat; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 354 355 PetscFunctionBegin; 356 PetscCall(MatDenseRestoreArray(a->A, array)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 370 { 371 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 372 373 PetscFunctionBegin; 374 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt nstash, reallocs; 382 383 PetscFunctionBegin; 384 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 385 386 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 387 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 388 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 393 { 394 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 395 PetscInt i, *row, *col, flg, j, rstart, ncols; 396 PetscMPIInt n; 397 PetscScalar *val; 398 399 PetscFunctionBegin; 400 if (!mdn->donotstash && !mat->nooffprocentries) { 401 /* wait on receives */ 402 while (1) { 403 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 404 if (!flg) break; 405 406 for (i = 0; i < n;) { 407 /* Now identify the consecutive vals belonging to the same row */ 408 for (j = i, rstart = row[j]; j < n; j++) { 409 if (row[j] != rstart) break; 410 } 411 if (j < n) ncols = j - i; 412 else ncols = n - i; 413 /* Now assemble all these values with a single function call */ 414 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 415 i = j; 416 } 417 } 418 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 419 } 420 421 PetscCall(MatAssemblyBegin(mdn->A, mode)); 422 PetscCall(MatAssemblyEnd(mdn->A, mode)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 427 { 428 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 429 430 PetscFunctionBegin; 431 PetscCall(MatZeroEntries(l->A)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 436 { 437 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 438 PetscInt i, len, *lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 PetscCall(VecGetArrayRead(x, &xx)); 449 PetscCall(VecGetArrayWrite(b, &bb)); 450 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 451 PetscCall(VecRestoreArrayRead(x, &xx)); 452 PetscCall(VecRestoreArrayWrite(b, &bb)); 453 } 454 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 455 if (diag != 0.0) { 456 Vec d; 457 458 PetscCall(MatCreateVecs(A, NULL, &d)); 459 PetscCall(VecSet(d, diag)); 460 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 461 PetscCall(VecDestroy(&d)); 462 } 463 PetscCall(PetscFree(lrows)); 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 519 PetscCall(VecSet(yy, 0.0)); 520 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 521 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 522 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 523 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 524 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 525 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 526 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 533 const PetscScalar *ax; 534 PetscScalar *ay; 535 PetscMemType axmtype, aymtype; 536 537 PetscFunctionBegin; 538 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 539 PetscCall(VecCopy(yy, zz)); 540 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 541 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 542 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 543 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 544 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 545 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 546 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 553 PetscInt lda, len, i, n, m = A->rmap->n, radd; 554 PetscScalar *x, zero = 0.0; 555 const PetscScalar *av; 556 557 PetscFunctionBegin; 558 PetscCall(VecSet(v, zero)); 559 PetscCall(VecGetArray(v, &x)); 560 PetscCall(VecGetSize(v, &n)); 561 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 562 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 563 radd = A->rmap->rstart * m; 564 PetscCall(MatDenseGetArrayRead(a->A, &av)); 565 PetscCall(MatDenseGetLDA(a->A, &lda)); 566 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 567 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 568 PetscCall(VecRestoreArray(v, &x)); 569 PetscFunctionReturn(PETSC_SUCCESS); 570 } 571 572 PetscErrorCode MatDestroy_MPIDense(Mat mat) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 575 576 PetscFunctionBegin; 577 #if defined(PETSC_USE_LOG) 578 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 579 #endif 580 PetscCall(MatStashDestroy_Private(&mat->stash)); 581 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 582 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 583 PetscCall(MatDestroy(&mdn->A)); 584 PetscCall(VecDestroy(&mdn->lvec)); 585 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 586 PetscCall(VecDestroy(&mdn->cvec)); 587 PetscCall(MatDestroy(&mdn->cmat)); 588 589 PetscCall(PetscFree(mat->data)); 590 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 591 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 605 #if defined(PETSC_HAVE_ELEMENTAL) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 607 #endif 608 #if defined(PETSC_HAVE_SCALAPACK) 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 610 #endif 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 614 #if defined(PETSC_HAVE_CUDA) 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 632 #endif 633 #if defined(PETSC_HAVE_HIP) 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 651 #endif 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 662 663 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 664 PetscFunctionReturn(PETSC_SUCCESS); 665 } 666 667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 668 669 #include <petscdraw.h> 670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 671 { 672 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 673 PetscMPIInt rank; 674 PetscViewerType vtype; 675 PetscBool iascii, isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 679 PetscFunctionBegin; 680 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 681 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 682 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 683 if (iascii) { 684 PetscCall(PetscViewerGetType(viewer, &vtype)); 685 PetscCall(PetscViewerGetFormat(viewer, &format)); 686 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 687 MatInfo info; 688 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 689 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 690 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 691 (PetscInt)info.memory)); 692 PetscCall(PetscViewerFlush(viewer)); 693 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 694 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } else if (format == PETSC_VIEWER_ASCII_INFO) { 697 PetscFunctionReturn(PETSC_SUCCESS); 698 } 699 } else if (isdraw) { 700 PetscDraw draw; 701 PetscBool isnull; 702 703 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 704 PetscCall(PetscDrawIsNull(draw, &isnull)); 705 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 706 } 707 708 { 709 /* assemble the entire matrix onto first processor. */ 710 Mat A; 711 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 712 PetscInt *cols; 713 PetscScalar *vals; 714 715 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 716 if (rank == 0) { 717 PetscCall(MatSetSizes(A, M, N, M, N)); 718 } else { 719 PetscCall(MatSetSizes(A, 0, 0, M, N)); 720 } 721 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 722 PetscCall(MatSetType(A, MATMPIDENSE)); 723 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 724 725 /* Copy the matrix ... This isn't the most efficient means, 726 but it's quick for now */ 727 A->insertmode = INSERT_VALUES; 728 729 row = mat->rmap->rstart; 730 m = mdn->A->rmap->n; 731 for (i = 0; i < m; i++) { 732 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 733 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 734 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 735 row++; 736 } 737 738 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 739 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 740 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 741 if (rank == 0) { 742 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 743 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 744 } 745 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 746 PetscCall(PetscViewerFlush(viewer)); 747 PetscCall(MatDestroy(&A)); 748 } 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 753 { 754 PetscBool iascii, isbinary, isdraw, issocket; 755 756 PetscFunctionBegin; 757 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 758 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 759 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 760 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 761 762 if (iascii || issocket || isdraw) { 763 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 764 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 765 PetscFunctionReturn(PETSC_SUCCESS); 766 } 767 768 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 769 { 770 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 771 Mat mdn = mat->A; 772 PetscLogDouble isend[5], irecv[5]; 773 774 PetscFunctionBegin; 775 info->block_size = 1.0; 776 777 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 778 779 isend[0] = info->nz_used; 780 isend[1] = info->nz_allocated; 781 isend[2] = info->nz_unneeded; 782 isend[3] = info->memory; 783 isend[4] = info->mallocs; 784 if (flag == MAT_LOCAL) { 785 info->nz_used = isend[0]; 786 info->nz_allocated = isend[1]; 787 info->nz_unneeded = isend[2]; 788 info->memory = isend[3]; 789 info->mallocs = isend[4]; 790 } else if (flag == MAT_GLOBAL_MAX) { 791 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 792 793 info->nz_used = irecv[0]; 794 info->nz_allocated = irecv[1]; 795 info->nz_unneeded = irecv[2]; 796 info->memory = irecv[3]; 797 info->mallocs = irecv[4]; 798 } else if (flag == MAT_GLOBAL_SUM) { 799 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 800 801 info->nz_used = irecv[0]; 802 info->nz_allocated = irecv[1]; 803 info->nz_unneeded = irecv[2]; 804 info->memory = irecv[3]; 805 info->mallocs = irecv[4]; 806 } 807 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 808 info->fill_ratio_needed = 0; 809 info->factor_mallocs = 0; 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 814 { 815 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 816 817 PetscFunctionBegin; 818 switch (op) { 819 case MAT_NEW_NONZERO_LOCATIONS: 820 case MAT_NEW_NONZERO_LOCATION_ERR: 821 case MAT_NEW_NONZERO_ALLOCATION_ERR: 822 MatCheckPreallocated(A, 1); 823 PetscCall(MatSetOption(a->A, op, flg)); 824 break; 825 case MAT_ROW_ORIENTED: 826 MatCheckPreallocated(A, 1); 827 a->roworiented = flg; 828 PetscCall(MatSetOption(a->A, op, flg)); 829 break; 830 case MAT_FORCE_DIAGONAL_ENTRIES: 831 case MAT_KEEP_NONZERO_PATTERN: 832 case MAT_USE_HASH_TABLE: 833 case MAT_SORTED_FULL: 834 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 835 break; 836 case MAT_IGNORE_OFF_PROC_ENTRIES: 837 a->donotstash = flg; 838 break; 839 case MAT_SYMMETRIC: 840 case MAT_STRUCTURALLY_SYMMETRIC: 841 case MAT_HERMITIAN: 842 case MAT_SYMMETRY_ETERNAL: 843 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 844 case MAT_SPD: 845 case MAT_IGNORE_LOWER_TRIANGULAR: 846 case MAT_IGNORE_ZERO_ENTRIES: 847 case MAT_SPD_ETERNAL: 848 /* if the diagonal matrix is square it inherits some of the properties above */ 849 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 850 break; 851 default: 852 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 853 } 854 PetscFunctionReturn(PETSC_SUCCESS); 855 } 856 857 PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 858 { 859 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 860 const PetscScalar *l; 861 PetscScalar x, *v, *vv, *r; 862 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 863 864 PetscFunctionBegin; 865 PetscCall(MatDenseGetArray(mdn->A, &vv)); 866 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 867 PetscCall(MatGetLocalSize(A, &s2, &s3)); 868 if (ll) { 869 PetscCall(VecGetLocalSize(ll, &s2a)); 870 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 871 PetscCall(VecGetArrayRead(ll, &l)); 872 for (i = 0; i < m; i++) { 873 x = l[i]; 874 v = vv + i; 875 for (j = 0; j < n; j++) { 876 (*v) *= x; 877 v += lda; 878 } 879 } 880 PetscCall(VecRestoreArrayRead(ll, &l)); 881 PetscCall(PetscLogFlops(1.0 * n * m)); 882 } 883 if (rr) { 884 const PetscScalar *ar; 885 886 PetscCall(VecGetLocalSize(rr, &s3a)); 887 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 888 PetscCall(VecGetArrayRead(rr, &ar)); 889 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 890 PetscCall(VecGetArray(mdn->lvec, &r)); 891 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 892 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 893 PetscCall(VecRestoreArrayRead(rr, &ar)); 894 for (i = 0; i < n; i++) { 895 x = r[i]; 896 v = vv + i * lda; 897 for (j = 0; j < m; j++) (*v++) *= x; 898 } 899 PetscCall(VecRestoreArray(mdn->lvec, &r)); 900 PetscCall(PetscLogFlops(1.0 * n * m)); 901 } 902 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 903 PetscFunctionReturn(PETSC_SUCCESS); 904 } 905 906 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 907 { 908 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 909 PetscInt i, j; 910 PetscMPIInt size; 911 PetscReal sum = 0.0; 912 const PetscScalar *av, *v; 913 914 PetscFunctionBegin; 915 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 916 v = av; 917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 918 if (size == 1) { 919 PetscCall(MatNorm(mdn->A, type, nrm)); 920 } else { 921 if (type == NORM_FROBENIUS) { 922 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 923 sum += PetscRealPart(PetscConj(*v) * (*v)); 924 v++; 925 } 926 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 927 *nrm = PetscSqrtReal(*nrm); 928 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 929 } else if (type == NORM_1) { 930 PetscReal *tmp, *tmp2; 931 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 932 *nrm = 0.0; 933 v = av; 934 for (j = 0; j < mdn->A->cmap->n; j++) { 935 for (i = 0; i < mdn->A->rmap->n; i++) { 936 tmp[j] += PetscAbsScalar(*v); 937 v++; 938 } 939 } 940 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 941 for (j = 0; j < A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 PetscCall(PetscFree2(tmp, tmp2)); 945 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 PetscCall(MatNorm(mdn->A, type, &ntemp)); 949 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 950 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 951 } 952 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 953 PetscFunctionReturn(PETSC_SUCCESS); 954 } 955 956 PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959 Mat B; 960 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 961 PetscInt j, i, lda; 962 PetscScalar *v; 963 964 PetscFunctionBegin; 965 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 966 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 967 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 968 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 969 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 970 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 971 } else B = *matout; 972 973 m = a->A->rmap->n; 974 n = a->A->cmap->n; 975 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 976 PetscCall(MatDenseGetLDA(a->A, &lda)); 977 PetscCall(PetscMalloc1(m, &rwork)); 978 for (i = 0; i < m; i++) rwork[i] = rstart + i; 979 for (j = 0; j < n; j++) { 980 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 981 v += lda; 982 } 983 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 984 PetscCall(PetscFree(rwork)); 985 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 986 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 987 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 988 *matout = B; 989 } else { 990 PetscCall(MatHeaderMerge(A, &B)); 991 } 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 997 998 PetscErrorCode MatSetUp_MPIDense(Mat A) 999 { 1000 PetscFunctionBegin; 1001 PetscCall(PetscLayoutSetUp(A->rmap)); 1002 PetscCall(PetscLayoutSetUp(A->cmap)); 1003 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1008 { 1009 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1010 1011 PetscFunctionBegin; 1012 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1017 { 1018 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1019 1020 PetscFunctionBegin; 1021 PetscCall(MatConjugate(a->A)); 1022 PetscFunctionReturn(PETSC_SUCCESS); 1023 } 1024 1025 PetscErrorCode MatRealPart_MPIDense(Mat A) 1026 { 1027 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1028 1029 PetscFunctionBegin; 1030 PetscCall(MatRealPart(a->A)); 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1035 { 1036 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1037 1038 PetscFunctionBegin; 1039 PetscCall(MatImaginaryPart(a->A)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1044 { 1045 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1046 1047 PetscFunctionBegin; 1048 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1049 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1050 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1055 1056 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1057 { 1058 PetscInt i, m, n; 1059 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1060 PetscReal *work; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatGetSize(A, &m, &n)); 1064 PetscCall(PetscMalloc1(n, &work)); 1065 if (type == REDUCTION_MEAN_REALPART) { 1066 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1067 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1068 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1069 } else { 1070 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1071 } 1072 if (type == NORM_2) { 1073 for (i = 0; i < n; i++) work[i] *= work[i]; 1074 } 1075 if (type == NORM_INFINITY) { 1076 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1077 } else { 1078 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1079 } 1080 PetscCall(PetscFree(work)); 1081 if (type == NORM_2) { 1082 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1083 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1084 for (i = 0; i < n; i++) reductions[i] /= m; 1085 } 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 #if defined(PETSC_HAVE_CUDA) 1090 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) 1091 { 1092 PetscScalar *da; 1093 PetscInt lda; 1094 1095 PetscFunctionBegin; 1096 PetscCall(MatDenseCUDAGetArray(A, &da)); 1097 PetscCall(MatDenseGetLDA(A, &lda)); 1098 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1099 PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1100 PetscCall(MatDenseCUDARestoreArray(A, &da)); 1101 PetscFunctionReturn(PETSC_SUCCESS); 1102 } 1103 1104 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1105 { 1106 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1107 PetscInt lda; 1108 1109 PetscFunctionBegin; 1110 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1111 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1112 if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1113 a->vecinuse = col + 1; 1114 PetscCall(MatDenseGetLDA(a->A, &lda)); 1115 PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1116 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1117 *v = a->cvec; 1118 PetscFunctionReturn(PETSC_SUCCESS); 1119 } 1120 1121 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1122 { 1123 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1124 1125 PetscFunctionBegin; 1126 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1127 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1128 a->vecinuse = 0; 1129 PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1130 PetscCall(VecCUDAResetArray(a->cvec)); 1131 if (v) *v = NULL; 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1136 { 1137 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1138 PetscInt lda; 1139 1140 PetscFunctionBegin; 1141 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1142 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1143 if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1144 a->vecinuse = col + 1; 1145 PetscCall(MatDenseGetLDA(a->A, &lda)); 1146 PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse)); 1147 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1148 PetscCall(VecLockReadPush(a->cvec)); 1149 *v = a->cvec; 1150 PetscFunctionReturn(PETSC_SUCCESS); 1151 } 1152 1153 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1154 { 1155 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1156 1157 PetscFunctionBegin; 1158 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1159 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1160 a->vecinuse = 0; 1161 PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse)); 1162 PetscCall(VecLockReadPop(a->cvec)); 1163 PetscCall(VecCUDAResetArray(a->cvec)); 1164 if (v) *v = NULL; 1165 PetscFunctionReturn(PETSC_SUCCESS); 1166 } 1167 1168 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1169 { 1170 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1171 PetscInt lda; 1172 1173 PetscFunctionBegin; 1174 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1175 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1176 if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1177 a->vecinuse = col + 1; 1178 PetscCall(MatDenseGetLDA(a->A, &lda)); 1179 PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1180 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1181 *v = a->cvec; 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) 1186 { 1187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1188 1189 PetscFunctionBegin; 1190 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1191 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1192 a->vecinuse = 0; 1193 PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1194 PetscCall(VecCUDAResetArray(a->cvec)); 1195 if (v) *v = NULL; 1196 PetscFunctionReturn(PETSC_SUCCESS); 1197 } 1198 1199 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1200 { 1201 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1202 1203 PetscFunctionBegin; 1204 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1205 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1206 PetscCall(MatDenseCUDAPlaceArray(l->A, a)); 1207 PetscFunctionReturn(PETSC_SUCCESS); 1208 } 1209 1210 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1211 { 1212 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1213 1214 PetscFunctionBegin; 1215 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1216 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1217 PetscCall(MatDenseCUDAResetArray(l->A)); 1218 PetscFunctionReturn(PETSC_SUCCESS); 1219 } 1220 1221 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1222 { 1223 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1224 1225 PetscFunctionBegin; 1226 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1227 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1228 PetscCall(MatDenseCUDAReplaceArray(l->A, a)); 1229 PetscFunctionReturn(PETSC_SUCCESS); 1230 } 1231 1232 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1233 { 1234 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1235 1236 PetscFunctionBegin; 1237 PetscCall(MatDenseCUDAGetArrayWrite(l->A, a)); 1238 PetscFunctionReturn(PETSC_SUCCESS); 1239 } 1240 1241 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1242 { 1243 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1244 1245 PetscFunctionBegin; 1246 PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a)); 1247 PetscFunctionReturn(PETSC_SUCCESS); 1248 } 1249 1250 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1251 { 1252 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1253 1254 PetscFunctionBegin; 1255 PetscCall(MatDenseCUDAGetArrayRead(l->A, a)); 1256 PetscFunctionReturn(PETSC_SUCCESS); 1257 } 1258 1259 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1260 { 1261 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1262 1263 PetscFunctionBegin; 1264 PetscCall(MatDenseCUDARestoreArrayRead(l->A, a)); 1265 PetscFunctionReturn(PETSC_SUCCESS); 1266 } 1267 1268 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1269 { 1270 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1271 1272 PetscFunctionBegin; 1273 PetscCall(MatDenseCUDAGetArray(l->A, a)); 1274 PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1278 { 1279 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1280 1281 PetscFunctionBegin; 1282 PetscCall(MatDenseCUDARestoreArray(l->A, a)); 1283 PetscFunctionReturn(PETSC_SUCCESS); 1284 } 1285 1286 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1287 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1288 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1289 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1290 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1291 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1292 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 1293 1294 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) 1295 { 1296 Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1297 1298 PetscFunctionBegin; 1299 PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1300 PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1301 if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1302 mat->boundtocpu = bind; 1303 if (!bind) { 1304 PetscBool iscuda; 1305 1306 PetscCall(PetscFree(mat->defaultrandtype)); 1307 PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype)); 1308 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda)); 1309 if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 1310 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda)); 1311 if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 1312 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA)); 1313 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA)); 1314 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA)); 1315 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1317 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1318 mat->ops->shift = MatShift_MPIDenseCUDA; 1319 } else { 1320 PetscCall(PetscFree(mat->defaultrandtype)); 1321 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1322 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1323 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1324 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1327 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1328 mat->ops->shift = MatShift_MPIDense; 1329 } 1330 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1331 PetscFunctionReturn(PETSC_SUCCESS); 1332 } 1333 1334 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1335 { 1336 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1337 PetscBool iscuda; 1338 1339 PetscFunctionBegin; 1340 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1341 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 1342 if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS); 1343 PetscCall(PetscLayoutSetUp(A->rmap)); 1344 PetscCall(PetscLayoutSetUp(A->cmap)); 1345 if (!d->A) { 1346 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1347 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1348 } 1349 PetscCall(MatSetType(d->A, MATSEQDENSECUDA)); 1350 PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data)); 1351 A->preallocated = PETSC_TRUE; 1352 A->assembled = PETSC_TRUE; 1353 PetscFunctionReturn(PETSC_SUCCESS); 1354 } 1355 #endif 1356 1357 #if defined(PETSC_HAVE_HIP) 1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha) 1359 { 1360 PetscScalar *da; 1361 PetscInt lda; 1362 1363 PetscFunctionBegin; 1364 PetscCall(MatDenseHIPGetArray(A, &da)); 1365 PetscCall(MatDenseGetLDA(A, &lda)); 1366 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1367 PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1368 PetscCall(MatDenseHIPRestoreArray(A, &da)); 1369 PetscFunctionReturn(PETSC_SUCCESS); 1370 } 1371 1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1373 { 1374 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1375 PetscInt lda; 1376 1377 PetscFunctionBegin; 1378 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1379 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1380 if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1381 a->vecinuse = col + 1; 1382 PetscCall(MatDenseGetLDA(a->A, &lda)); 1383 PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1384 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1385 *v = a->cvec; 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1390 { 1391 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1392 1393 PetscFunctionBegin; 1394 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1395 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1396 a->vecinuse = 0; 1397 PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1398 PetscCall(VecHIPResetArray(a->cvec)); 1399 if (v) *v = NULL; 1400 PetscFunctionReturn(PETSC_SUCCESS); 1401 } 1402 1403 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1404 { 1405 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1406 PetscInt lda; 1407 1408 PetscFunctionBegin; 1409 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1410 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1411 if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); } 1412 a->vecinuse = col + 1; 1413 PetscCall(MatDenseGetLDA(a->A, &lda)); 1414 PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse)); 1415 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1416 PetscCall(VecLockReadPush(a->cvec)); 1417 *v = a->cvec; 1418 PetscFunctionReturn(PETSC_SUCCESS); 1419 } 1420 1421 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1422 { 1423 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1424 1425 PetscFunctionBegin; 1426 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1427 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1428 a->vecinuse = 0; 1429 PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse)); 1430 PetscCall(VecLockReadPop(a->cvec)); 1431 PetscCall(VecHIPResetArray(a->cvec)); 1432 if (v) *v = NULL; 1433 PetscFunctionReturn(PETSC_SUCCESS); 1434 } 1435 1436 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1437 { 1438 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1439 PetscInt lda; 1440 1441 PetscFunctionBegin; 1442 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1443 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1444 if (!a->cvec) PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1445 a->vecinuse = col + 1; 1446 PetscCall(MatDenseGetLDA(a->A, &lda)); 1447 PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1448 PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1449 *v = a->cvec; 1450 PetscFunctionReturn(PETSC_SUCCESS); 1451 } 1452 1453 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v) 1454 { 1455 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1456 1457 PetscFunctionBegin; 1458 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1459 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1460 a->vecinuse = 0; 1461 PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1462 PetscCall(VecHIPResetArray(a->cvec)); 1463 if (v) *v = NULL; 1464 PetscFunctionReturn(PETSC_SUCCESS); 1465 } 1466 1467 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1468 { 1469 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1470 1471 PetscFunctionBegin; 1472 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1473 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1474 PetscCall(MatDenseHIPPlaceArray(l->A, a)); 1475 PetscFunctionReturn(PETSC_SUCCESS); 1476 } 1477 1478 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A) 1479 { 1480 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1481 1482 PetscFunctionBegin; 1483 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1484 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1485 PetscCall(MatDenseHIPResetArray(l->A)); 1486 PetscFunctionReturn(PETSC_SUCCESS); 1487 } 1488 1489 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a) 1490 { 1491 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1492 1493 PetscFunctionBegin; 1494 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1495 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1496 PetscCall(MatDenseHIPReplaceArray(l->A, a)); 1497 PetscFunctionReturn(PETSC_SUCCESS); 1498 } 1499 1500 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1501 { 1502 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1503 1504 PetscFunctionBegin; 1505 PetscCall(MatDenseHIPGetArrayWrite(l->A, a)); 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a) 1510 { 1511 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1512 1513 PetscFunctionBegin; 1514 PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a)); 1515 PetscFunctionReturn(PETSC_SUCCESS); 1516 } 1517 1518 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1519 { 1520 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1521 1522 PetscFunctionBegin; 1523 PetscCall(MatDenseHIPGetArrayRead(l->A, a)); 1524 PetscFunctionReturn(PETSC_SUCCESS); 1525 } 1526 1527 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a) 1528 { 1529 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1530 1531 PetscFunctionBegin; 1532 PetscCall(MatDenseHIPRestoreArrayRead(l->A, a)); 1533 PetscFunctionReturn(PETSC_SUCCESS); 1534 } 1535 1536 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a) 1537 { 1538 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1539 1540 PetscFunctionBegin; 1541 PetscCall(MatDenseHIPGetArray(l->A, a)); 1542 PetscFunctionReturn(PETSC_SUCCESS); 1543 } 1544 1545 static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a) 1546 { 1547 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1548 1549 PetscFunctionBegin; 1550 PetscCall(MatDenseHIPRestoreArray(l->A, a)); 1551 PetscFunctionReturn(PETSC_SUCCESS); 1552 } 1553 1554 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1555 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1556 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1557 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1558 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1559 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1560 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 1561 1562 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind) 1563 { 1564 Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1565 1566 PetscFunctionBegin; 1567 PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1568 PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1569 if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1570 mat->boundtocpu = bind; 1571 if (!bind) { 1572 PetscBool iscuda; 1573 1574 PetscCall(PetscFree(mat->defaultrandtype)); 1575 PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype)); 1576 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iscuda)); 1577 if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 1578 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iscuda)); 1579 if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 1580 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP)); 1581 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP)); 1582 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP)); 1583 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP)); 1584 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP)); 1585 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP)); 1586 mat->ops->shift = MatShift_MPIDenseHIP; 1587 } else { 1588 PetscCall(PetscFree(mat->defaultrandtype)); 1589 PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1596 mat->ops->shift = MatShift_MPIDense; 1597 } 1598 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data) 1603 { 1604 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1605 PetscBool iscuda; 1606 1607 PetscFunctionBegin; 1608 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1609 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iscuda)); 1610 if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS); 1611 PetscCall(PetscLayoutSetUp(A->rmap)); 1612 PetscCall(PetscLayoutSetUp(A->cmap)); 1613 if (!d->A) { 1614 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1615 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1616 } 1617 PetscCall(MatSetType(d->A, MATSEQDENSEHIP)); 1618 PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data)); 1619 A->preallocated = PETSC_TRUE; 1620 A->assembled = PETSC_TRUE; 1621 PetscFunctionReturn(PETSC_SUCCESS); 1622 } 1623 #endif 1624 1625 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1626 { 1627 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1628 1629 PetscFunctionBegin; 1630 PetscCall(MatSetRandom(d->A, rctx)); 1631 #if defined(PETSC_HAVE_DEVICE) 1632 x->offloadmask = d->A->offloadmask; 1633 #endif 1634 PetscFunctionReturn(PETSC_SUCCESS); 1635 } 1636 1637 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1638 { 1639 PetscFunctionBegin; 1640 *missing = PETSC_FALSE; 1641 PetscFunctionReturn(PETSC_SUCCESS); 1642 } 1643 1644 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1645 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1646 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1647 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1648 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1649 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1650 1651 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1652 MatGetRow_MPIDense, 1653 MatRestoreRow_MPIDense, 1654 MatMult_MPIDense, 1655 /* 4*/ MatMultAdd_MPIDense, 1656 MatMultTranspose_MPIDense, 1657 MatMultTransposeAdd_MPIDense, 1658 NULL, 1659 NULL, 1660 NULL, 1661 /* 10*/ NULL, 1662 NULL, 1663 NULL, 1664 NULL, 1665 MatTranspose_MPIDense, 1666 /* 15*/ MatGetInfo_MPIDense, 1667 MatEqual_MPIDense, 1668 MatGetDiagonal_MPIDense, 1669 MatDiagonalScale_MPIDense, 1670 MatNorm_MPIDense, 1671 /* 20*/ MatAssemblyBegin_MPIDense, 1672 MatAssemblyEnd_MPIDense, 1673 MatSetOption_MPIDense, 1674 MatZeroEntries_MPIDense, 1675 /* 24*/ MatZeroRows_MPIDense, 1676 NULL, 1677 NULL, 1678 NULL, 1679 NULL, 1680 /* 29*/ MatSetUp_MPIDense, 1681 NULL, 1682 NULL, 1683 MatGetDiagonalBlock_MPIDense, 1684 NULL, 1685 /* 34*/ MatDuplicate_MPIDense, 1686 NULL, 1687 NULL, 1688 NULL, 1689 NULL, 1690 /* 39*/ MatAXPY_MPIDense, 1691 MatCreateSubMatrices_MPIDense, 1692 NULL, 1693 MatGetValues_MPIDense, 1694 MatCopy_MPIDense, 1695 /* 44*/ NULL, 1696 MatScale_MPIDense, 1697 MatShift_MPIDense, 1698 NULL, 1699 NULL, 1700 /* 49*/ MatSetRandom_MPIDense, 1701 NULL, 1702 NULL, 1703 NULL, 1704 NULL, 1705 /* 54*/ NULL, 1706 NULL, 1707 NULL, 1708 NULL, 1709 NULL, 1710 /* 59*/ MatCreateSubMatrix_MPIDense, 1711 MatDestroy_MPIDense, 1712 MatView_MPIDense, 1713 NULL, 1714 NULL, 1715 /* 64*/ NULL, 1716 NULL, 1717 NULL, 1718 NULL, 1719 NULL, 1720 /* 69*/ NULL, 1721 NULL, 1722 NULL, 1723 NULL, 1724 NULL, 1725 /* 74*/ NULL, 1726 NULL, 1727 NULL, 1728 NULL, 1729 NULL, 1730 /* 79*/ NULL, 1731 NULL, 1732 NULL, 1733 NULL, 1734 /* 83*/ MatLoad_MPIDense, 1735 NULL, 1736 NULL, 1737 NULL, 1738 NULL, 1739 NULL, 1740 /* 89*/ NULL, 1741 NULL, 1742 NULL, 1743 NULL, 1744 NULL, 1745 /* 94*/ NULL, 1746 NULL, 1747 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1748 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1749 NULL, 1750 /* 99*/ MatProductSetFromOptions_MPIDense, 1751 NULL, 1752 NULL, 1753 MatConjugate_MPIDense, 1754 NULL, 1755 /*104*/ NULL, 1756 MatRealPart_MPIDense, 1757 MatImaginaryPart_MPIDense, 1758 NULL, 1759 NULL, 1760 /*109*/ NULL, 1761 NULL, 1762 NULL, 1763 MatGetColumnVector_MPIDense, 1764 MatMissingDiagonal_MPIDense, 1765 /*114*/ NULL, 1766 NULL, 1767 NULL, 1768 NULL, 1769 NULL, 1770 /*119*/ NULL, 1771 NULL, 1772 NULL, 1773 NULL, 1774 NULL, 1775 /*124*/ NULL, 1776 MatGetColumnReductions_MPIDense, 1777 NULL, 1778 NULL, 1779 NULL, 1780 /*129*/ NULL, 1781 NULL, 1782 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1783 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1784 NULL, 1785 /*134*/ NULL, 1786 NULL, 1787 NULL, 1788 NULL, 1789 NULL, 1790 /*139*/ NULL, 1791 NULL, 1792 NULL, 1793 NULL, 1794 NULL, 1795 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1796 /*145*/ NULL, 1797 NULL, 1798 NULL, 1799 NULL, 1800 NULL, 1801 /*150*/ NULL, 1802 NULL}; 1803 1804 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1805 { 1806 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1807 MatType mtype = MATSEQDENSE; 1808 1809 PetscFunctionBegin; 1810 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1811 PetscCall(PetscLayoutSetUp(mat->rmap)); 1812 PetscCall(PetscLayoutSetUp(mat->cmap)); 1813 if (!a->A) { 1814 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1815 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1816 } 1817 #if defined(PETSC_HAVE_CUDA) 1818 PetscBool iscuda; 1819 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1820 if (iscuda) mtype = MATSEQDENSECUDA; 1821 #endif 1822 #if defined(PETSC_HAVE_HIP) 1823 PetscBool iship; 1824 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1825 if (iship) mtype = MATSEQDENSEHIP; 1826 #endif 1827 PetscCall(MatSetType(a->A, mtype)); 1828 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1829 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1830 mat->offloadmask = a->A->offloadmask; 1831 #endif 1832 mat->preallocated = PETSC_TRUE; 1833 mat->assembled = PETSC_TRUE; 1834 PetscFunctionReturn(PETSC_SUCCESS); 1835 } 1836 1837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1838 { 1839 Mat B, C; 1840 1841 PetscFunctionBegin; 1842 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1843 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1844 PetscCall(MatDestroy(&C)); 1845 if (reuse == MAT_REUSE_MATRIX) { 1846 C = *newmat; 1847 } else C = NULL; 1848 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1849 PetscCall(MatDestroy(&B)); 1850 if (reuse == MAT_INPLACE_MATRIX) { 1851 PetscCall(MatHeaderReplace(A, &C)); 1852 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1853 PetscFunctionReturn(PETSC_SUCCESS); 1854 } 1855 1856 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1857 { 1858 Mat B, C; 1859 1860 PetscFunctionBegin; 1861 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1862 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1863 if (reuse == MAT_REUSE_MATRIX) { 1864 C = *newmat; 1865 } else C = NULL; 1866 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1867 PetscCall(MatDestroy(&B)); 1868 if (reuse == MAT_INPLACE_MATRIX) { 1869 PetscCall(MatHeaderReplace(A, &C)); 1870 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 #if defined(PETSC_HAVE_ELEMENTAL) 1875 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1876 { 1877 Mat mat_elemental; 1878 PetscScalar *v; 1879 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 1880 1881 PetscFunctionBegin; 1882 if (reuse == MAT_REUSE_MATRIX) { 1883 mat_elemental = *newmat; 1884 PetscCall(MatZeroEntries(*newmat)); 1885 } else { 1886 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1887 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1888 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1889 PetscCall(MatSetUp(mat_elemental)); 1890 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1891 } 1892 1893 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1894 for (i = 0; i < N; i++) cols[i] = i; 1895 for (i = 0; i < m; i++) rows[i] = rstart + i; 1896 1897 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1898 PetscCall(MatDenseGetArray(A, &v)); 1899 PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1900 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1901 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1902 PetscCall(MatDenseRestoreArray(A, &v)); 1903 PetscCall(PetscFree2(rows, cols)); 1904 1905 if (reuse == MAT_INPLACE_MATRIX) { 1906 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1907 } else { 1908 *newmat = mat_elemental; 1909 } 1910 PetscFunctionReturn(PETSC_SUCCESS); 1911 } 1912 #endif 1913 1914 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1915 { 1916 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1917 1918 PetscFunctionBegin; 1919 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1920 PetscFunctionReturn(PETSC_SUCCESS); 1921 } 1922 1923 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1924 { 1925 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1926 1927 PetscFunctionBegin; 1928 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1929 PetscFunctionReturn(PETSC_SUCCESS); 1930 } 1931 1932 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1933 { 1934 Mat_MPIDense *mat; 1935 PetscInt m, nloc, N; 1936 1937 PetscFunctionBegin; 1938 PetscCall(MatGetSize(inmat, &m, &N)); 1939 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1940 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1941 PetscInt sum; 1942 1943 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1944 /* Check sum(n) = N */ 1945 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1946 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1947 1948 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1949 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1950 } 1951 1952 /* numeric phase */ 1953 mat = (Mat_MPIDense *)(*outmat)->data; 1954 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1955 PetscFunctionReturn(PETSC_SUCCESS); 1956 } 1957 1958 #if defined(PETSC_HAVE_CUDA) 1959 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 1960 { 1961 Mat B; 1962 Mat_MPIDense *m; 1963 1964 PetscFunctionBegin; 1965 if (reuse == MAT_INITIAL_MATRIX) { 1966 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1967 } else if (reuse == MAT_REUSE_MATRIX) { 1968 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1969 } 1970 1971 B = *newmat; 1972 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE)); 1973 PetscCall(PetscFree(B->defaultvectype)); 1974 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 1975 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 1976 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL)); 1977 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 1978 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 1979 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 1980 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 1981 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL)); 1982 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL)); 1983 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL)); 1984 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL)); 1985 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL)); 1986 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL)); 1987 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL)); 1988 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL)); 1989 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL)); 1990 m = (Mat_MPIDense *)(B)->data; 1991 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 1992 B->ops->bindtocpu = NULL; 1993 B->offloadmask = PETSC_OFFLOAD_CPU; 1994 PetscFunctionReturn(PETSC_SUCCESS); 1995 } 1996 1997 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) 1998 { 1999 Mat B; 2000 Mat_MPIDense *m; 2001 2002 PetscFunctionBegin; 2003 if (reuse == MAT_INITIAL_MATRIX) { 2004 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2005 } else if (reuse == MAT_REUSE_MATRIX) { 2006 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2007 } 2008 2009 B = *newmat; 2010 PetscCall(PetscFree(B->defaultvectype)); 2011 PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 2012 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 2013 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 2014 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2015 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2016 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2017 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2018 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 2019 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 2020 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 2021 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 2022 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 2023 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 2024 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 2025 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 2026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 2027 m = (Mat_MPIDense *)(B->data); 2028 if (m->A) { 2029 PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 2030 B->offloadmask = PETSC_OFFLOAD_BOTH; 2031 } else { 2032 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2033 } 2034 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 2035 2036 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 #endif 2040 2041 #if defined(PETSC_HAVE_HIP) 2042 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2043 { 2044 Mat B; 2045 Mat_MPIDense *m; 2046 2047 PetscFunctionBegin; 2048 if (reuse == MAT_INITIAL_MATRIX) { 2049 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2050 } else if (reuse == MAT_REUSE_MATRIX) { 2051 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2052 } 2053 2054 B = *newmat; 2055 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE)); 2056 PetscCall(PetscFree(B->defaultvectype)); 2057 PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 2058 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 2059 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL)); 2060 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 2061 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 2062 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 2063 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 2064 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL)); 2065 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL)); 2066 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL)); 2067 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL)); 2068 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL)); 2069 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL)); 2070 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL)); 2071 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL)); 2072 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL)); 2073 m = (Mat_MPIDense *)(B)->data; 2074 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 2075 B->ops->bindtocpu = NULL; 2076 B->offloadmask = PETSC_OFFLOAD_CPU; 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat) 2081 { 2082 Mat B; 2083 Mat_MPIDense *m; 2084 2085 PetscFunctionBegin; 2086 if (reuse == MAT_INITIAL_MATRIX) { 2087 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 2088 } else if (reuse == MAT_REUSE_MATRIX) { 2089 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 2090 } 2091 2092 B = *newmat; 2093 PetscCall(PetscFree(B->defaultvectype)); 2094 PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype)); 2095 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP)); 2096 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense)); 2097 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2098 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2099 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2100 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2101 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP)); 2102 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP)); 2103 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP)); 2104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP)); 2105 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP)); 2106 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP)); 2107 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP)); 2108 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP)); 2109 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP)); 2110 m = (Mat_MPIDense *)(B->data); 2111 if (m->A) { 2112 PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A)); 2113 B->offloadmask = PETSC_OFFLOAD_BOTH; 2114 } else { 2115 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 2116 } 2117 PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE)); 2118 2119 B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP; 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 #endif 2123 2124 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2125 { 2126 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2127 PetscInt lda; 2128 2129 PetscFunctionBegin; 2130 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2131 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2132 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2133 a->vecinuse = col + 1; 2134 PetscCall(MatDenseGetLDA(a->A, &lda)); 2135 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 2136 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2137 *v = a->cvec; 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 2142 { 2143 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2147 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 2148 a->vecinuse = 0; 2149 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 2150 PetscCall(VecResetArray(a->cvec)); 2151 if (v) *v = NULL; 2152 PetscFunctionReturn(PETSC_SUCCESS); 2153 } 2154 2155 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2156 { 2157 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2158 PetscInt lda; 2159 2160 PetscFunctionBegin; 2161 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2162 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2163 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2164 a->vecinuse = col + 1; 2165 PetscCall(MatDenseGetLDA(a->A, &lda)); 2166 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 2167 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2168 PetscCall(VecLockReadPush(a->cvec)); 2169 *v = a->cvec; 2170 PetscFunctionReturn(PETSC_SUCCESS); 2171 } 2172 2173 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 2174 { 2175 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2176 2177 PetscFunctionBegin; 2178 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2179 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2180 a->vecinuse = 0; 2181 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 2182 PetscCall(VecLockReadPop(a->cvec)); 2183 PetscCall(VecResetArray(a->cvec)); 2184 if (v) *v = NULL; 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2189 { 2190 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2191 PetscInt lda; 2192 2193 PetscFunctionBegin; 2194 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2195 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2196 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 2197 a->vecinuse = col + 1; 2198 PetscCall(MatDenseGetLDA(a->A, &lda)); 2199 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2200 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 2201 *v = a->cvec; 2202 PetscFunctionReturn(PETSC_SUCCESS); 2203 } 2204 2205 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 2206 { 2207 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2208 2209 PetscFunctionBegin; 2210 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 2211 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 2212 a->vecinuse = 0; 2213 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 2214 PetscCall(VecResetArray(a->cvec)); 2215 if (v) *v = NULL; 2216 PetscFunctionReturn(PETSC_SUCCESS); 2217 } 2218 2219 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 2220 { 2221 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2222 Mat_MPIDense *c; 2223 MPI_Comm comm; 2224 PetscInt pbegin, pend; 2225 2226 PetscFunctionBegin; 2227 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2228 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 2229 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2230 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 2231 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 2232 if (!a->cmat) { 2233 PetscCall(MatCreate(comm, &a->cmat)); 2234 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 2235 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 2236 else { 2237 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2238 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2239 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2240 } 2241 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2242 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2243 } else { 2244 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 2245 if (same && a->cmat->rmap->N != A->rmap->N) { 2246 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 2247 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2248 } 2249 if (!same) { 2250 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 2251 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 2252 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 2253 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 2254 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 2255 } 2256 if (cend - cbegin != a->cmat->cmap->N) { 2257 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 2258 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 2259 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 2260 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 2261 } 2262 } 2263 c = (Mat_MPIDense *)a->cmat->data; 2264 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2265 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 2266 2267 a->cmat->preallocated = PETSC_TRUE; 2268 a->cmat->assembled = PETSC_TRUE; 2269 #if defined(PETSC_HAVE_DEVICE) 2270 a->cmat->offloadmask = c->A->offloadmask; 2271 #endif 2272 a->matinuse = cbegin + 1; 2273 *v = a->cmat; 2274 PetscFunctionReturn(PETSC_SUCCESS); 2275 } 2276 2277 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 2278 { 2279 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2280 Mat_MPIDense *c; 2281 2282 PetscFunctionBegin; 2283 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 2284 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 2285 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 2286 a->matinuse = 0; 2287 c = (Mat_MPIDense *)a->cmat->data; 2288 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 2289 if (v) *v = NULL; 2290 #if defined(PETSC_HAVE_DEVICE) 2291 A->offloadmask = a->A->offloadmask; 2292 #endif 2293 PetscFunctionReturn(PETSC_SUCCESS); 2294 } 2295 2296 /*MC 2297 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 2298 2299 Options Database Key: 2300 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 2301 2302 Level: beginner 2303 2304 .seealso: [](chapter_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 2305 M*/ 2306 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 2307 { 2308 Mat_MPIDense *a; 2309 2310 PetscFunctionBegin; 2311 PetscCall(PetscNew(&a)); 2312 mat->data = (void *)a; 2313 PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps))); 2314 2315 mat->insertmode = NOT_SET_VALUES; 2316 2317 /* build cache for off array entries formed */ 2318 a->donotstash = PETSC_FALSE; 2319 2320 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 2321 2322 /* stuff used for matrix vector multiply */ 2323 a->lvec = NULL; 2324 a->Mvctx = NULL; 2325 a->roworiented = PETSC_TRUE; 2326 2327 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 2328 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 2329 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 2330 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 2331 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 2332 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 2333 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 2334 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 2335 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 2336 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 2337 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 2338 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 2339 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 2340 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 2341 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 2342 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 2343 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 2344 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 2345 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 2346 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 2347 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 2348 #if defined(PETSC_HAVE_ELEMENTAL) 2349 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 2350 #endif 2351 #if defined(PETSC_HAVE_SCALAPACK) 2352 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 2353 #endif 2354 #if defined(PETSC_HAVE_CUDA) 2355 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 2356 #endif 2357 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 2358 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2359 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2360 #if defined(PETSC_HAVE_CUDA) 2361 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2362 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2363 #endif 2364 #if defined(PETSC_HAVE_HIP) 2365 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 2366 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 2367 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 2368 #endif 2369 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 2370 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 2371 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 2372 PetscFunctionReturn(PETSC_SUCCESS); 2373 } 2374 2375 /*MC 2376 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 2377 2378 Options Database Key: 2379 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()` 2380 2381 Level: beginner 2382 2383 .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP` 2384 M*/ 2385 #if defined(PETSC_HAVE_CUDA) 2386 #include <petsc/private/deviceimpl.h> 2387 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 2388 { 2389 PetscFunctionBegin; 2390 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 2391 PetscCall(MatCreate_MPIDense(B)); 2392 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B)); 2393 PetscFunctionReturn(PETSC_SUCCESS); 2394 } 2395 #endif 2396 2397 /*MC 2398 MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs. 2399 2400 Options Database Key: 2401 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()` 2402 2403 Level: beginner 2404 2405 .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATMPIDENSEHIP` 2406 M*/ 2407 #if defined(PETSC_HAVE_HIP) 2408 #include <petsc/private/deviceimpl.h> 2409 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B) 2410 { 2411 PetscFunctionBegin; 2412 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP)); 2413 PetscCall(MatCreate_MPIDense(B)); 2414 PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B)); 2415 PetscFunctionReturn(PETSC_SUCCESS); 2416 } 2417 #endif 2418 2419 /*MC 2420 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 2421 2422 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 2423 and `MATMPIDENSE` otherwise. 2424 2425 Options Database Key: 2426 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 2427 2428 Level: beginner 2429 2430 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 2431 M*/ 2432 2433 /*MC 2434 MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs. 2435 Similarly, 2436 `MATDENSEHIP` = "densehip" 2437 2438 This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator, 2439 and `MATMPIDENSECUDA` otherwise. 2440 2441 Options Database Key: 2442 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()` 2443 2444 Level: beginner 2445 2446 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2447 M*/ 2448 2449 /*MC 2450 MATDENSEHIP - "densehip" - A matrix type to be used for dense matrices on GPUs. 2451 2452 This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator, 2453 and `MATMPIDENSEHIP` otherwise. 2454 2455 Options Database Key: 2456 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()` 2457 2458 Level: beginner 2459 2460 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE` 2461 M*/ 2462 2463 /*@C 2464 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 2465 2466 Collective 2467 2468 Input Parameters: 2469 . B - the matrix 2470 - data - optional location of matrix data. Set to `NULL` for PETSc 2471 to control all matrix memory allocation. 2472 2473 Level: intermediate 2474 2475 Notes: 2476 The dense format is fully compatible with standard Fortran 2477 storage by columns. 2478 2479 The data input variable is intended primarily for Fortran programmers 2480 who wish to allocate their own matrix memory space. Most users should 2481 set `data` to `NULL`. 2482 2483 .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2484 @*/ 2485 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 2486 { 2487 PetscFunctionBegin; 2488 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2489 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 2490 PetscFunctionReturn(PETSC_SUCCESS); 2491 } 2492 2493 /*@ 2494 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 2495 array provided by the user. This is useful to avoid copying an array 2496 into a matrix 2497 2498 Not Collective 2499 2500 Input Parameters: 2501 + mat - the matrix 2502 - array - the array in column major order 2503 2504 Level: developer 2505 2506 Note: 2507 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 2508 freed when the matrix is destroyed. 2509 2510 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 2511 `MatDenseReplaceArray()` 2512 @*/ 2513 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 2514 { 2515 PetscFunctionBegin; 2516 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2517 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2518 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2519 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2520 mat->offloadmask = PETSC_OFFLOAD_CPU; 2521 #endif 2522 PetscFunctionReturn(PETSC_SUCCESS); 2523 } 2524 2525 /*@ 2526 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 2527 2528 Not Collective 2529 2530 Input Parameters: 2531 . mat - the matrix 2532 2533 Level: developer 2534 2535 Note: 2536 You can only call this after a call to `MatDensePlaceArray()` 2537 2538 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2539 @*/ 2540 PetscErrorCode MatDenseResetArray(Mat mat) 2541 { 2542 PetscFunctionBegin; 2543 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2544 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 2545 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2546 PetscFunctionReturn(PETSC_SUCCESS); 2547 } 2548 2549 /*@ 2550 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2551 array provided by the user. This is useful to avoid copying an array 2552 into a matrix 2553 2554 Not Collective 2555 2556 Input Parameters: 2557 + mat - the matrix 2558 - array - the array in column major order 2559 2560 Level: developer 2561 2562 Note: 2563 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 2564 freed by the user. It will be freed when the matrix is destroyed. 2565 2566 .seealso: [](chapter_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 2567 @*/ 2568 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 2569 { 2570 PetscFunctionBegin; 2571 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2572 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2573 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2574 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 2575 mat->offloadmask = PETSC_OFFLOAD_CPU; 2576 #endif 2577 PetscFunctionReturn(PETSC_SUCCESS); 2578 } 2579 2580 #if defined(PETSC_HAVE_CUDA) 2581 /*@C 2582 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2583 array provided by the user. This is useful to avoid copying an array 2584 into a matrix 2585 2586 Not Collective 2587 2588 Input Parameters: 2589 + mat - the matrix 2590 - array - the array in column major order 2591 2592 Level: developer 2593 2594 Note: 2595 You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be 2596 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2597 2598 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()` 2599 @*/ 2600 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) 2601 { 2602 PetscFunctionBegin; 2603 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2604 PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2605 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2606 mat->offloadmask = PETSC_OFFLOAD_GPU; 2607 PetscFunctionReturn(PETSC_SUCCESS); 2608 } 2609 2610 /*@C 2611 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()` 2612 2613 Not Collective 2614 2615 Input Parameters: 2616 . mat - the matrix 2617 2618 Level: developer 2619 2620 Note: 2621 You can only call this after a call to `MatDenseCUDAPlaceArray()` 2622 2623 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2624 @*/ 2625 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2626 { 2627 PetscFunctionBegin; 2628 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2629 PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat)); 2630 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2631 PetscFunctionReturn(PETSC_SUCCESS); 2632 } 2633 2634 /*@C 2635 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2636 array provided by the user. This is useful to avoid copying an array 2637 into a matrix 2638 2639 Not Collective 2640 2641 Input Parameters: 2642 + mat - the matrix 2643 - array - the array in column major order 2644 2645 Level: developer 2646 2647 Note: 2648 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2649 The memory passed in CANNOT be freed by the user. It will be freed 2650 when the matrix is destroyed. The array should respect the matrix leading dimension. 2651 2652 .seealso: [](chapter_matrices), `Mat`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2653 @*/ 2654 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) 2655 { 2656 PetscFunctionBegin; 2657 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2658 PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2659 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2660 mat->offloadmask = PETSC_OFFLOAD_GPU; 2661 PetscFunctionReturn(PETSC_SUCCESS); 2662 } 2663 2664 /*@C 2665 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix. 2666 2667 Not Collective 2668 2669 Input Parameters: 2670 . A - the matrix 2671 2672 Output Parameters 2673 . array - the GPU array in column major order 2674 2675 Level: developer 2676 2677 Notes: 2678 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`. 2679 2680 The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed. 2681 2682 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2683 @*/ 2684 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2685 { 2686 PetscFunctionBegin; 2687 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2688 PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2689 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2690 PetscFunctionReturn(PETSC_SUCCESS); 2691 } 2692 2693 /*@C 2694 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 2695 2696 Not Collective 2697 2698 Input Parameters: 2699 + A - the matrix 2700 - array - the GPU array in column major order 2701 2702 Level: developer 2703 2704 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2705 @*/ 2706 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2707 { 2708 PetscFunctionBegin; 2709 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2710 PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2711 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2712 A->offloadmask = PETSC_OFFLOAD_GPU; 2713 PetscFunctionReturn(PETSC_SUCCESS); 2714 } 2715 2716 /*@C 2717 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed. 2718 2719 Not Collective 2720 2721 Input Parameters: 2722 . A - the matrix 2723 2724 Output Parameters 2725 . array - the GPU array in column major order 2726 2727 Level: developer 2728 2729 Note: 2730 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2731 2732 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2733 @*/ 2734 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2735 { 2736 PetscFunctionBegin; 2737 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2738 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2739 PetscFunctionReturn(PETSC_SUCCESS); 2740 } 2741 2742 /*@C 2743 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2744 2745 Not Collective 2746 2747 Input Parameters: 2748 + A - the matrix 2749 - array - the GPU array in column major order 2750 2751 Level: developer 2752 2753 Note: 2754 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2755 2756 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2757 @*/ 2758 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2759 { 2760 PetscFunctionBegin; 2761 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2762 PetscFunctionReturn(PETSC_SUCCESS); 2763 } 2764 2765 /*@C 2766 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 2767 2768 Not Collective 2769 2770 Input Parameters: 2771 . A - the matrix 2772 2773 Output Parameters 2774 . array - the GPU array in column major order 2775 2776 Level: developer 2777 2778 Note: 2779 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`. 2780 2781 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2782 @*/ 2783 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2784 { 2785 PetscFunctionBegin; 2786 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2787 PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a)); 2788 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2789 PetscFunctionReturn(PETSC_SUCCESS); 2790 } 2791 2792 /*@C 2793 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`. 2794 2795 Not Collective 2796 2797 Input Parameters: 2798 + A - the matrix 2799 - array - the GPU array in column major order 2800 2801 Level: developer 2802 2803 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2804 @*/ 2805 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2806 { 2807 PetscFunctionBegin; 2808 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2809 PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a)); 2810 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2811 A->offloadmask = PETSC_OFFLOAD_GPU; 2812 PetscFunctionReturn(PETSC_SUCCESS); 2813 } 2814 #endif 2815 2816 #if defined(PETSC_HAVE_HIP) 2817 /*@C 2818 MatDenseHIPPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2819 array provided by the user. This is useful to avoid copying an array 2820 into a matrix 2821 2822 Not Collective 2823 2824 Input Parameters: 2825 + mat - the matrix 2826 - array - the array in column major order 2827 2828 Level: developer 2829 2830 Note: 2831 You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be 2832 freed when the matrix is destroyed. The array must have been allocated with `hipMalloc()`. 2833 2834 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPResetArray() 2835 @*/ 2836 PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array) 2837 { 2838 PetscFunctionBegin; 2839 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2840 PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2841 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2842 mat->offloadmask = PETSC_OFFLOAD_GPU; 2843 PetscFunctionReturn(PETSC_SUCCESS); 2844 } 2845 2846 /*@C 2847 MatDenseHIPResetArray - Resets the matrix array to that it previously had before the call to `MatDenseHIPPlaceArray()` 2848 2849 Not Collective 2850 2851 Input Parameters: 2852 . mat - the matrix 2853 2854 Level: developer 2855 2856 Notes: 2857 You can only call this after a call to `MatDenseHIPPlaceArray()` 2858 2859 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPPlaceArray() 2860 @*/ 2861 PetscErrorCode MatDenseHIPResetArray(Mat mat) 2862 { 2863 PetscFunctionBegin; 2864 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2865 PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat)); 2866 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2867 PetscFunctionReturn(PETSC_SUCCESS); 2868 } 2869 2870 /*@C 2871 MatDenseHIPReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2872 array provided by the user. This is useful to avoid copying an array 2873 into a matrix 2874 2875 Not Collective 2876 2877 Input Parameters: 2878 + mat - the matrix 2879 - array - the array in column major order 2880 2881 Level: developer 2882 2883 Note: 2884 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2885 The memory passed in CANNOT be freed by the user. It will be freed 2886 when the matrix is destroyed. The array should respect the matrix leading dimension. 2887 2888 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPPlaceArray(), MatDenseHIPResetArray() 2889 @*/ 2890 PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array) 2891 { 2892 PetscFunctionBegin; 2893 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2894 PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 2895 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2896 mat->offloadmask = PETSC_OFFLOAD_GPU; 2897 PetscFunctionReturn(PETSC_SUCCESS); 2898 } 2899 2900 /*@C 2901 MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a dense matrix. 2902 2903 Not Collective 2904 2905 Input Parameters: 2906 . A - the matrix 2907 2908 Output Parameters 2909 . array - the GPU array in column major order 2910 2911 Level: developer 2912 2913 Note: 2914 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`. 2915 The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed. 2916 2917 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArrayRead() 2918 @*/ 2919 PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a) 2920 { 2921 PetscFunctionBegin; 2922 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2923 PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2924 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2925 PetscFunctionReturn(PETSC_SUCCESS); 2926 } 2927 2928 /*@C 2929 MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a dense matrix previously obtained with `MatDenseHIPGetArrayWrite()`. 2930 2931 Not Collective 2932 2933 Input Parameters: 2934 + A - the matrix 2935 - array - the GPU array in column major order 2936 2937 Level: developer 2938 2939 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead() 2940 @*/ 2941 PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a) 2942 { 2943 PetscFunctionBegin; 2944 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2945 PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2946 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2947 A->offloadmask = PETSC_OFFLOAD_GPU; 2948 PetscFunctionReturn(PETSC_SUCCESS); 2949 } 2950 2951 /*@C 2952 MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArrayRead() when no longer needed. 2953 2954 Not Collective 2955 2956 Input Parameters: 2957 . A - the matrix 2958 2959 Output Parameters 2960 . array - the GPU array in column major order 2961 2962 Level: developer 2963 2964 Note: 2965 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`. 2966 2967 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead() 2968 @*/ 2969 PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a) 2970 { 2971 PetscFunctionBegin; 2972 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2973 PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2974 PetscFunctionReturn(PETSC_SUCCESS); 2975 } 2976 2977 /*@C 2978 MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a dense matrix previously obtained with a call to MatDenseHIPGetArrayRead(). 2979 2980 Not Collective 2981 2982 Input Parameters: 2983 + A - the matrix 2984 - array - the GPU array in column major order 2985 2986 Level: developer 2987 2988 Notes: 2989 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`. 2990 2991 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPGetArrayRead() 2992 @*/ 2993 PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a) 2994 { 2995 PetscFunctionBegin; 2996 PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2997 PetscFunctionReturn(PETSC_SUCCESS); 2998 } 2999 3000 /*@C 3001 MatDenseHIPGetArray - Provides access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArray() when no longer needed. 3002 3003 Not Collective 3004 3005 Input Parameters: 3006 . A - the matrix 3007 3008 Output Parameters 3009 . array - the GPU array in column major order 3010 3011 Level: developer 3012 3013 Note: 3014 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`. 3015 3016 For read-only access, use `MatDenseHIPGetArrayRead()`. 3017 3018 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArrayRead(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead() 3019 @*/ 3020 PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a) 3021 { 3022 PetscFunctionBegin; 3023 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3024 PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a)); 3025 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 /*@C 3030 MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a dense matrix previously obtained with MatDenseHIPGetArray(). 3031 3032 Not Collective 3033 3034 Input Parameters: 3035 + A - the matrix 3036 - array - the GPU array in column major order 3037 3038 Level: developer 3039 3040 .seealso: [](chapter_matrices), `Mat`, MatDenseHIPGetArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPRestoreArrayRead(), MatDenseHIPGetArrayRead() 3041 @*/ 3042 PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a) 3043 { 3044 PetscFunctionBegin; 3045 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 3046 PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a)); 3047 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 3048 A->offloadmask = PETSC_OFFLOAD_GPU; 3049 PetscFunctionReturn(PETSC_SUCCESS); 3050 } 3051 #endif 3052 3053 /*@C 3054 MatCreateDense - Creates a matrix in `MATDENSE` format. 3055 3056 Collective 3057 3058 Input Parameters: 3059 + comm - MPI communicator 3060 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 3061 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 3062 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 3063 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 3064 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 3065 to control all matrix memory allocation. 3066 3067 Output Parameter: 3068 . A - the matrix 3069 3070 Level: intermediate 3071 3072 Notes: 3073 The dense format is fully compatible with standard Fortran 3074 storage by columns. 3075 3076 Although local portions of the matrix are stored in column-major 3077 order, the matrix is partitioned across MPI ranks by row. 3078 3079 The data input variable is intended primarily for Fortran programmers 3080 who wish to allocate their own matrix memory space. Most users should 3081 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 3082 3083 The user MUST specify either the local or global matrix dimensions 3084 (possibly both). 3085 3086 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 3087 @*/ 3088 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3089 { 3090 PetscFunctionBegin; 3091 PetscCall(MatCreate(comm, A)); 3092 PetscCall(MatSetSizes(*A, m, n, M, N)); 3093 PetscCall(MatSetType(*A, MATDENSE)); 3094 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 3095 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 3096 PetscFunctionReturn(PETSC_SUCCESS); 3097 } 3098 3099 #if defined(PETSC_HAVE_CUDA) 3100 /*@C 3101 MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA. 3102 3103 Collective 3104 3105 Input Parameters: 3106 + comm - MPI communicator 3107 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 3108 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 3109 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 3110 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 3111 - data - optional location of GPU matrix data. Use `NULL` for PETSc 3112 to control matrix memory allocation. 3113 3114 Output Parameter: 3115 . A - the matrix 3116 3117 Level: intermediate 3118 3119 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()` 3120 @*/ 3121 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3122 { 3123 PetscFunctionBegin; 3124 PetscCall(MatCreate(comm, A)); 3125 PetscValidLogicalCollectiveBool(*A, !!data, 6); 3126 PetscCall(MatSetSizes(*A, m, n, M, N)); 3127 PetscCall(MatSetType(*A, MATDENSECUDA)); 3128 PetscCall(MatSeqDenseCUDASetPreallocation(*A, data)); 3129 PetscCall(MatMPIDenseCUDASetPreallocation(*A, data)); 3130 PetscFunctionReturn(PETSC_SUCCESS); 3131 } 3132 #endif 3133 3134 #if defined(PETSC_HAVE_HIP) 3135 /*@C 3136 MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP. 3137 3138 Collective 3139 3140 Input Parameters: 3141 + comm - MPI communicator 3142 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 3143 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 3144 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 3145 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 3146 - data - optional location of GPU matrix data. Use `NULL` for PETSc 3147 to control matrix memory allocation. 3148 3149 Output Parameter: 3150 . A - the matrix 3151 3152 Level: intermediate 3153 3154 .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()` 3155 @*/ 3156 PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 3157 { 3158 PetscFunctionBegin; 3159 PetscCall(MatCreate(comm, A)); 3160 PetscValidLogicalCollectiveBool(*A, !!data, 6); 3161 PetscCall(MatSetSizes(*A, m, n, M, N)); 3162 PetscCall(MatSetType(*A, MATDENSEHIP)); 3163 PetscCall(MatSeqDenseHIPSetPreallocation(*A, data)); 3164 PetscCall(MatMPIDenseHIPSetPreallocation(*A, data)); 3165 PetscFunctionReturn(PETSC_SUCCESS); 3166 } 3167 #endif 3168 3169 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 3170 { 3171 Mat mat; 3172 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 3173 3174 PetscFunctionBegin; 3175 *newmat = NULL; 3176 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 3177 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3178 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 3179 a = (Mat_MPIDense *)mat->data; 3180 3181 mat->factortype = A->factortype; 3182 mat->assembled = PETSC_TRUE; 3183 mat->preallocated = PETSC_TRUE; 3184 3185 mat->insertmode = NOT_SET_VALUES; 3186 a->donotstash = oldmat->donotstash; 3187 3188 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 3189 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 3190 3191 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3192 3193 *newmat = mat; 3194 PetscFunctionReturn(PETSC_SUCCESS); 3195 } 3196 3197 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 3198 { 3199 PetscBool isbinary; 3200 #if defined(PETSC_HAVE_HDF5) 3201 PetscBool ishdf5; 3202 #endif 3203 3204 PetscFunctionBegin; 3205 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3206 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3207 /* force binary viewer to load .info file if it has not yet done so */ 3208 PetscCall(PetscViewerSetUp(viewer)); 3209 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3210 #if defined(PETSC_HAVE_HDF5) 3211 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3212 #endif 3213 if (isbinary) { 3214 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 3215 #if defined(PETSC_HAVE_HDF5) 3216 } else if (ishdf5) { 3217 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 3218 #endif 3219 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3220 PetscFunctionReturn(PETSC_SUCCESS); 3221 } 3222 3223 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 3224 { 3225 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 3226 Mat a, b; 3227 3228 PetscFunctionBegin; 3229 a = matA->A; 3230 b = matB->A; 3231 PetscCall(MatEqual(a, b, flag)); 3232 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 3233 PetscFunctionReturn(PETSC_SUCCESS); 3234 } 3235 3236 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 3237 { 3238 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 3239 3240 PetscFunctionBegin; 3241 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 3242 PetscCall(MatDestroy(&atb->atb)); 3243 PetscCall(PetscFree(atb)); 3244 PetscFunctionReturn(PETSC_SUCCESS); 3245 } 3246 3247 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 3248 { 3249 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 3250 3251 PetscFunctionBegin; 3252 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 3253 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 3254 PetscCall(PetscFree(abt)); 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3259 { 3260 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3261 Mat_TransMatMultDense *atb; 3262 MPI_Comm comm; 3263 PetscMPIInt size, *recvcounts; 3264 PetscScalar *carray, *sendbuf; 3265 const PetscScalar *atbarray; 3266 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 3267 const PetscInt *ranges; 3268 3269 PetscFunctionBegin; 3270 MatCheckProduct(C, 3); 3271 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3272 atb = (Mat_TransMatMultDense *)C->product->data; 3273 recvcounts = atb->recvcounts; 3274 sendbuf = atb->sendbuf; 3275 3276 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3277 PetscCallMPI(MPI_Comm_size(comm, &size)); 3278 3279 /* compute atbarray = aseq^T * bseq */ 3280 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 3281 3282 PetscCall(MatGetOwnershipRanges(C, &ranges)); 3283 3284 /* arrange atbarray into sendbuf */ 3285 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 3286 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 3287 for (proc = 0, k = 0; proc < size; proc++) { 3288 for (j = 0; j < cN; j++) { 3289 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 3290 } 3291 } 3292 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 3293 3294 /* sum all atbarray to local values of C */ 3295 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 3296 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 3297 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 3298 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3299 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3300 PetscFunctionReturn(PETSC_SUCCESS); 3301 } 3302 3303 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3304 { 3305 MPI_Comm comm; 3306 PetscMPIInt size; 3307 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 3308 Mat_TransMatMultDense *atb; 3309 PetscBool cisdense = PETSC_FALSE; 3310 PetscInt i; 3311 const PetscInt *ranges; 3312 3313 PetscFunctionBegin; 3314 MatCheckProduct(C, 4); 3315 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3316 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3317 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 3318 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3319 } 3320 3321 /* create matrix product C */ 3322 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 3323 #if defined(PETSC_HAVE_CUDA) 3324 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 3325 #endif 3326 #if defined(PETSC_HAVE_HIP) 3327 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 3328 #endif 3329 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 3330 PetscCall(MatSetUp(C)); 3331 3332 /* create data structure for reuse C */ 3333 PetscCallMPI(MPI_Comm_size(comm, &size)); 3334 PetscCall(PetscNew(&atb)); 3335 cM = C->rmap->N; 3336 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 3337 PetscCall(MatGetOwnershipRanges(C, &ranges)); 3338 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 3339 3340 C->product->data = atb; 3341 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 3342 PetscFunctionReturn(PETSC_SUCCESS); 3343 } 3344 3345 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3346 { 3347 MPI_Comm comm; 3348 PetscMPIInt i, size; 3349 PetscInt maxRows, bufsiz; 3350 PetscMPIInt tag; 3351 PetscInt alg; 3352 Mat_MatTransMultDense *abt; 3353 Mat_Product *product = C->product; 3354 PetscBool flg; 3355 3356 PetscFunctionBegin; 3357 MatCheckProduct(C, 4); 3358 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3359 /* check local size of A and B */ 3360 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 3361 3362 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 3363 alg = flg ? 0 : 1; 3364 3365 /* setup matrix product C */ 3366 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 3367 PetscCall(MatSetType(C, MATMPIDENSE)); 3368 PetscCall(MatSetUp(C)); 3369 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 3370 3371 /* create data structure for reuse C */ 3372 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 3373 PetscCallMPI(MPI_Comm_size(comm, &size)); 3374 PetscCall(PetscNew(&abt)); 3375 abt->tag = tag; 3376 abt->alg = alg; 3377 switch (alg) { 3378 case 1: /* alg: "cyclic" */ 3379 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 3380 bufsiz = A->cmap->N * maxRows; 3381 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 3382 break; 3383 default: /* alg: "allgatherv" */ 3384 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 3385 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 3386 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 3387 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 3388 break; 3389 } 3390 3391 C->product->data = abt; 3392 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 3393 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 3394 PetscFunctionReturn(PETSC_SUCCESS); 3395 } 3396 3397 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 3398 { 3399 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3400 Mat_MatTransMultDense *abt; 3401 MPI_Comm comm; 3402 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 3403 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 3404 PetscInt i, cK = A->cmap->N, k, j, bn; 3405 PetscScalar _DOne = 1.0, _DZero = 0.0; 3406 const PetscScalar *av, *bv; 3407 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 3408 MPI_Request reqs[2]; 3409 const PetscInt *ranges; 3410 3411 PetscFunctionBegin; 3412 MatCheckProduct(C, 3); 3413 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3414 abt = (Mat_MatTransMultDense *)C->product->data; 3415 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 3416 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3417 PetscCallMPI(MPI_Comm_size(comm, &size)); 3418 PetscCall(MatDenseGetArrayRead(a->A, &av)); 3419 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 3420 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 3421 PetscCall(MatDenseGetLDA(a->A, &i)); 3422 PetscCall(PetscBLASIntCast(i, &alda)); 3423 PetscCall(MatDenseGetLDA(b->A, &i)); 3424 PetscCall(PetscBLASIntCast(i, &blda)); 3425 PetscCall(MatDenseGetLDA(c->A, &i)); 3426 PetscCall(PetscBLASIntCast(i, &clda)); 3427 PetscCall(MatGetOwnershipRanges(B, &ranges)); 3428 bn = B->rmap->n; 3429 if (blda == bn) { 3430 sendbuf = (PetscScalar *)bv; 3431 } else { 3432 sendbuf = abt->buf[0]; 3433 for (k = 0, i = 0; i < cK; i++) { 3434 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 3435 } 3436 } 3437 if (size > 1) { 3438 sendto = (rank + size - 1) % size; 3439 recvfrom = (rank + size + 1) % size; 3440 } else { 3441 sendto = recvfrom = 0; 3442 } 3443 PetscCall(PetscBLASIntCast(cK, &ck)); 3444 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 3445 recvisfrom = rank; 3446 for (i = 0; i < size; i++) { 3447 /* we have finished receiving in sending, bufs can be read/modified */ 3448 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 3449 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 3450 3451 if (nextrecvisfrom != rank) { 3452 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 3453 sendsiz = cK * bn; 3454 recvsiz = cK * nextbn; 3455 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 3456 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 3457 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 3458 } 3459 3460 /* local aseq * sendbuf^T */ 3461 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 3462 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 3463 3464 if (nextrecvisfrom != rank) { 3465 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 3466 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 3467 } 3468 bn = nextbn; 3469 recvisfrom = nextrecvisfrom; 3470 sendbuf = recvbuf; 3471 } 3472 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 3473 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 3474 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 3475 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3476 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3477 PetscFunctionReturn(PETSC_SUCCESS); 3478 } 3479 3480 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 3481 { 3482 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 3483 Mat_MatTransMultDense *abt; 3484 MPI_Comm comm; 3485 PetscMPIInt size; 3486 PetscScalar *cv, *sendbuf, *recvbuf; 3487 const PetscScalar *av, *bv; 3488 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 3489 PetscScalar _DOne = 1.0, _DZero = 0.0; 3490 PetscBLASInt cm, cn, ck, alda, clda; 3491 3492 PetscFunctionBegin; 3493 MatCheckProduct(C, 3); 3494 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3495 abt = (Mat_MatTransMultDense *)C->product->data; 3496 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3497 PetscCallMPI(MPI_Comm_size(comm, &size)); 3498 PetscCall(MatDenseGetArrayRead(a->A, &av)); 3499 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 3500 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 3501 PetscCall(MatDenseGetLDA(a->A, &i)); 3502 PetscCall(PetscBLASIntCast(i, &alda)); 3503 PetscCall(MatDenseGetLDA(b->A, &blda)); 3504 PetscCall(MatDenseGetLDA(c->A, &i)); 3505 PetscCall(PetscBLASIntCast(i, &clda)); 3506 /* copy transpose of B into buf[0] */ 3507 bn = B->rmap->n; 3508 sendbuf = abt->buf[0]; 3509 recvbuf = abt->buf[1]; 3510 for (k = 0, j = 0; j < bn; j++) { 3511 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 3512 } 3513 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 3514 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 3515 PetscCall(PetscBLASIntCast(cK, &ck)); 3516 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 3517 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 3518 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 3519 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 3520 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 3521 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 3522 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3523 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3524 PetscFunctionReturn(PETSC_SUCCESS); 3525 } 3526 3527 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3528 { 3529 Mat_MatTransMultDense *abt; 3530 3531 PetscFunctionBegin; 3532 MatCheckProduct(C, 3); 3533 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 3534 abt = (Mat_MatTransMultDense *)C->product->data; 3535 switch (abt->alg) { 3536 case 1: 3537 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 3538 break; 3539 default: 3540 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 3541 break; 3542 } 3543 PetscFunctionReturn(PETSC_SUCCESS); 3544 } 3545 3546 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 3547 { 3548 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 3549 3550 PetscFunctionBegin; 3551 PetscCall(MatDestroy(&ab->Ce)); 3552 PetscCall(MatDestroy(&ab->Ae)); 3553 PetscCall(MatDestroy(&ab->Be)); 3554 PetscCall(PetscFree(ab)); 3555 PetscFunctionReturn(PETSC_SUCCESS); 3556 } 3557 3558 #if defined(PETSC_HAVE_ELEMENTAL) 3559 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 3560 { 3561 Mat_MatMultDense *ab; 3562 3563 PetscFunctionBegin; 3564 MatCheckProduct(C, 3); 3565 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 3566 ab = (Mat_MatMultDense *)C->product->data; 3567 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 3568 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 3569 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 3570 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 3571 PetscFunctionReturn(PETSC_SUCCESS); 3572 } 3573 3574 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 3575 { 3576 Mat Ae, Be, Ce; 3577 Mat_MatMultDense *ab; 3578 3579 PetscFunctionBegin; 3580 MatCheckProduct(C, 4); 3581 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 3582 /* check local size of A and B */ 3583 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 3584 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3585 } 3586 3587 /* create elemental matrices Ae and Be */ 3588 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 3589 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 3590 PetscCall(MatSetType(Ae, MATELEMENTAL)); 3591 PetscCall(MatSetUp(Ae)); 3592 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 3593 3594 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 3595 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 3596 PetscCall(MatSetType(Be, MATELEMENTAL)); 3597 PetscCall(MatSetUp(Be)); 3598 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 3599 3600 /* compute symbolic Ce = Ae*Be */ 3601 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 3602 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 3603 3604 /* setup C */ 3605 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 3606 PetscCall(MatSetType(C, MATDENSE)); 3607 PetscCall(MatSetUp(C)); 3608 3609 /* create data structure for reuse Cdense */ 3610 PetscCall(PetscNew(&ab)); 3611 ab->Ae = Ae; 3612 ab->Be = Be; 3613 ab->Ce = Ce; 3614 3615 C->product->data = ab; 3616 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 3617 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 3618 PetscFunctionReturn(PETSC_SUCCESS); 3619 } 3620 #endif 3621 3622 #if defined(PETSC_HAVE_ELEMENTAL) 3623 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 3624 { 3625 PetscFunctionBegin; 3626 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 3627 C->ops->productsymbolic = MatProductSymbolic_AB; 3628 PetscFunctionReturn(PETSC_SUCCESS); 3629 } 3630 #endif 3631 3632 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 3633 { 3634 Mat_Product *product = C->product; 3635 Mat A = product->A, B = product->B; 3636 3637 PetscFunctionBegin; 3638 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 3639 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 3640 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 3641 C->ops->productsymbolic = MatProductSymbolic_AtB; 3642 PetscFunctionReturn(PETSC_SUCCESS); 3643 } 3644 3645 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 3646 { 3647 Mat_Product *product = C->product; 3648 const char *algTypes[2] = {"allgatherv", "cyclic"}; 3649 PetscInt alg, nalg = 2; 3650 PetscBool flg = PETSC_FALSE; 3651 3652 PetscFunctionBegin; 3653 /* Set default algorithm */ 3654 alg = 0; /* default is allgatherv */ 3655 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 3656 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 3657 3658 /* Get runtime option */ 3659 if (product->api_user) { 3660 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 3661 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 3662 PetscOptionsEnd(); 3663 } else { 3664 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 3665 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 3666 PetscOptionsEnd(); 3667 } 3668 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 3669 3670 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3671 C->ops->productsymbolic = MatProductSymbolic_ABt; 3672 PetscFunctionReturn(PETSC_SUCCESS); 3673 } 3674 3675 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3676 { 3677 Mat_Product *product = C->product; 3678 3679 PetscFunctionBegin; 3680 switch (product->type) { 3681 #if defined(PETSC_HAVE_ELEMENTAL) 3682 case MATPRODUCT_AB: 3683 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 3684 break; 3685 #endif 3686 case MATPRODUCT_AtB: 3687 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 3688 break; 3689 case MATPRODUCT_ABt: 3690 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 3691 break; 3692 default: 3693 break; 3694 } 3695 PetscFunctionReturn(PETSC_SUCCESS); 3696 } 3697